// Package revision extracts git revisions from strings.
// More information about revisions: https://www.kernel.org/pub/software/scm/git/docs/gitrevisions.html
package revision
import (
"bytes"
"fmt"
"io"
"regexp"
"strconv"
"time"
)
// ErrInvalidRevision is returned when a string doesn't match a valid revision.
type ErrInvalidRevision struct {
s string
}
func (e *ErrInvalidRevision) Error() string {
return "Revision invalid : " + e.s
}
// Revisioner represents a revision component.
// A revision is made of multiple revision components
// obtained after parsing a revision string;
// for instance, the revision "master~" is converted into
// two revision components, Ref and TildePath.
type Revisioner any
// Ref represents a reference name: HEAD, master, <hash>
type Ref string
// TildePath represents ~, ~{n}
type TildePath struct {
Depth int
}
// CaretPath represents ^, ^{n}
type CaretPath struct {
Depth int
}
// CaretReg represents ^{/foo bar}
type CaretReg struct {
Regexp *regexp.Regexp
Negate bool
}
// CaretType represents ^{commit}
type CaretType struct {
ObjectType string
}
// AtReflog represents @{n}
type AtReflog struct {
Depth int
}
// AtCheckout represents @{-n}
type AtCheckout struct {
Depth int
}
// AtUpstream represents @{upstream}, @{u}
type AtUpstream struct {
BranchName string
}
// AtPush represents @{push}
type AtPush struct {
BranchName string
}
// AtDate represents @{2006-01-02T15:04:05Z}
type AtDate struct {
Date time.Time
}
// ColonReg represents :/foo bar
type ColonReg struct {
Regexp *regexp.Regexp
Negate bool
}
// ColonPath represents :./<path> :<path>
type ColonPath struct {
Path string
}
// ColonStagePath represents :<n>:/<path>
type ColonStagePath struct {
Path string
Stage int
}
// Parser represents a parser used to tokenize a given string
// and transform it into Revisioner chunks.
type Parser struct {
s *scanner
currentParsedChar struct {
tok token
lit string
}
unreadLastChar bool
}
// NewParserFromString returns a new instance of parser from a string.
func NewParserFromString(s string) *Parser {
return NewParser(bytes.NewBufferString(s))
}
// NewParser returns a new instance of parser.
func NewParser(r io.Reader) *Parser {
return &Parser{s: newScanner(r)}
}
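// parseExample is an illustrative sketch (not part of the original
// source): parsing "master~2^" yields three Revisioner chunks:
// Ref("master"), TildePath{Depth: 2} and CaretPath{Depth: 1}.
func parseExample() ([]Revisioner, error) {
	p := NewParserFromString("master~2^")
	return p.Parse()
}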
// scan returns the next token from the underlying scanner,
// or the last scanned token if an unscan was requested.
func (p *Parser) scan() (token, string, error) {
if p.unreadLastChar {
p.unreadLastChar = false
return p.currentParsedChar.tok, p.currentParsedChar.lit, nil
}
tok, lit, err := p.s.scan()
p.currentParsedChar.tok, p.currentParsedChar.lit = tok, lit
return tok, lit, err
}
// unscan pushes the previously read token back onto the buffer.
func (p *Parser) unscan() { p.unreadLastChar = true }
// Parse explodes a revision string into Revisioner chunks.
func (p *Parser) Parse() ([]Revisioner, error) {
var rev Revisioner
var revs []Revisioner
var tok token
var err error
for {
tok, _, err = p.scan()
if err != nil {
return nil, err
}
switch tok {
case at:
rev, err = p.parseAt()
case tilde:
rev, err = p.parseTilde()
case caret:
rev, err = p.parseCaret()
case colon:
rev, err = p.parseColon()
case eof:
err = p.validateFullRevision(&revs)
if err != nil {
return []Revisioner{}, err
}
return revs, nil
default:
p.unscan()
rev, err = p.parseRef()
}
if err != nil {
return []Revisioner{}, err
}
revs = append(revs, rev)
}
}
// validateFullRevision ensures that all Revisioner chunks form a valid revision.
func (p *Parser) validateFullRevision(chunks *[]Revisioner) error {
var hasReference bool
for i, chunk := range *chunks {
switch chunk.(type) {
case Ref:
if i == 0 {
hasReference = true
} else {
return &ErrInvalidRevision{`reference must be defined once at the beginning`}
}
case AtDate:
if len(*chunks) == 1 || hasReference && len(*chunks) == 2 {
return nil
}
return &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{<ISO-8601 date>}, @{<ISO-8601 date>}`}
case AtReflog:
if len(*chunks) == 1 || hasReference && len(*chunks) == 2 {
return nil
}
return &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{<n>}, @{<n>}`}
case AtCheckout:
if len(*chunks) == 1 {
return nil
}
return &ErrInvalidRevision{`"@" statement is not valid, could be : @{-<n>}`}
case AtUpstream:
if len(*chunks) == 1 || hasReference && len(*chunks) == 2 {
return nil
}
return &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{upstream}, @{upstream}, <refname>@{u}, @{u}`}
case AtPush:
if len(*chunks) == 1 || hasReference && len(*chunks) == 2 {
return nil
}
return &ErrInvalidRevision{`"@" statement is not valid, could be : <refname>@{push}, @{push}`}
case TildePath, CaretPath, CaretReg:
if !hasReference {
return &ErrInvalidRevision{`"~" or "^" statement must have a reference defined at the beginning`}
}
case ColonReg:
if len(*chunks) == 1 {
return nil
}
return &ErrInvalidRevision{`":" statement is not valid, could be : :/<regexp>`}
case ColonPath:
if i == len(*chunks)-1 && hasReference || len(*chunks) == 1 {
return nil
}
return &ErrInvalidRevision{`":" statement is not valid, could be : <revision>:<path>`}
case ColonStagePath:
if len(*chunks) == 1 {
return nil
}
return &ErrInvalidRevision{`":" statement is not valid, could be : :<n>:<path>`}
}
}
return nil
}
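// invalidParseExample is an illustrative sketch (not part of the
// original source): "~1" alone is rejected by validateFullRevision
// because "~" requires a reference at the beginning, so Parse returns
// an *ErrInvalidRevision.
func invalidParseExample() error {
	_, err := NewParserFromString("~1").Parse()
	return err
}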
// parseAt extracts @ statements.
func (p *Parser) parseAt() (Revisioner, error) {
var tok, nextTok token
var lit, nextLit string
var err error
tok, _, err = p.scan()
if err != nil {
return nil, err
}
if tok != obrace {
p.unscan()
return Ref("HEAD"), nil
}
tok, lit, err = p.scan()
if err != nil {
return nil, err
}
nextTok, nextLit, err = p.scan()
if err != nil {
return nil, err
}
switch {
case tok == word && (lit == "u" || lit == "upstream") && nextTok == cbrace:
return AtUpstream{}, nil
case tok == word && lit == "push" && nextTok == cbrace:
return AtPush{}, nil
case tok == number && nextTok == cbrace:
n, _ := strconv.Atoi(lit)
return AtReflog{n}, nil
case tok == minus && nextTok == number:
n, _ := strconv.Atoi(nextLit)
t, _, err := p.scan()
if err != nil {
return nil, err
}
if t != cbrace {
return nil, &ErrInvalidRevision{s: `missing "}" in @{-n} structure`}
}
return AtCheckout{n}, nil
default:
p.unscan()
date := lit
for {
tok, lit, err = p.scan()
if err != nil {
return nil, err
}
switch tok {
case cbrace:
t, err := time.Parse("2006-01-02T15:04:05Z", date)
if err != nil {
return nil, &ErrInvalidRevision{fmt.Sprintf(`wrong date "%s" must fit ISO-8601 format : 2006-01-02T15:04:05Z`, date)}
}
return AtDate{t}, nil
case eof:
return nil, &ErrInvalidRevision{s: `missing "}" in @{<data>} structure`}
default:
date += lit
}
}
}
}
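// atExample is an illustrative sketch (not part of the original
// source): "@{-1}" names the previously checked-out branch and parses
// to AtCheckout{Depth: 1}, while a bare "@" is shorthand for HEAD.
func atExample() ([]Revisioner, error) {
	return NewParserFromString("@{-1}").Parse()
}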
// parseTilde extracts ~ statements.
func (p *Parser) parseTilde() (Revisioner, error) {
var tok token
var lit string
var err error
tok, lit, err = p.scan()
if err != nil {
return nil, err
}
switch tok {
case number:
n, _ := strconv.Atoi(lit)
return TildePath{n}, nil
default:
p.unscan()
return TildePath{1}, nil
}
}
// parseCaret extracts ^ statements.
func (p *Parser) parseCaret() (Revisioner, error) {
var tok token
var lit string
var err error
tok, lit, err = p.scan()
if err != nil {
return nil, err
}
switch tok {
case obrace:
r, err := p.parseCaretBraces()
if err != nil {
return nil, err
}
return r, nil
case number:
n, _ := strconv.Atoi(lit)
if n > 2 {
return nil, &ErrInvalidRevision{fmt.Sprintf(`"%s" found must be 0, 1 or 2 after "^"`, lit)}
}
return CaretPath{n}, nil
default:
p.unscan()
return CaretPath{1}, nil
}
}
// parseCaretBraces extracts ^{<data>} statements.
func (p *Parser) parseCaretBraces() (Revisioner, error) {
var tok, nextTok token
var lit string
start := true
var re string
var negate bool
var err error
for {
tok, lit, err = p.scan()
if err != nil {
return nil, err
}
nextTok, _, err = p.scan()
if err != nil {
return nil, err
}
switch {
case tok == word && nextTok == cbrace && (lit == "commit" || lit == "tree" || lit == "blob" || lit == "tag" || lit == "object"):
return CaretType{lit}, nil
case re == "" && tok == cbrace:
return CaretType{"tag"}, nil
case re == "" && tok == emark && nextTok == emark:
re += lit
case re == "" && tok == emark && nextTok == minus:
negate = true
case re == "" && tok == emark:
return nil, &ErrInvalidRevision{s: `revision suffix brace component sequences starting with "/!" others than those defined are reserved`}
case re == "" && tok == slash:
p.unscan()
case tok != slash && start:
return nil, &ErrInvalidRevision{fmt.Sprintf(`"%s" is not a valid revision suffix brace component`, lit)}
case tok == eof:
return nil, &ErrInvalidRevision{s: `missing "}" in ^{<data>} structure`}
case tok != cbrace:
p.unscan()
re += lit
case tok == cbrace:
p.unscan()
reg, err := regexp.Compile(re)
if err != nil {
return CaretReg{}, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component, %s`, err.Error())}
}
return CaretReg{reg, negate}, nil
}
start = false
}
}
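// caretRegExample is an illustrative sketch (not part of the original
// source): "HEAD^{/fix bug}" parses to Ref("HEAD") followed by a
// CaretReg whose Regexp matches commit messages against "fix bug".
func caretRegExample() ([]Revisioner, error) {
	return NewParserFromString("HEAD^{/fix bug}").Parse()
}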
// parseColon extracts : statements.
func (p *Parser) parseColon() (Revisioner, error) {
var tok token
var err error
tok, _, err = p.scan()
if err != nil {
return nil, err
}
switch tok {
case slash:
return p.parseColonSlash()
default:
p.unscan()
return p.parseColonDefault()
}
}
// parseColonSlash extracts :/<data> statements.
func (p *Parser) parseColonSlash() (Revisioner, error) {
var tok, nextTok token
var lit string
var re string
var negate bool
var err error
for {
tok, lit, err = p.scan()
if err != nil {
return nil, err
}
nextTok, _, err = p.scan()
if err != nil {
return nil, err
}
switch {
case tok == emark && nextTok == emark:
re += lit
case re == "" && tok == emark && nextTok == minus:
negate = true
case re == "" && tok == emark:
return nil, &ErrInvalidRevision{s: `revision suffix brace component sequences starting with "/!" others than those defined are reserved`}
case tok == eof:
p.unscan()
reg, err := regexp.Compile(re)
if err != nil {
return ColonReg{}, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component, %s`, err.Error())}
}
return ColonReg{reg, negate}, nil
default:
p.unscan()
re += lit
}
}
}
// parseColonDefault extracts :<data> statements.
func (p *Parser) parseColonDefault() (Revisioner, error) {
var tok token
var lit string
var path string
var stage int
var err error
n := -1
tok, lit, err = p.scan()
if err != nil {
return nil, err
}
nextTok, _, err := p.scan()
if err != nil {
return nil, err
}
if tok == number && nextTok == colon {
n, _ = strconv.Atoi(lit)
}
switch n {
case 0, 1, 2, 3:
stage = n
default:
path += lit
p.unscan()
}
for {
tok, lit, err = p.scan()
if err != nil {
return nil, err
}
switch {
case tok == eof && n == -1:
return ColonPath{path}, nil
case tok == eof:
return ColonStagePath{path, stage}, nil
default:
path += lit
}
}
}
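// colonStageExample is an illustrative sketch (not part of the
// original source): ":0:README" parses to ColonStagePath{"README", 0},
// whereas ":README" would parse to ColonPath{"README"}.
func colonStageExample() ([]Revisioner, error) {
	return NewParserFromString(":0:README").Parse()
}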
// parseRef extracts the reference name.
func (p *Parser) parseRef() (Revisioner, error) {
var tok, prevTok token
var lit, buf string
var endOfRef bool
var err error
for {
tok, lit, err = p.scan()
if err != nil {
return nil, err
}
switch tok {
case eof, at, colon, tilde, caret:
endOfRef = true
}
err := p.checkRefFormat(tok, lit, prevTok, buf, endOfRef)
if err != nil {
return "", err
}
if endOfRef {
p.unscan()
return Ref(buf), nil
}
buf += lit
prevTok = tok
}
}
// checkRefFormat ensures that the reference name follows the rules defined at:
// https://git-scm.com/docs/git-check-ref-format
func (p *Parser) checkRefFormat(token token, literal string, previousToken token, buffer string, endOfRef bool) error {
switch token {
case aslash, space, control, qmark, asterisk, obracket:
return &ErrInvalidRevision{fmt.Sprintf(`must not contains "%s"`, literal)}
}
switch {
case (token == dot || token == slash) && buffer == "":
return &ErrInvalidRevision{fmt.Sprintf(`must not start with "%s"`, literal)}
case previousToken == slash && endOfRef:
return &ErrInvalidRevision{`must not end with "/"`}
case previousToken == dot && endOfRef:
return &ErrInvalidRevision{`must not end with "."`}
case token == dot && previousToken == slash:
return &ErrInvalidRevision{`must not contains "/."`}
case previousToken == dot && token == dot:
return &ErrInvalidRevision{`must not contains ".."`}
case previousToken == slash && token == slash:
return &ErrInvalidRevision{`must not contains consecutively "/"`}
case (token == slash || endOfRef) && len(buffer) > 4 && buffer[len(buffer)-5:] == ".lock":
return &ErrInvalidRevision{"cannot end with .lock"}
}
return nil
}
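// refFormatExample is an illustrative sketch (not part of the original
// source): "heads//master" violates the consecutive-slash rule from
// git-check-ref-format, so Parse fails, while "heads/master" is fine.
func refFormatExample() error {
	_, err := NewParserFromString("heads//master").Parse()
	return err
}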
package revision
import (
"bufio"
"io"
"unicode"
)
// runeCategoryValidator takes a rune as input and
// reports whether it belongs to a rune category.
type runeCategoryValidator func(r rune) bool
// tokenizeExpression aggregates a series of runes matching the check
// predicate into a single string, returned under the given tokenType.
func tokenizeExpression(ch rune, tokenType token, check runeCategoryValidator, r *bufio.Reader) (token, string, error) {
var data []rune
data = append(data, ch)
for {
c, _, err := r.ReadRune()
if c == zeroRune {
break
}
if err != nil {
return tokenError, "", err
}
if check(c) {
data = append(data, c)
} else {
err := r.UnreadRune()
if err != nil {
return tokenError, "", err
}
return tokenType, string(data), nil
}
}
return tokenType, string(data), nil
}
// maxRevisionLength holds the maximum length that will be parsed for a
// revision. Git itself doesn't enforce a max length, but rather leans on
// the OS to enforce it via its ARG_MAX.
const maxRevisionLength = 128 * 1024 // 128kb
var zeroRune = rune(0)
// scanner represents a lexical scanner.
type scanner struct {
r *bufio.Reader
}
// newScanner returns a new instance of scanner.
func newScanner(r io.Reader) *scanner {
return &scanner{r: bufio.NewReader(io.LimitReader(r, maxRevisionLength))}
}
// scan extracts tokens and their string counterparts
// from the reader.
func (s *scanner) scan() (token, string, error) {
ch, _, err := s.r.ReadRune()
if err != nil && err != io.EOF {
return tokenError, "", err
}
switch ch {
case zeroRune:
return eof, "", nil
case ':':
return colon, string(ch), nil
case '~':
return tilde, string(ch), nil
case '^':
return caret, string(ch), nil
case '.':
return dot, string(ch), nil
case '/':
return slash, string(ch), nil
case '{':
return obrace, string(ch), nil
case '}':
return cbrace, string(ch), nil
case '-':
return minus, string(ch), nil
case '@':
return at, string(ch), nil
case '\\':
return aslash, string(ch), nil
case '?':
return qmark, string(ch), nil
case '*':
return asterisk, string(ch), nil
case '[':
return obracket, string(ch), nil
case '!':
return emark, string(ch), nil
}
if unicode.IsSpace(ch) {
return space, string(ch), nil
}
if unicode.IsControl(ch) {
return control, string(ch), nil
}
if unicode.IsLetter(ch) {
return tokenizeExpression(ch, word, unicode.IsLetter, s.r)
}
if unicode.IsNumber(ch) {
return tokenizeExpression(ch, number, unicode.IsNumber, s.r)
}
return tokenError, string(ch), nil
}
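// scanExample is an illustrative sketch (not part of the original
// source): scanning "HEAD~1" yields word("HEAD"), tilde("~"),
// number("1") and finally eof.
func scanExample(r io.Reader) ([]string, error) {
	s := newScanner(r)
	var lits []string
	for {
		tok, lit, err := s.scan()
		if err != nil {
			return nil, err
		}
		if tok == eof {
			return lits, nil
		}
		lits = append(lits, lit)
	}
}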
package config
// New creates a new config instance.
func New() *Config {
return &Config{}
}
// Config contains all the sections, comments and includes from a config file.
type Config struct {
Comment *Comment
Sections Sections
Includes Includes
}
// Includes is a list of Includes in a config file.
type Includes []*Include
// Include is a reference to an included config file.
type Include struct {
Path string
Config *Config
}
// Comment is a comment string, without the '#' or ';' prefix.
type Comment string
const (
// NoSubsection token is passed to Config.AddOption and Config.SetOption to
// represent the absence of a subsection.
NoSubsection = ""
)
// Section returns an existing section with the given name or creates a new one.
func (c *Config) Section(name string) *Section {
for i := len(c.Sections) - 1; i >= 0; i-- {
s := c.Sections[i]
if s.IsName(name) {
return s
}
}
s := &Section{Name: name}
c.Sections = append(c.Sections, s)
return s
}
// HasSection checks if the Config has a section with the specified name.
func (c *Config) HasSection(name string) bool {
for _, s := range c.Sections {
if s.IsName(name) {
return true
}
}
return false
}
// RemoveSection removes a section from a config file.
func (c *Config) RemoveSection(name string) *Config {
result := Sections{}
for _, s := range c.Sections {
if !s.IsName(name) {
result = append(result, s)
}
}
c.Sections = result
return c
}
// RemoveSubsection removes a subsection from a config file.
func (c *Config) RemoveSubsection(section, subsection string) *Config {
for _, s := range c.Sections {
if s.IsName(section) {
result := Subsections{}
for _, ss := range s.Subsections {
if !ss.IsName(subsection) {
result = append(result, ss)
}
}
s.Subsections = result
}
}
return c
}
// AddOption adds an option to a given section and subsection. Use the
// NoSubsection constant for the subsection argument if no subsection is wanted.
func (c *Config) AddOption(section, subsection, key, value string) *Config {
if subsection == "" {
c.Section(section).AddOption(key, value)
} else {
c.Section(section).Subsection(subsection).AddOption(key, value)
}
return c
}
// SetOption sets an option to a given section and subsection. Use the
// NoSubsection constant for the subsection argument if no subsection is wanted.
func (c *Config) SetOption(section, subsection, key, value string) *Config {
if subsection == "" {
c.Section(section).SetOption(key, value)
} else {
c.Section(section).Subsection(subsection).SetOption(key, value)
}
return c
}
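// newExampleConfig is an illustrative sketch (not part of the original
// source); it builds the in-memory equivalent of:
//
//	[core]
//		bare = false
//	[remote "origin"]
//		url = https://github.com/go-git/go-git
func newExampleConfig() *Config {
	cfg := New()
	cfg.SetOption("core", NoSubsection, "bare", "false")
	cfg.SetOption("remote", "origin", "url", "https://github.com/go-git/go-git")
	return cfg
}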
package config
import (
"io"
"github.com/go-git/gcfg/v2"
)
// A Decoder reads and decodes config files from an input stream.
type Decoder struct {
io.Reader
}
// NewDecoder returns a new decoder that reads from r.
func NewDecoder(r io.Reader) *Decoder {
return &Decoder{r}
}
// Decode reads the whole config from its input and stores it in the
// value pointed to by config.
func (d *Decoder) Decode(config *Config) error {
cb := func(s, ss, k, v string, _ bool) error {
if ss == "" && k == "" {
config.Section(s)
return nil
}
if ss != "" && k == "" {
config.Section(s).Subsection(ss)
return nil
}
config.AddOption(s, ss, k, v)
return nil
}
return gcfg.ReadWithCallback(d, cb)
}
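// decodeExample is an illustrative sketch (not part of the original
// source): it reads a git-config formatted stream into a fresh Config.
func decodeExample(r io.Reader) (*Config, error) {
	cfg := New()
	if err := NewDecoder(r).Decode(cfg); err != nil {
		return nil, err
	}
	return cfg, nil
}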
package config
import (
"fmt"
"io"
"strings"
)
// An Encoder writes config files to an output stream.
type Encoder struct {
w io.Writer
}
var (
subsectionReplacer = strings.NewReplacer(`"`, `\"`, `\`, `\\`)
valueReplacer = strings.NewReplacer(`"`, `\"`, `\`, `\\`, "\n", `\n`, "\t", `\t`, "\b", `\b`)
)
// NewEncoder returns a new encoder that writes to w.
func NewEncoder(w io.Writer) *Encoder {
return &Encoder{w}
}
// Encode writes the config in git config format to the stream of the encoder.
func (e *Encoder) Encode(cfg *Config) error {
for _, s := range cfg.Sections {
if err := e.encodeSection(s); err != nil {
return err
}
}
return nil
}
func (e *Encoder) encodeSection(s *Section) error {
if len(s.Options) > 0 {
if err := e.printf("[%s]\n", s.Name); err != nil {
return err
}
if err := e.encodeOptions(s.Options); err != nil {
return err
}
}
for _, ss := range s.Subsections {
if err := e.encodeSubsection(s.Name, ss); err != nil {
return err
}
}
return nil
}
func (e *Encoder) encodeSubsection(sectionName string, s *Subsection) error {
if err := e.printf("[%s \"%s\"]\n", sectionName, subsectionReplacer.Replace(s.Name)); err != nil {
return err
}
return e.encodeOptions(s.Options)
}
func (e *Encoder) encodeOptions(opts Options) error {
for _, o := range opts {
var value string
if strings.ContainsAny(o.Value, "#;\"\t\n\\") || strings.HasPrefix(o.Value, " ") || strings.HasSuffix(o.Value, " ") {
value = `"` + valueReplacer.Replace(o.Value) + `"`
} else {
value = o.Value
}
if err := e.printf("\t%s = %s\n", o.Key, value); err != nil {
return err
}
}
return nil
}
func (e *Encoder) printf(msg string, args ...any) error {
_, err := fmt.Fprintf(e.w, msg, args...)
return err
}
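// encodeConfigExample is an illustrative sketch (not part of the
// original source): values containing characters such as tabs are
// emitted quoted and escaped, so this writes `msg = "a\tb"` under
// the [core] section header.
func encodeConfigExample(w io.Writer) error {
	cfg := New()
	cfg.AddOption("core", NoSubsection, "msg", "a\tb")
	return NewEncoder(w).Encode(cfg)
}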
package config
import "errors"
// RepositoryFormatVersion represents the repository format version,
// as defined at:
//
// https://git-scm.com/docs/repository-version
type RepositoryFormatVersion string
const (
// Version0 is the format defined by the initial version of git,
// including but not limited to the format of the repository
// directory, the repository configuration file, and the object
// and ref storage.
//
// Specifying the complete behavior of git is beyond the scope
// of this document.
Version0 = "0"
// Version1 is identical to version 0, with the following exceptions:
//
// 1. When reading the core.repositoryformatversion variable, a git
// implementation which supports version 1 MUST also read any
// configuration keys found in the extensions section of the
// configuration file.
//
// 2. If a version-1 repository specifies any extensions.* keys that
// the running git has not implemented, the operation MUST NOT proceed.
// Similarly, if the value of any known key is not understood by the
// implementation, the operation MUST NOT proceed.
//
// Note that if no extensions are specified in the config file, then
// core.repositoryformatversion SHOULD be set to 0 (setting it to 1 provides
// no benefit, and makes the repository incompatible with older
// implementations of git).
Version1 = "1"
// DefaultRepositoryFormatVersion holds the default repository format version.
DefaultRepositoryFormatVersion = Version0
)
// ObjectFormat defines the object format.
type ObjectFormat int
const (
// SHA1 represents the object format used for SHA1.
SHA1 ObjectFormat = iota
// SHA256 represents the object format used for SHA256.
SHA256
// DefaultObjectFormat holds the default object format.
DefaultObjectFormat = SHA1
)
// String returns the string representation of the ObjectFormat.
func (f ObjectFormat) String() string {
switch f {
case SHA1:
return "sha1"
case SHA256:
return "sha256"
default:
return ""
}
}
// Size returns the hash size of the ObjectFormat.
func (f ObjectFormat) Size() int {
switch f {
case SHA1:
return SHA1Size
case SHA256:
return SHA256Size
default:
return 0
}
}
// HexSize returns the hash size in hexadecimal format of the ObjectFormat.
func (f ObjectFormat) HexSize() int {
switch f {
case SHA1:
return SHA1HexSize
case SHA256:
return SHA256HexSize
default:
return 0
}
}
// ErrInvalidObjectFormat is returned when an invalid ObjectFormat is used.
var ErrInvalidObjectFormat = errors.New("invalid object format")
const (
// SHA1Size is the size of SHA1 hash.
SHA1Size = 20
// SHA256Size is the size of SHA256 hash.
SHA256Size = 32
// SHA1HexSize is the size of SHA1 hash in hexadecimal format.
SHA1HexSize = SHA1Size * 2
// SHA256HexSize is the size of SHA256 hash in hexadecimal format.
SHA256HexSize = SHA256Size * 2
)
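// formatSizesExample is an illustrative sketch (not part of the
// original source): a SHA1 hash is 20 bytes (40 hex characters) and a
// SHA256 hash is 32 bytes (64 hex characters).
func formatSizesExample() (int, int) {
	return SHA1.Size(), SHA256.HexSize() // 20, 64
}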
package config
import (
"fmt"
"slices"
"strings"
)
// Option defines a key/value entity in a config file.
type Option struct {
// Key preserving the original case.
// Use IsKey instead to compare keys regardless of case.
Key string
// Original value as string; it may not be normalized.
Value string
}
// Options is a collection of Option.
type Options []*Option
// IsKey returns true if the given key matches
// this option's key in a case-insensitive comparison.
func (o *Option) IsKey(key string) bool {
return strings.EqualFold(o.Key, key)
}
// GoString returns a Go-syntax representation of Options.
func (opts Options) GoString() string {
strs := make([]string, 0, len(opts))
for _, opt := range opts {
strs = append(strs, fmt.Sprintf("%#v", opt))
}
return strings.Join(strs, ", ")
}
// Get gets the value for the given key if set,
// otherwise it returns the empty string.
//
// This matches git behaviour since git v1.8.1-rc1:
// if there are multiple definitions of a key, the
// last one wins.
//
// See: http://article.gmane.org/gmane.linux.kernel/1407184
//
// In order to get all possible values for the same key,
// use GetAll.
func (opts Options) Get(key string) string {
for i := len(opts) - 1; i >= 0; i-- {
o := opts[i]
if o.IsKey(key) {
return o.Value
}
}
return ""
}
// Has checks if an Option exists with the given key.
func (opts Options) Has(key string) bool {
for _, o := range opts {
if o.IsKey(key) {
return true
}
}
return false
}
// GetAll returns all possible values for the same key.
func (opts Options) GetAll(key string) []string {
result := []string{}
for _, o := range opts {
if o.IsKey(key) {
result = append(result, o.Value)
}
}
return result
}
func (opts Options) withoutOption(key string) Options {
result := Options{}
for _, o := range opts {
if !o.IsKey(key) {
result = append(result, o)
}
}
return result
}
func (opts Options) withAddedOption(key, value string) Options {
return append(opts, &Option{key, value})
}
func (opts Options) withSetOption(key string, values ...string) Options {
var result Options
var added []string
for _, o := range opts {
if !o.IsKey(key) {
result = append(result, o)
continue
}
if slices.Contains(values, o.Value) {
added = append(added, o.Value)
result = append(result, o)
continue
}
}
for _, value := range values {
if slices.Contains(added, value) {
continue
}
result = result.withAddedOption(key, value)
}
return result
}
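// lastOneWinsExample is an illustrative sketch (not part of the
// original source): with two definitions of the same key, Get returns
// the last one while GetAll preserves both, matching git behaviour.
func lastOneWinsExample() (string, []string) {
	opts := Options{
		{Key: "url", Value: "first"},
		{Key: "url", Value: "second"},
	}
	return opts.Get("url"), opts.GetAll("url") // "second", ["first", "second"]
}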
package config
import (
"fmt"
"strings"
)
// Section is the representation of a section inside git configuration files.
// Each Section contains Options that are used by both the Git plumbing
// and the porcelains.
// Sections can be further divided into subsections. To begin a subsection
// put its name in double quotes, separated by space from the section name,
// in the section header, like in the example below:
//
// [section "subsection"]
//
// All the other lines (and the remainder of the line after the section header)
// are recognized as option variables, in the form "name = value" (or just name,
// which is a short-hand to say that the variable is the boolean "true").
// The variable names are case-insensitive, allow only alphanumeric characters
// and -, and must start with an alphabetic character:
//
// [section "subsection1"]
// option1 = value1
// option2
// [section "subsection2"]
// option3 = value2
type Section struct {
Name string
Options Options
Subsections Subsections
}
// Subsection is a subsection of a Section.
type Subsection struct {
Name string
Options Options
}
// Sections is a collection of Section.
type Sections []*Section
// GoString returns a Go-syntax representation of Sections.
func (s Sections) GoString() string {
strs := make([]string, 0, len(s))
for _, ss := range s {
strs = append(strs, fmt.Sprintf("%#v", ss))
}
return strings.Join(strs, ", ")
}
// Subsections is a collection of Subsection.
type Subsections []*Subsection
// GoString returns a Go-syntax representation of Subsections.
func (s Subsections) GoString() string {
strs := make([]string, 0, len(s))
for _, ss := range s {
strs = append(strs, fmt.Sprintf("%#v", ss))
}
return strings.Join(strs, ", ")
}
// IsName checks if the given name is equal to the Section name, case-insensitively.
func (s *Section) IsName(name string) bool {
return strings.EqualFold(s.Name, name)
}
// Subsection returns a Subsection from the specified Section. If the
// Subsection does not exist, a new one is created and added to the Section.
func (s *Section) Subsection(name string) *Subsection {
for i := len(s.Subsections) - 1; i >= 0; i-- {
ss := s.Subsections[i]
if ss.IsName(name) {
return ss
}
}
ss := &Subsection{Name: name}
s.Subsections = append(s.Subsections, ss)
return ss
}
// HasSubsection checks if the Section has a Subsection with the specified name.
func (s *Section) HasSubsection(name string) bool {
for _, ss := range s.Subsections {
if ss.IsName(name) {
return true
}
}
return false
}
// RemoveSubsection removes a subsection from a Section.
func (s *Section) RemoveSubsection(name string) *Section {
result := Subsections{}
for _, s := range s.Subsections {
if !s.IsName(name) {
result = append(result, s)
}
}
s.Subsections = result
return s
}
// Option returns the value for the specified key. An empty string is
// returned if the key does not exist.
func (s *Section) Option(key string) string {
return s.Options.Get(key)
}
// OptionAll returns all possible values for an option with the specified key.
// If the option does not exist, an empty slice will be returned.
func (s *Section) OptionAll(key string) []string {
return s.Options.GetAll(key)
}
// HasOption checks if the Section has an Option with the given key.
func (s *Section) HasOption(key string) bool {
return s.Options.Has(key)
}
// AddOption adds a new Option to the Section. The updated Section is returned.
func (s *Section) AddOption(key, value string) *Section {
s.Options = s.Options.withAddedOption(key, value)
return s
}
// SetOption adds a new Option to the Section. If the option already exists, it is replaced.
// The updated Section is returned.
func (s *Section) SetOption(key, value string) *Section {
s.Options = s.Options.withSetOption(key, value)
return s
}
// RemoveOption removes an option with the specified key. The updated Section is returned.
func (s *Section) RemoveOption(key string) *Section {
s.Options = s.Options.withoutOption(key)
return s
}
// IsName checks if the name of the subsection is exactly the specified name.
func (s *Subsection) IsName(name string) bool {
return s.Name == name
}
// Option returns an option with the specified key. If the option does not
// exist, an empty string will be returned.
func (s *Subsection) Option(key string) string {
return s.Options.Get(key)
}
// OptionAll returns all possible values for an option with the specified key.
// If the option does not exist, an empty slice will be returned.
func (s *Subsection) OptionAll(key string) []string {
return s.Options.GetAll(key)
}
// HasOption checks if the Subsection has an Option with the given key.
func (s *Subsection) HasOption(key string) bool {
return s.Options.Has(key)
}
// AddOption adds a new Option to the Subsection. The updated Subsection is returned.
func (s *Subsection) AddOption(key, value string) *Subsection {
s.Options = s.Options.withAddedOption(key, value)
return s
}
// SetOption adds a new Option to the Subsection. If the option already exists, it is replaced.
// The updated Subsection is returned.
func (s *Subsection) SetOption(key string, value ...string) *Subsection {
s.Options = s.Options.withSetOption(key, value...)
return s
}
// RemoveOption removes the option with the specified key. The updated Subsection is returned.
func (s *Subsection) RemoveOption(key string) *Subsection {
s.Options = s.Options.withoutOption(key)
return s
}
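// nameMatchingExample is an illustrative sketch (not part of the
// original source): section names compare case-insensitively while
// subsection names compare exactly, mirroring git's rules.
func nameMatchingExample() (bool, bool) {
	s := &Section{Name: "Remote"}
	ss := &Subsection{Name: "Origin"}
	return s.IsName("remote"), ss.IsName("origin") // true, false
}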
package packfile
import (
"io"
"time"
"github.com/go-git/go-git/v6/plumbing/storer"
"github.com/go-git/go-git/v6/utils/ioutil"
"github.com/go-git/go-git/v6/utils/trace"
)
var signature = []byte{'P', 'A', 'C', 'K'}
const (
// VersionSupported is the packfile version supported by this package
VersionSupported uint32 = 2
firstLengthBits = uint8(4) // the first byte of an object header has 4 bits to store the length
lengthBits = uint8(7) // subsequent bytes have 7 bits to store the length
maskFirstLength = 15 // 0000 1111
maskContinue = 0x80 // 1000 0000
maskLength = uint8(127) // 0111 1111
maskType = uint8(112) // 0111 0000
)
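// headerByteExample is an illustrative sketch (not part of the
// original source): the first byte of an object header packs a
// continuation bit, a 3-bit object type and the low 4 bits of the
// length, which the masks above pick apart.
func headerByteExample(b byte) (cont bool, typ byte, length byte) {
	cont = b&maskContinue != 0
	typ = (b & maskType) >> firstLengthBits
	length = b & maskFirstLength
	return cont, typ, length
}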
// UpdateObjectStorage updates the storer with the objects in the given
// packfile.
func UpdateObjectStorage(s storer.Storer, packfile io.Reader) error {
start := time.Now()
defer func() {
trace.Performance.Printf("performance: %.9f s: update_obj_storage", time.Since(start).Seconds())
}()
if pw, ok := s.(storer.PackfileWriter); ok {
return WritePackfileToObjectStorage(pw, packfile)
}
p := NewParser(packfile, WithStorage(s))
_, err := p.Parse()
return err
}
// WritePackfileToObjectStorage writes all the packfile objects into the given
// object storage.
func WritePackfileToObjectStorage(
sw storer.PackfileWriter,
packfile io.Reader,
) (err error) {
w, err := sw.PackfileWriter()
if err != nil {
return err
}
defer ioutil.CheckClose(w, &err)
n, err := ioutil.CopyBufferPool(w, packfile)
if err == nil && n == 0 {
return ErrEmptyPackfile
}
return err
}
package packfile
const (
blksz = 16
maxChainLength = 64
)
// deltaIndex is a modified version of JGit's DeltaIndex adapted to our current
// design.
type deltaIndex struct {
table []int
entries []int
mask int
}
func (idx *deltaIndex) init(buf []byte) {
scanner := newDeltaIndexScanner(buf, len(buf))
idx.mask = scanner.mask
idx.table = scanner.table
idx.entries = make([]int, countEntries(scanner)+1)
idx.copyEntries(scanner)
}
// findMatch returns the offset within src where the block starting at
// tgtOffset matches, along with the length of the match. A length of 0
// means there was no match. A length of -1 means src is shorter than
// blksz. Any other positive value is the length of the match in bytes.
func (idx *deltaIndex) findMatch(src, tgt []byte, tgtOffset int) (srcOffset, l int) {
if len(tgt) < tgtOffset+s {
return 0, len(tgt) - tgtOffset
}
if len(src) < blksz {
return 0, -1
}
h := hashBlock(tgt, tgtOffset)
tIdx := h & idx.mask
eIdx := idx.table[tIdx]
if eIdx == 0 {
return srcOffset, l
}
srcOffset = idx.entries[eIdx]
l = matchLength(src, tgt, tgtOffset, srcOffset)
return srcOffset, l
}
func matchLength(src, tgt []byte, otgt, osrc int) (l int) {
lensrc := len(src)
lentgt := len(tgt)
for (osrc < lensrc && otgt < lentgt) && src[osrc] == tgt[otgt] {
l++
osrc++
otgt++
}
return l
}
func countEntries(scan *deltaIndexScanner) (cnt int) {
// Figure out exactly how many entries we need. As we do the
// enumeration truncate any delta chains longer than what we
// are willing to scan during encode. This keeps the encode
// logic linear in the size of the input rather than quadratic.
for i := 0; i < len(scan.table); i++ {
h := scan.table[i]
if h == 0 {
continue
}
size := 0
for {
size++
if size == maxChainLength {
scan.next[h] = 0
break
}
h = scan.next[h]
if h == 0 {
break
}
}
cnt += size
}
return cnt
}
func (idx *deltaIndex) copyEntries(scanner *deltaIndexScanner) {
// Rebuild the entries list from the scanner, positioning all
// blocks in the same hash chain next to each other. We can
// then later discard the next list, along with the scanner.
//
next := 1
for i := 0; i < len(idx.table); i++ {
h := idx.table[i]
if h == 0 {
continue
}
idx.table[i] = next
for {
idx.entries[next] = scanner.entries[h]
next++
h = scanner.next[h]
if h == 0 {
break
}
}
}
}
type deltaIndexScanner struct {
table []int
entries []int
next []int
mask int
count int
}
func newDeltaIndexScanner(buf []byte, size int) *deltaIndexScanner {
size -= size % blksz
worstCaseBlockCnt := size / blksz
if worstCaseBlockCnt < 1 {
return new(deltaIndexScanner)
}
tableSize := tableSize(worstCaseBlockCnt)
scanner := &deltaIndexScanner{
table: make([]int, tableSize),
mask: tableSize - 1,
entries: make([]int, worstCaseBlockCnt+1),
next: make([]int, worstCaseBlockCnt+1),
}
scanner.scan(buf, size)
return scanner
}
// Slightly modified version of JGit's DeltaIndexScanner. We store the
// offset in the entries instead of the entries and the key, so we avoid
// operations to retrieve the offset later, as we don't use the key.
// See: https://github.com/eclipse/jgit/blob/005e5feb4ecd08c4e4d141a38b9e7942accb3212/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaIndexScanner.java
func (s *deltaIndexScanner) scan(buf []byte, end int) {
lastHash := 0
ptr := end - blksz
for {
key := hashBlock(buf, ptr)
tIdx := key & s.mask
head := s.table[tIdx]
if head != 0 && lastHash == key {
s.entries[head] = ptr
} else {
s.count++
eIdx := s.count
s.entries[eIdx] = ptr
s.next[eIdx] = head
s.table[tIdx] = eIdx
}
lastHash = key
ptr -= blksz
if 0 > ptr {
break
}
}
}
func tableSize(worstCaseBlockCnt int) int {
shift := 32 - leadingZeros(uint32(worstCaseBlockCnt))
sz := 1 << uint(shift-1)
if sz < worstCaseBlockCnt {
sz <<= 1
}
return sz
}
// use https://golang.org/pkg/math/bits/#LeadingZeros32 in the future
func leadingZeros(x uint32) (n int) {
if x >= 1<<16 {
x >>= 16
n = 16
}
if x >= 1<<8 {
x >>= 8
n += 8
}
n += int(len8tab[x])
return 32 - n
}
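// leadingZerosExample is an illustrative sketch (not part of the
// original source): the table-driven count matches
// math/bits.LeadingZeros32, e.g. 31 for 1 and 0 for 0x80000000.
func leadingZerosExample() bool {
	return leadingZeros(1) == 31 && leadingZeros(0x80000000) == 0
}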
var len8tab = [256]uint8{
0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
}
func hashBlock(raw []byte, ptr int) int {
// The first 4 steps collapse out into a 4 byte big-endian decode,
// with a larger right shift as we combined shift lefts together.
//
hash := ((uint32(raw[ptr]) & 0xff) << 24) |
((uint32(raw[ptr+1]) & 0xff) << 16) |
((uint32(raw[ptr+2]) & 0xff) << 8) |
(uint32(raw[ptr+3]) & 0xff)
hash ^= T[hash>>31]
hash = ((hash << 8) | (uint32(raw[ptr+4]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+5]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+6]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+7]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+8]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+9]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+10]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+11]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+12]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+13]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+14]) & 0xff)) ^ T[hash>>23]
hash = ((hash << 8) | (uint32(raw[ptr+15]) & 0xff)) ^ T[hash>>23]
return int(hash)
}
// T is the hash lookup table for delta index computation.
var T = []uint32{
0x00000000, 0xd4c6b32d, 0x7d4bd577,
0xa98d665a, 0x2e5119c3, 0xfa97aaee, 0x531accb4, 0x87dc7f99,
0x5ca23386, 0x886480ab, 0x21e9e6f1, 0xf52f55dc, 0x72f32a45,
0xa6359968, 0x0fb8ff32, 0xdb7e4c1f, 0x6d82d421, 0xb944670c,
0x10c90156, 0xc40fb27b, 0x43d3cde2, 0x97157ecf, 0x3e981895,
0xea5eabb8, 0x3120e7a7, 0xe5e6548a, 0x4c6b32d0, 0x98ad81fd,
0x1f71fe64, 0xcbb74d49, 0x623a2b13, 0xb6fc983e, 0x0fc31b6f,
0xdb05a842, 0x7288ce18, 0xa64e7d35, 0x219202ac, 0xf554b181,
0x5cd9d7db, 0x881f64f6, 0x536128e9, 0x87a79bc4, 0x2e2afd9e,
0xfaec4eb3, 0x7d30312a, 0xa9f68207, 0x007be45d, 0xd4bd5770,
0x6241cf4e, 0xb6877c63, 0x1f0a1a39, 0xcbcca914, 0x4c10d68d,
0x98d665a0, 0x315b03fa, 0xe59db0d7, 0x3ee3fcc8, 0xea254fe5,
0x43a829bf, 0x976e9a92, 0x10b2e50b, 0xc4745626, 0x6df9307c,
0xb93f8351, 0x1f8636de, 0xcb4085f3, 0x62cde3a9, 0xb60b5084,
0x31d72f1d, 0xe5119c30, 0x4c9cfa6a, 0x985a4947, 0x43240558,
0x97e2b675, 0x3e6fd02f, 0xeaa96302, 0x6d751c9b, 0xb9b3afb6,
0x103ec9ec, 0xc4f87ac1, 0x7204e2ff, 0xa6c251d2, 0x0f4f3788,
0xdb8984a5, 0x5c55fb3c, 0x88934811, 0x211e2e4b, 0xf5d89d66,
0x2ea6d179, 0xfa606254, 0x53ed040e, 0x872bb723, 0x00f7c8ba,
0xd4317b97, 0x7dbc1dcd, 0xa97aaee0, 0x10452db1, 0xc4839e9c,
0x6d0ef8c6, 0xb9c84beb, 0x3e143472, 0xead2875f, 0x435fe105,
0x97995228, 0x4ce71e37, 0x9821ad1a, 0x31accb40, 0xe56a786d,
0x62b607f4, 0xb670b4d9, 0x1ffdd283, 0xcb3b61ae, 0x7dc7f990,
0xa9014abd, 0x008c2ce7, 0xd44a9fca, 0x5396e053, 0x8750537e,
0x2edd3524, 0xfa1b8609, 0x2165ca16, 0xf5a3793b, 0x5c2e1f61,
0x88e8ac4c, 0x0f34d3d5, 0xdbf260f8, 0x727f06a2, 0xa6b9b58f,
0x3f0c6dbc, 0xebcade91, 0x4247b8cb, 0x96810be6, 0x115d747f,
0xc59bc752, 0x6c16a108, 0xb8d01225, 0x63ae5e3a, 0xb768ed17,
0x1ee58b4d, 0xca233860, 0x4dff47f9, 0x9939f4d4, 0x30b4928e,
0xe47221a3, 0x528eb99d, 0x86480ab0, 0x2fc56cea, 0xfb03dfc7,
0x7cdfa05e, 0xa8191373, 0x01947529, 0xd552c604, 0x0e2c8a1b,
0xdaea3936, 0x73675f6c, 0xa7a1ec41, 0x207d93d8, 0xf4bb20f5,
0x5d3646af, 0x89f0f582, 0x30cf76d3, 0xe409c5fe, 0x4d84a3a4,
0x99421089, 0x1e9e6f10, 0xca58dc3d, 0x63d5ba67, 0xb713094a,
0x6c6d4555, 0xb8abf678, 0x11269022, 0xc5e0230f, 0x423c5c96,
0x96faefbb, 0x3f7789e1, 0xebb13acc, 0x5d4da2f2, 0x898b11df,
0x20067785, 0xf4c0c4a8, 0x731cbb31, 0xa7da081c, 0x0e576e46,
0xda91dd6b, 0x01ef9174, 0xd5292259, 0x7ca44403, 0xa862f72e,
0x2fbe88b7, 0xfb783b9a, 0x52f55dc0, 0x8633eeed, 0x208a5b62,
0xf44ce84f, 0x5dc18e15, 0x89073d38, 0x0edb42a1, 0xda1df18c,
0x739097d6, 0xa75624fb, 0x7c2868e4, 0xa8eedbc9, 0x0163bd93,
0xd5a50ebe, 0x52797127, 0x86bfc20a, 0x2f32a450, 0xfbf4177d,
0x4d088f43, 0x99ce3c6e, 0x30435a34, 0xe485e919, 0x63599680,
0xb79f25ad, 0x1e1243f7, 0xcad4f0da, 0x11aabcc5, 0xc56c0fe8,
0x6ce169b2, 0xb827da9f, 0x3ffba506, 0xeb3d162b, 0x42b07071,
0x9676c35c, 0x2f49400d, 0xfb8ff320, 0x5202957a, 0x86c42657,
0x011859ce, 0xd5deeae3, 0x7c538cb9, 0xa8953f94, 0x73eb738b,
0xa72dc0a6, 0x0ea0a6fc, 0xda6615d1, 0x5dba6a48, 0x897cd965,
0x20f1bf3f, 0xf4370c12, 0x42cb942c, 0x960d2701, 0x3f80415b,
0xeb46f276, 0x6c9a8def, 0xb85c3ec2, 0x11d15898, 0xc517ebb5,
0x1e69a7aa, 0xcaaf1487, 0x632272dd, 0xb7e4c1f0, 0x3038be69,
0xe4fe0d44, 0x4d736b1e, 0x99b5d833,
}
package packfile
import (
"sort"
"sync"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/storer"
)
const (
// maxDepth caps how many delta-on-delta steps we allow in a chain.
// 50 is the default value used in JGit.
maxDepth = int64(50)
)
// applyDelta is the set of object types to which we should apply deltas.
var applyDelta = map[plumbing.ObjectType]bool{
plumbing.BlobObject: true,
plumbing.TreeObject: true,
}
type deltaSelector struct {
storer storer.EncodedObjectStorer
}
func newDeltaSelector(s storer.EncodedObjectStorer) *deltaSelector {
return &deltaSelector{s}
}
// ObjectsToPack creates a list of ObjectToPack from the hashes
// provided, creating deltas where suitable, using specific
// internal logic. `packWindow` specifies the size of the sliding
// window used to compare objects for delta compression; 0 turns off
// delta compression entirely.
func (dw *deltaSelector) ObjectsToPack(
hashes []plumbing.Hash,
packWindow uint,
) ([]*ObjectToPack, error) {
otp, err := dw.objectsToPack(hashes, packWindow)
if err != nil {
return nil, err
}
if packWindow == 0 {
return otp, nil
}
dw.sort(otp)
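// Group consecutive objects of the same type so that each group can
// be delta-walked concurrently below.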
var objectGroups [][]*ObjectToPack
var prev *ObjectToPack
i := -1
for _, obj := range otp {
if prev == nil || prev.Type() != obj.Type() {
objectGroups = append(objectGroups, []*ObjectToPack{obj})
i++
prev = obj
} else {
objectGroups[i] = append(objectGroups[i], obj)
}
}
var wg sync.WaitGroup
var once sync.Once
for _, objs := range objectGroups {
wg.Add(1)
go func() {
if walkErr := dw.walk(objs, packWindow); walkErr != nil {
once.Do(func() {
err = walkErr
})
}
wg.Done()
}()
}
wg.Wait()
if err != nil {
return nil, err
}
return otp, nil
}
func (dw *deltaSelector) objectsToPack(
hashes []plumbing.Hash,
packWindow uint,
) ([]*ObjectToPack, error) {
objectsToPack := make([]*ObjectToPack, 0, len(hashes))
for _, h := range hashes {
var o plumbing.EncodedObject
var err error
if packWindow == 0 {
o, err = dw.encodedObject(h)
} else {
o, err = dw.encodedDeltaObject(h)
}
if err != nil {
return nil, err
}
otp := newObjectToPack(o)
if _, ok := o.(plumbing.DeltaObject); ok {
otp.CleanOriginal()
}
objectsToPack = append(objectsToPack, otp)
}
if packWindow == 0 {
return objectsToPack, nil
}
if err := dw.fixAndBreakChains(objectsToPack); err != nil {
return nil, err
}
return objectsToPack, nil
}
func (dw *deltaSelector) encodedDeltaObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
edos, ok := dw.storer.(storer.DeltaObjectStorer)
if !ok {
return dw.encodedObject(h)
}
return edos.DeltaObject(plumbing.AnyObject, h)
}
func (dw *deltaSelector) encodedObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
return dw.storer.EncodedObject(plumbing.AnyObject, h)
}
func (dw *deltaSelector) fixAndBreakChains(objectsToPack []*ObjectToPack) error {
m := make(map[plumbing.Hash]*ObjectToPack, len(objectsToPack))
for _, otp := range objectsToPack {
m[otp.Hash()] = otp
}
for _, otp := range objectsToPack {
if err := dw.fixAndBreakChainsOne(m, otp); err != nil {
return err
}
}
return nil
}
func (dw *deltaSelector) fixAndBreakChainsOne(objectsToPack map[plumbing.Hash]*ObjectToPack, otp *ObjectToPack) error {
if !otp.Object.Type().IsDelta() {
return nil
}
// Initial ObjectToPack instances might have a delta assigned to Object
// but no actual base initially. Once Base is assigned to a delta, it means
// we already fixed it.
if otp.Base != nil {
return nil
}
do, ok := otp.Object.(plumbing.DeltaObject)
if !ok {
// if this is not a DeltaObject, then we cannot retrieve its base,
// so we have to break the delta chain here.
return dw.undeltify(otp)
}
base, ok := objectsToPack[do.BaseHash()]
if !ok {
// The base of the delta is not in our list of objects to pack, so
// we break the chain.
return dw.undeltify(otp)
}
if err := dw.fixAndBreakChainsOne(objectsToPack, base); err != nil {
return err
}
otp.SetDelta(base, otp.Object)
return nil
}
func (dw *deltaSelector) restoreOriginal(otp *ObjectToPack) error {
if otp.Original != nil {
return nil
}
if !otp.Object.Type().IsDelta() {
return nil
}
obj, err := dw.encodedObject(otp.Hash())
if err != nil {
return err
}
otp.SetOriginal(obj)
return nil
}
// undeltify undeltifies an *ObjectToPack by retrieving the original object from
// the storer and resetting it.
func (dw *deltaSelector) undeltify(otp *ObjectToPack) error {
if err := dw.restoreOriginal(otp); err != nil {
return err
}
otp.Object = otp.Original
otp.Depth = 0
return nil
}
func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) {
sort.Sort(byTypeAndSize(objectsToPack))
}
func (dw *deltaSelector) walk(
objectsToPack []*ObjectToPack,
packWindow uint,
) error {
indexMap := make(map[plumbing.Hash]*deltaIndex)
for i := range len(objectsToPack) {
// Clean up the index map and reconstructed delta objects for anything
// outside our pack window, to save memory.
if i > int(packWindow) {
obj := objectsToPack[i-int(packWindow)]
delete(indexMap, obj.Hash())
if obj.IsDelta() {
obj.SaveOriginalMetadata()
obj.CleanOriginal()
}
}
target := objectsToPack[i]
// If we already have a delta, we don't try to find a new one for this
// object. This happens when a delta is set to be reused from an existing
// packfile.
if target.IsDelta() {
continue
}
// We only want to create deltas from specific types.
if !applyDelta[target.Type()] {
continue
}
for j := i - 1; j >= 0 && i-j < int(packWindow); j-- {
base := objectsToPack[j]
// Objects must use only the same type as their delta base.
// Since objectsToPack is sorted by type and size, once we find
// a different type, we know we won't find more of them.
if base.Type() != target.Type() {
break
}
if err := dw.tryToDeltify(indexMap, base, target); err != nil {
return err
}
}
}
return nil
}
func (dw *deltaSelector) tryToDeltify(indexMap map[plumbing.Hash]*deltaIndex, base, target *ObjectToPack) error {
// Original object might not be present if we're reusing a delta, so we
// ensure it is restored.
if err := dw.restoreOriginal(target); err != nil {
return err
}
if err := dw.restoreOriginal(base); err != nil {
return err
}
// If the sizes are radically different, this is a bad pairing.
if target.Size() < base.Size()>>4 {
return nil
}
msz := dw.deltaSizeLimit(
target.Object.Size(),
base.Depth,
target.Depth,
target.IsDelta(),
)
// Nearly impossible to fit useful delta.
if msz <= 8 {
return nil
}
// If we have to insert a lot to make this work, find another.
if base.Size()-target.Size() > msz {
return nil
}
if _, ok := indexMap[base.Hash()]; !ok {
indexMap[base.Hash()] = new(deltaIndex)
}
// Now we can generate the delta using originals
delta, err := getDelta(indexMap[base.Hash()], base.Original, target.Original)
if err != nil {
return err
}
// if delta better than target
if delta.Size() < msz {
target.SetDelta(base, delta)
}
return nil
}
func (dw *deltaSelector) deltaSizeLimit(targetSize int64, baseDepth int,
targetDepth int, targetDelta bool,
) int64 {
if !targetDelta {
// Any delta should be no more than 50% of the original size
// (for text files deflate of whole form should shrink 50%).
n := targetSize >> 1
// Evenly distribute delta size limits over allowed depth.
// If src is non-delta (depth = 0), delta <= 50% of original.
// If src is almost at limit (9/10), delta <= 10% of original.
return n * (maxDepth - int64(baseDepth)) / maxDepth
}
// With a delta base chosen any new delta must be "better".
// Retain the distribution described above.
d := int64(targetDepth)
n := targetSize
// If target depth is bigger than maxDepth, this delta is not suitable to be used.
if d >= maxDepth {
return 0
}
// If src is whole (depth=0) and base is near limit (depth=9/10)
// any delta using src can be 10x larger and still be better.
//
// If src is near limit (depth=9/10) and base is whole (depth=0)
// a new delta dependent on src must be 1/10th the size.
return n * (maxDepth - int64(baseDepth)) / (maxDepth - d)
}
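// deltaSizeLimitExample is an illustrative sketch (not part of the
// original source): for a 1000-byte non-delta target and a base at
// depth 25, the limit is (1000/2)*(50-25)/50 = 250 bytes, so only a
// delta smaller than 250 bytes would be worth keeping.
func deltaSizeLimitExample(dw *deltaSelector) int64 {
	return dw.deltaSizeLimit(1000, 25, 0, false) // 250
}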
type byTypeAndSize []*ObjectToPack
func (a byTypeAndSize) Len() int { return len(a) }
func (a byTypeAndSize) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
func (a byTypeAndSize) Less(i, j int) bool {
if a[i].Type() < a[j].Type() {
return false
}
if a[i].Type() > a[j].Type() {
return true
}
return a[i].Size() > a[j].Size()
}
package packfile
import (
"bytes"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/utils/ioutil"
"github.com/go-git/go-git/v6/utils/sync"
)
// See https://github.com/jelmer/dulwich/blob/master/dulwich/pack.py and
// https://github.com/tarruda/node-git-core/blob/master/src/js/delta.js
// for more info
const (
// Standard chunk size used to generate fingerprints
s = 16
// https://github.com/git/git/blob/f7466e94375b3be27f229c78873f0acf8301c0a5/diff-delta.c#L428
// Max size of a copy operation (64KB).
maxCopySize = 64 * 1024
// Min size of a copy operation.
minCopySize = 4
)
// GetDelta returns an EncodedObject of type OFSDeltaObject. The base and
// target objects will be loaded into memory to be able to create the delta
// object. To regenerate the target, you will need both the obtained delta
// and the base object. An error is returned if the base or target object
// cannot be read.
func GetDelta(base, target plumbing.EncodedObject) (plumbing.EncodedObject, error) {
return getDelta(new(deltaIndex), base, target)
}
func getDelta(index *deltaIndex, base, target plumbing.EncodedObject) (o plumbing.EncodedObject, err error) {
br, err := base.Reader()
if err != nil {
return nil, err
}
defer ioutil.CheckClose(br, &err)
tr, err := target.Reader()
if err != nil {
return nil, err
}
defer ioutil.CheckClose(tr, &err)
bb := sync.GetBytesBuffer()
defer sync.PutBytesBuffer(bb)
_, err = bb.ReadFrom(br)
if err != nil {
return nil, err
}
tb := sync.GetBytesBuffer()
defer sync.PutBytesBuffer(tb)
_, err = tb.ReadFrom(tr)
if err != nil {
return nil, err
}
db := diffDelta(index, bb.Bytes(), tb.Bytes())
delta := &plumbing.MemoryObject{}
_, err = delta.Write(db)
if err != nil {
return nil, err
}
delta.SetSize(int64(len(db)))
delta.SetType(plumbing.OFSDeltaObject)
return delta, nil
}
// DiffDelta returns the delta that transforms src into tgt.
func DiffDelta(src, tgt []byte) []byte {
return diffDelta(new(deltaIndex), src, tgt)
}
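// diffDeltaExample is an illustrative sketch (not part of the original
// source): the resulting delta starts with the varint-encoded sizes of
// src and tgt, and applying it back to src (e.g. with PatchDelta,
// defined elsewhere in this package) reproduces tgt.
func diffDeltaExample() []byte {
	src := []byte("the quick brown fox jumps over the lazy dog")
	tgt := []byte("the quick brown fox jumps over the lazy cat")
	return DiffDelta(src, tgt)
}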
func diffDelta(index *deltaIndex, src, tgt []byte) []byte {
buf := sync.GetBytesBuffer()
defer sync.PutBytesBuffer(buf)
buf.Write(deltaEncodeSize(len(src)))
buf.Write(deltaEncodeSize(len(tgt)))
if len(index.entries) == 0 {
index.init(src)
}
ibuf := sync.GetBytesBuffer()
defer sync.PutBytesBuffer(ibuf)
for i := 0; i < len(tgt); i++ {
offset, l := index.findMatch(src, tgt, i)
if l == 0 {
// couldn't find a match, just write the current byte and continue
ibuf.WriteByte(tgt[i])
} else if l < 0 {
// src is less than blksz, copy the rest of the target to avoid
// calls to findMatch
for ; i < len(tgt); i++ {
ibuf.WriteByte(tgt[i])
}
} else if l < s {
// remaining target is less than blksz, copy what's left of it
// and avoid calls to findMatch
for j := i; j < i+l; j++ {
ibuf.WriteByte(tgt[j])
}
i += l - 1
} else {
encodeInsertOperation(ibuf, buf)
rl := l
aOffset := offset
for rl > 0 {
if rl < maxCopySize {
buf.Write(encodeCopyOperation(aOffset, rl))
break
}
buf.Write(encodeCopyOperation(aOffset, maxCopySize))
rl -= maxCopySize
aOffset += maxCopySize
}
i += l - 1
}
}
encodeInsertOperation(ibuf, buf)
// buf.Bytes() is only valid until the next modifying operation on the buffer. Copy it.
return append([]byte{}, buf.Bytes()...)
}
func encodeInsertOperation(ibuf, buf *bytes.Buffer) {
if ibuf.Len() == 0 {
return
}
b := ibuf.Bytes()
s := ibuf.Len()
o := 0
for s > 127 {
buf.WriteByte(byte(127))
buf.Write(b[o : o+127])
s -= 127
o += 127
}
buf.WriteByte(byte(s))
buf.Write(b[o : o+s])
ibuf.Reset()
}
func deltaEncodeSize(size int) []byte {
var ret []byte
c := size & 0x7f
size >>= 7
for size != 0 {
ret = append(ret, byte(c|0x80))
c = size & 0x7f
size >>= 7
}
ret = append(ret, byte(c))
return ret
}
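// deltaEncodeSizeExample is an illustrative sketch (not part of the
// original source): sizes use git's little-endian base-128 varints, so
// 200 encodes as {0xc8, 0x01}.
func deltaEncodeSizeExample() []byte {
	return deltaEncodeSize(200) // []byte{0xc8, 0x01}
}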
func encodeCopyOperation(offset, length int) []byte {
code := 0x80
var opcodes []byte
var i uint
for i = 0; i < 4; i++ {
f := 0xff << (i * 8)
if offset&f != 0 {
opcodes = append(opcodes, byte(offset&f>>(i*8)))
code |= 0x01 << i
}
}
for i = range 3 {
f := 0xff << (i * 8)
if length&f != 0 {
opcodes = append(opcodes, byte(length&f>>(i*8)))
code |= 0x10 << i
}
}
return append([]byte{byte(code)}, opcodes...)
}
package packfile
import (
"compress/zlib"
"crypto"
"fmt"
"io"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/hash"
"github.com/go-git/go-git/v6/plumbing/storer"
"github.com/go-git/go-git/v6/utils/binary"
"github.com/go-git/go-git/v6/utils/ioutil"
)
// Encoder gets the data from the storage and writes it into the writer in PACK
// format.
type Encoder struct {
selector *deltaSelector
w *offsetWriter
zw *zlib.Writer
hasher plumbing.Hasher
useRefDeltas bool
}
// NewEncoder creates a new packfile encoder using a specific Writer and
// EncodedObjectStorer. By default deltas used to generate the packfile will be
// OFSDeltaObject. To use Reference deltas, set useRefDeltas to true.
func NewEncoder(w io.Writer, s storer.EncodedObjectStorer, useRefDeltas bool) *Encoder {
h := plumbing.Hasher{
// TODO: Support passing an ObjectFormat (sha256)
Hash: hash.New(crypto.SHA1),
}
mw := io.MultiWriter(w, h)
ow := newOffsetWriter(mw)
zw := zlib.NewWriter(mw)
return &Encoder{
selector: newDeltaSelector(s),
w: ow,
zw: zw,
hasher: h,
useRefDeltas: useRefDeltas,
}
}
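// packEncodeExample is an illustrative sketch (not part of the
// original source): it packs the given objects into w using offset
// deltas and a delta window of 10, the default window size used by git.
func packEncodeExample(w io.Writer, s storer.EncodedObjectStorer, hashes []plumbing.Hash) (plumbing.Hash, error) {
	return NewEncoder(w, s, false).Encode(hashes, 10)
}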
// Encode creates a packfile containing all the objects referenced in
// hashes and writes it to the writer in the Encoder. `packWindow`
// specifies the size of the sliding window used to compare objects
// for delta compression; 0 turns off delta compression entirely.
func (e *Encoder) Encode(
hashes []plumbing.Hash,
packWindow uint,
) (plumbing.Hash, error) {
objects, err := e.selector.ObjectsToPack(hashes, packWindow)
if err != nil {
return plumbing.ZeroHash, err
}
return e.encode(objects)
}
func (e *Encoder) encode(objects []*ObjectToPack) (plumbing.Hash, error) {
if err := e.head(len(objects)); err != nil {
return plumbing.ZeroHash, err
}
for _, o := range objects {
if err := e.entry(o); err != nil {
return plumbing.ZeroHash, err
}
}
return e.footer()
}
func (e *Encoder) head(numEntries int) error {
return binary.Write(
e.w,
signature,
int32(VersionSupported),
int32(numEntries),
)
}
func (e *Encoder) entry(o *ObjectToPack) (err error) {
if o.WantWrite() {
// A cycle exists in this delta chain. This should only occur if a
// selected object representation disappeared during writing
// (for example due to a concurrent repack) and a different base
// was chosen, forcing a cycle. Select something other than a
// delta, and write this object.
e.selector.restoreOriginal(o)
o.BackToOriginal()
}
if o.IsWritten() {
return nil
}
o.MarkWantWrite()
if err := e.writeBaseIfDelta(o); err != nil {
return err
}
// We need to check if we already wrote this object due to a cyclic delta chain.
if o.IsWritten() {
return nil
}
o.Offset = e.w.Offset()
if o.IsDelta() {
if err := e.writeDeltaHeader(o); err != nil {
return err
}
} else {
if err := e.entryHead(o.Type(), o.Size()); err != nil {
return err
}
}
e.zw.Reset(e.w)
defer ioutil.CheckClose(e.zw, &err)
or, err := o.Object.Reader()
if err != nil {
return err
}
defer ioutil.CheckClose(or, &err)
_, err = ioutil.CopyBufferPool(e.zw, or)
return err
}
func (e *Encoder) writeBaseIfDelta(o *ObjectToPack) error {
if o.IsDelta() && !o.Base.IsWritten() {
// We must write base first
return e.entry(o.Base)
}
return nil
}
func (e *Encoder) writeDeltaHeader(o *ObjectToPack) error {
// Write offset deltas by default
t := plumbing.OFSDeltaObject
if e.useRefDeltas {
t = plumbing.REFDeltaObject
}
if err := e.entryHead(t, o.Object.Size()); err != nil {
return err
}
if e.useRefDeltas {
return e.writeRefDeltaHeader(o.Base.Hash())
}
return e.writeOfsDeltaHeader(o)
}
func (e *Encoder) writeRefDeltaHeader(base plumbing.Hash) error {
_, err := base.WriteTo(e.w)
return err
}
func (e *Encoder) writeOfsDeltaHeader(o *ObjectToPack) error {
// for OFS_DELTA, offset of the base is interpreted as negative offset
// relative to the type-byte of the header of the ofs-delta entry.
relativeOffset := o.Offset - o.Base.Offset
if relativeOffset <= 0 {
return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset)
}
return binary.WriteVariableWidthInt(e.w, relativeOffset)
}
func (e *Encoder) entryHead(typeNum plumbing.ObjectType, size int64) error {
t := int64(typeNum)
header := []byte{}
c := (t << firstLengthBits) | (size & maskFirstLength)
size >>= firstLengthBits
for size != 0 {
header = append(header, byte(c|maskContinue))
c = size & int64(maskLength)
size >>= lengthBits
}
header = append(header, byte(c))
_, err := e.w.Write(header)
return err
}
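// Worked example (for illustration): for a blob (type 3) of size 100,
// entryHead emits two bytes. The first packs the type and the low 4 bits
// of the size, with the continuation bit set; the second carries the
// remaining size bits:
//
//	(3 << 4) | (100 & 0x0F) = 0x34, plus continuation bit = 0xB4
//	100 >> 4                = 0x06
//
// yielding the header bytes [0xB4, 0x06].
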
func (e *Encoder) footer() (plumbing.Hash, error) {
h := e.hasher.Sum()
_, err := h.WriteTo(e.w)
return h, err
}
type offsetWriter struct {
w io.Writer
offset int64
}
func newOffsetWriter(w io.Writer) *offsetWriter {
return &offsetWriter{w: w}
}
func (ow *offsetWriter) Write(p []byte) (n int, err error) {
n, err = ow.w.Write(p)
ow.offset += int64(n)
return n, err
}
func (ow *offsetWriter) Offset() int64 {
return ow.offset
}
package packfile
import "fmt"
// Error specifies errors returned during packfile parsing.
type Error struct {
reason, details string
}
// NewError returns a new error.
func NewError(reason string) *Error {
return &Error{reason: reason}
}
// Error returns a text representation of the error.
func (e *Error) Error() string {
if e.details == "" {
return e.reason
}
return fmt.Sprintf("%s: %s", e.reason, e.details)
}
// AddDetails returns a copy of the error with additional formatted details.
func (e *Error) AddDetails(format string, args ...any) *Error {
return &Error{
reason: e.reason,
details: fmt.Sprintf(format, args...),
}
}
package packfile
import (
"bufio"
"errors"
"io"
"os"
billy "github.com/go-git/go-billy/v6"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/cache"
"github.com/go-git/go-git/v6/plumbing/format/idxfile"
"github.com/go-git/go-git/v6/utils/ioutil"
"github.com/go-git/go-git/v6/utils/sync"
)
// FSObject is an object from the packfile on the filesystem.
type FSObject struct {
hash plumbing.Hash
offset int64
size int64
typ plumbing.ObjectType
index idxfile.Index
fs billy.Filesystem
pack billy.File
packPath string
cache cache.Object
}
// NewFSObject creates a new filesystem object.
func NewFSObject(
hash plumbing.Hash,
finalType plumbing.ObjectType,
offset int64,
contentSize int64,
index idxfile.Index,
fs billy.Filesystem,
pack billy.File,
packPath string,
cache cache.Object,
) *FSObject {
return &FSObject{
hash: hash,
offset: offset,
size: contentSize,
typ: finalType,
index: index,
fs: fs,
pack: pack,
packPath: packPath,
cache: cache,
}
}
// Reader implements the plumbing.EncodedObject interface.
func (o *FSObject) Reader() (io.ReadCloser, error) {
obj, ok := o.cache.Get(o.hash)
if ok && obj != o {
reader, err := obj.Reader()
if err != nil {
return nil, err
}
return reader, nil
}
var file io.Closer
_, err := o.pack.Seek(o.offset, io.SeekStart)
// FSObject aims to reuse an existing file descriptor to the packfile.
// In some cases that descriptor may already have been closed; in such
// cases, open the packfile again and close it when the reader is closed.
if err != nil && errors.Is(err, os.ErrClosed) {
o.pack, err = o.fs.Open(o.packPath)
if err != nil {
return nil, err
}
file = o.pack
_, err = o.pack.Seek(o.offset, io.SeekStart)
}
if err != nil {
return nil, err
}
br := sync.GetBufioReader(o.pack)
zr, err := sync.GetZlibReader(br)
if err != nil {
return nil, err
}
return &zlibReadCloser{r: zr, f: file, rbuf: br}, nil
}
type zlibReadCloser struct {
r *sync.ZLibReader
f io.Closer
rbuf *bufio.Reader
closed bool
}
// Read reads up to len(p) bytes into p from the data.
func (r *zlibReadCloser) Read(p []byte) (int, error) {
return r.r.Read(p)
}
func (r *zlibReadCloser) Close() (err error) {
if r.closed {
return nil
}
r.closed = true
if r.f != nil {
defer ioutil.CheckClose(r.f, &err)
}
defer sync.PutBufioReader(r.rbuf)
defer sync.PutZlibReader(r.r)
return r.r.Close()
}
// SetSize implements the plumbing.EncodedObject interface. This method
// is a noop.
func (o *FSObject) SetSize(int64) {}
// SetType implements the plumbing.EncodedObject interface. This method is
// a noop.
func (o *FSObject) SetType(plumbing.ObjectType) {}
// Hash implements the plumbing.EncodedObject interface.
func (o *FSObject) Hash() plumbing.Hash { return o.hash }
// Size implements the plumbing.EncodedObject interface.
func (o *FSObject) Size() int64 { return o.size }
// Type implements the plumbing.EncodedObject interface.
func (o *FSObject) Type() plumbing.ObjectType {
return o.typ
}
// Writer implements the plumbing.EncodedObject interface. This method always
// returns a nil writer.
func (o *FSObject) Writer() (io.WriteCloser, error) {
return nil, nil
}
package packfile
import (
"github.com/go-git/go-git/v6/plumbing"
)
// ObjectToPack is a representation of an object that is going to be written
// into a pack file.
type ObjectToPack struct {
// The main object to pack, it could be any object, including deltas.
Object plumbing.EncodedObject
// Base is the object that a delta is based on, which could also be another delta.
// Nil when the main object is not a delta.
Base *ObjectToPack
// Original is the object that we can generate by applying the delta to
// Base, or the same object as Object in the case of a non-delta
// object.
Original plumbing.EncodedObject
// Depth is the number of deltas that must be resolved to obtain Original
// (delta based on delta based on ...)
Depth int
// Offset in the pack where the object has already been written, or 0 if
// it has not been written yet
Offset int64
// Information from the original object
resolvedOriginal bool
originalType plumbing.ObjectType
originalSize int64
originalHash plumbing.Hash
}
// newObjectToPack creates a correct ObjectToPack based on a non-delta object
func newObjectToPack(o plumbing.EncodedObject) *ObjectToPack {
return &ObjectToPack{
Object: o,
Original: o,
}
}
// newDeltaObjectToPack creates a correct ObjectToPack for a delta object, based on
// its base (which could be another delta), the delta target (here called original),
// and the delta Object itself
func newDeltaObjectToPack(base *ObjectToPack, original, delta plumbing.EncodedObject) *ObjectToPack {
return &ObjectToPack{
Object: delta,
Base: base,
Original: original,
Depth: base.Depth + 1,
}
}
// BackToOriginal converts this ObjectToPack back to a non-deltified object if it was a delta
func (o *ObjectToPack) BackToOriginal() {
if o.IsDelta() && o.Original != nil {
o.Object = o.Original
o.Base = nil
o.Depth = 0
}
}
// IsWritten reports whether this ObjectToPack has already been
// written into the packfile (offset 1 is the WantWrite sentinel)
func (o *ObjectToPack) IsWritten() bool {
return o.Offset > 1
}
// MarkWantWrite marks this ObjectToPack as WantWrite
// to avoid delta chain loops
func (o *ObjectToPack) MarkWantWrite() {
o.Offset = 1
}
// WantWrite checks if this ObjectToPack was marked as WantWrite before
func (o *ObjectToPack) WantWrite() bool {
return o.Offset == 1
}
// SetOriginal sets Original and saves its size, type and hash. If obj
// is nil, Original is set but the previously resolved values are kept
func (o *ObjectToPack) SetOriginal(obj plumbing.EncodedObject) {
o.Original = obj
o.SaveOriginalMetadata()
}
// SaveOriginalMetadata saves size, type and hash of Original object
func (o *ObjectToPack) SaveOriginalMetadata() {
if o.Original != nil {
o.originalSize = o.Original.Size()
o.originalType = o.Original.Type()
o.originalHash = o.Original.Hash()
o.resolvedOriginal = true
}
}
// CleanOriginal sets Original to nil
func (o *ObjectToPack) CleanOriginal() {
o.Original = nil
}
// Type returns the object type.
func (o *ObjectToPack) Type() plumbing.ObjectType {
if o.Original != nil {
return o.Original.Type()
}
if o.resolvedOriginal {
return o.originalType
}
if o.Base != nil {
return o.Base.Type()
}
if o.Object != nil {
return o.Object.Type()
}
panic("cannot get type")
}
// Hash returns the object hash.
func (o *ObjectToPack) Hash() plumbing.Hash {
if o.Original != nil {
return o.Original.Hash()
}
if o.resolvedOriginal {
return o.originalHash
}
do, ok := o.Object.(plumbing.DeltaObject)
if ok {
return do.ActualHash()
}
panic("cannot get hash")
}
// Size returns the object size.
func (o *ObjectToPack) Size() int64 {
if o.Original != nil {
return o.Original.Size()
}
if o.resolvedOriginal {
return o.originalSize
}
do, ok := o.Object.(plumbing.DeltaObject)
if ok {
return do.ActualSize()
}
panic("cannot get ObjectToPack size")
}
// IsDelta returns true if the object is a delta.
func (o *ObjectToPack) IsDelta() bool {
return o.Base != nil
}
// SetDelta sets the object's base and delta.
func (o *ObjectToPack) SetDelta(base *ObjectToPack, delta plumbing.EncodedObject) {
o.Object = delta
o.Base = base
o.Depth = base.Depth + 1
}
package packfile
import (
"bufio"
"crypto"
"fmt"
"io"
"sync"
billy "github.com/go-git/go-billy/v6"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/cache"
format "github.com/go-git/go-git/v6/plumbing/format/config"
"github.com/go-git/go-git/v6/plumbing/format/idxfile"
"github.com/go-git/go-git/v6/plumbing/storer"
"github.com/go-git/go-git/v6/utils/ioutil"
gogitsync "github.com/go-git/go-git/v6/utils/sync"
)
var (
// ErrInvalidObject is returned by Decode when an invalid object is
// found in the packfile.
ErrInvalidObject = NewError("invalid git object")
// ErrZLib is returned by Decode when there was an error unzipping
// the packfile contents.
ErrZLib = NewError("zlib reading error")
)
// Packfile allows retrieving information from inside a packfile.
type Packfile struct {
idxfile.Index
fs billy.Filesystem
file billy.File
scanner *Scanner
cache cache.Object
rbuf *bufio.Reader
id plumbing.Hash
m sync.Mutex
objectIdSize int
once sync.Once
onceErr error
}
// NewPackfile returns a packfile representation for the given packfile file
// and packfile idx.
// If a filesystem is provided, the packfile will return FSObjects; otherwise
// it will return MemoryObjects.
func NewPackfile(
file billy.File,
opts ...PackfileOption,
) *Packfile {
p := &Packfile{
file: file,
objectIdSize: crypto.SHA1.Size(),
}
for _, opt := range opts {
opt(p)
}
return p
}
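// An illustrative usage sketch; `f`, `fs` and `idx` are assumptions standing
// in for an open pack billy.File, its filesystem, and a parsed idxfile.Index:
//
//	p := NewPackfile(f,
//		WithIdx(idx),
//		WithFs(fs),
//		WithCache(cache.NewObjectLRUDefault()),
//	)
//	defer p.Close()
//	obj, err := p.Get(someHash) // someHash is hypothetical
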
// Get retrieves the encoded object in the packfile with the given hash.
func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) {
if err := p.init(); err != nil {
return nil, err
}
p.m.Lock()
defer p.m.Unlock()
return p.get(h)
}
// GetByOffset retrieves the encoded object from the packfile at the given
// offset.
func (p *Packfile) GetByOffset(offset int64) (plumbing.EncodedObject, error) {
if err := p.init(); err != nil {
return nil, err
}
p.m.Lock()
defer p.m.Unlock()
return p.getByOffset(offset)
}
// GetSizeByOffset retrieves the size of the encoded object from the
// packfile with the given offset.
func (p *Packfile) GetSizeByOffset(offset int64) (size int64, err error) {
if err := p.init(); err != nil {
return 0, err
}
d, err := p.GetByOffset(offset)
if err != nil {
return 0, err
}
return d.Size(), nil
}
// GetAll returns an iterator with all encoded objects in the packfile.
// The returned iterator is not thread-safe; it should be used in the same
// thread as the Packfile instance.
func (p *Packfile) GetAll() (storer.EncodedObjectIter, error) {
return p.GetByType(plumbing.AnyObject)
}
// GetByType returns all the objects of the given type.
func (p *Packfile) GetByType(typ plumbing.ObjectType) (storer.EncodedObjectIter, error) {
if err := p.init(); err != nil {
return nil, err
}
switch typ {
case plumbing.AnyObject,
plumbing.BlobObject,
plumbing.TreeObject,
plumbing.CommitObject,
plumbing.TagObject:
entries, err := p.EntriesByOffset()
if err != nil {
return nil, err
}
return &objectIter{
p: p,
iter: entries,
typ: typ,
}, nil
default:
return nil, plumbing.ErrInvalidType
}
}
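// Usage sketch: iterating over every commit in the packfile. Error handling
// is abbreviated for brevity.
//
//	iter, err := p.GetByType(plumbing.CommitObject)
//	if err != nil {
//		// handle error
//	}
//	defer iter.Close()
//	err = iter.ForEach(func(obj plumbing.EncodedObject) error {
//		fmt.Println(obj.Hash())
//		return nil
//	})
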
// Scanner returns the Packfile's inner scanner.
//
// Deprecated: this will be removed in future versions of the packfile package
// to avoid exposing the package internals and to improve its thread-safety.
// TODO: Remove Scanner method
func (p *Packfile) Scanner() (*Scanner, error) {
if err := p.init(); err != nil {
return nil, err
}
return p.scanner, nil
}
// ID returns the ID of the packfile, which is the checksum at the end of it.
func (p *Packfile) ID() (plumbing.Hash, error) {
if err := p.init(); err != nil {
return plumbing.ZeroHash, err
}
return p.id, nil
}
// get is not thread-safe, and should only be called within packfile.go.
func (p *Packfile) get(h plumbing.Hash) (plumbing.EncodedObject, error) {
if obj, ok := p.cache.Get(h); ok {
return obj, nil
}
offset, err := p.FindOffset(h)
if err != nil {
return nil, err
}
oh, err := p.headerFromOffset(offset)
if err != nil {
return nil, err
}
return p.objectFromHeader(oh)
}
// getByOffset is not thread-safe, and should only be called within packfile.go.
func (p *Packfile) getByOffset(offset int64) (plumbing.EncodedObject, error) {
h, err := p.FindHash(offset)
if err != nil {
return nil, err
}
if obj, ok := p.cache.Get(h); ok {
return obj, nil
}
oh, err := p.headerFromOffset(offset)
if err != nil {
return nil, err
}
return p.objectFromHeader(oh)
}
func (p *Packfile) init() error {
p.once.Do(func() {
if p.file == nil {
p.onceErr = fmt.Errorf("file is not set")
return
}
if p.Index == nil {
p.onceErr = fmt.Errorf("index is not set")
return
}
p.rbuf = gogitsync.GetBufioReader(nil)
opts := []ScannerOption{WithBufioReader(p.rbuf)}
if p.objectIdSize == format.SHA256Size {
opts = append(opts, WithSHA256())
}
p.scanner = NewScanner(p.file, opts...)
// Validate packfile signature.
if !p.scanner.Scan() {
p.onceErr = p.scanner.Error()
return
}
_, err := p.scanner.Seek(-int64(p.objectIdSize), io.SeekEnd)
if err != nil {
p.onceErr = err
return
}
p.id.ResetBySize(p.objectIdSize)
_, err = p.id.ReadFrom(p.scanner)
if err != nil {
p.onceErr = err
}
if p.cache == nil {
p.cache = cache.NewObjectLRUDefault()
}
})
return p.onceErr
}
func (p *Packfile) headerFromOffset(offset int64) (*ObjectHeader, error) {
err := p.scanner.SeekFromStart(offset)
if err != nil {
return nil, err
}
if !p.scanner.Scan() {
return nil, plumbing.ErrObjectNotFound
}
oh := p.scanner.Data().Value().(ObjectHeader)
return &oh, nil
}
// Close closes the packfile and releases its resources.
func (p *Packfile) Close() error {
p.m.Lock()
defer p.m.Unlock()
gogitsync.PutBufioReader(p.rbuf)
closer, ok := p.file.(io.Closer)
if !ok {
return nil
}
return closer.Close()
}
func (p *Packfile) objectFromHeader(oh *ObjectHeader) (plumbing.EncodedObject, error) {
if oh == nil {
return nil, plumbing.ErrObjectNotFound
}
// If we have a filesystem and the object is not a delta type, return an FSObject.
// This avoids having to inflate the object more than once.
if !oh.Type.IsDelta() && p.fs != nil {
fs := NewFSObject(
oh.ID(),
oh.Type,
oh.ContentOffset,
oh.Size,
p.Index,
p.fs,
p.file,
p.file.Name(),
p.cache,
)
p.cache.Put(fs)
return fs, nil
}
return p.getMemoryObject(oh)
}
func (p *Packfile) getMemoryObject(oh *ObjectHeader) (plumbing.EncodedObject, error) {
obj := new(plumbing.MemoryObject)
obj.SetSize(oh.Size)
obj.SetType(oh.Type)
w, err := obj.Writer()
if err != nil {
return nil, err
}
defer ioutil.CheckClose(w, &err)
switch oh.Type {
case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject:
err = p.scanner.inflateContent(oh.ContentOffset, w)
case plumbing.REFDeltaObject, plumbing.OFSDeltaObject:
var parent plumbing.EncodedObject
switch oh.Type {
case plumbing.REFDeltaObject:
var ok bool
parent, ok = p.cache.Get(oh.Reference)
if !ok {
parent, err = p.get(oh.Reference)
}
case plumbing.OFSDeltaObject:
parent, err = p.getByOffset(oh.OffsetReference)
}
if err != nil {
return nil, fmt.Errorf("cannot find base object: %w", err)
}
if oh.content == nil {
oh.content = gogitsync.GetBytesBuffer()
}
err = p.scanner.inflateContent(oh.ContentOffset, oh.content)
if err != nil {
return nil, fmt.Errorf("cannot inflate content: %w", err)
}
obj.SetType(parent.Type())
err = ApplyDelta(obj, parent, oh.content)
default:
err = ErrInvalidObject.AddDetails("type %q", oh.Type)
}
if err != nil {
return nil, err
}
p.cache.Put(obj)
return obj, nil
}
package packfile
import (
"io"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/format/idxfile"
)
type objectIter struct {
p *Packfile
typ plumbing.ObjectType
iter idxfile.EntryIter
}
func (i *objectIter) Next() (plumbing.EncodedObject, error) {
if err := i.p.init(); err != nil {
return nil, err
}
i.p.m.Lock()
defer i.p.m.Unlock()
return i.next()
}
func (i *objectIter) next() (plumbing.EncodedObject, error) {
for {
e, err := i.iter.Next()
if err != nil {
return nil, err
}
oh, err := i.p.headerFromOffset(int64(e.Offset))
if err != nil {
return nil, err
}
if i.typ == plumbing.AnyObject {
return i.p.objectFromHeader(oh)
}
// The current object header type is a delta; resolve the actual
// object to determine its real type.
if oh.Type.IsDelta() {
o, err := i.p.objectFromHeader(oh)
if err != nil {
return nil, err
}
if o.Type() == i.typ {
return o, nil
}
continue
}
if oh.Type == i.typ {
return i.p.objectFromHeader(oh)
}
continue
}
}
func (i *objectIter) ForEach(f func(plumbing.EncodedObject) error) error {
if err := i.p.init(); err != nil {
return err
}
i.p.m.Lock()
defer i.p.m.Unlock()
for {
o, err := i.next()
if err != nil {
if err == io.EOF {
return nil
}
return err
}
if err := f(o); err != nil {
return err
}
}
}
func (i *objectIter) Close() {
i.p.m.Lock()
defer i.p.m.Unlock()
i.iter.Close()
}
package packfile
import (
billy "github.com/go-git/go-billy/v6"
"github.com/go-git/go-git/v6/plumbing/cache"
"github.com/go-git/go-git/v6/plumbing/format/idxfile"
)
// PackfileOption configures a Packfile.
type PackfileOption func(*Packfile)
// WithCache sets the cache to be used throughout Packfile operations.
// Use this to share existing caches with the Packfile. If not used, a
// new cache instance will be created.
func WithCache(cache cache.Object) PackfileOption {
return func(p *Packfile) {
p.cache = cache
}
}
// WithIdx sets the idxfile for the packfile.
func WithIdx(idx idxfile.Index) PackfileOption {
return func(p *Packfile) {
p.Index = idx
}
}
// WithFs sets the filesystem to be used.
func WithFs(fs billy.Filesystem) PackfileOption {
return func(p *Packfile) {
p.fs = fs
}
}
// WithObjectIDSize sets the size of the object IDs inside the packfile.
// Valid options are hash.SHA1Size and hash.SHA256Size.
//
// When no object ID size is set, hash.SHA1Size will be used.
func WithObjectIDSize(sz int) PackfileOption {
return func(p *Packfile) {
p.objectIdSize = sz
}
}
package packfile
import (
"bytes"
"errors"
"fmt"
"io"
stdsync "sync"
"github.com/go-git/go-git/v6/plumbing"
format "github.com/go-git/go-git/v6/plumbing/format/config"
"github.com/go-git/go-git/v6/plumbing/storer"
"github.com/go-git/go-git/v6/utils/ioutil"
"github.com/go-git/go-git/v6/utils/sync"
)
var (
// ErrReferenceDeltaNotFound is returned when the reference delta is not
// found.
ErrReferenceDeltaNotFound = errors.New("reference delta not found")
// ErrNotSeekableSource is returned when the source for the parser is not
// seekable and a storage was not provided, so it can't be parsed.
ErrNotSeekableSource = errors.New("parser source is not seekable and storage was not provided")
// ErrDeltaNotCached is returned when the delta could not be found in cache.
ErrDeltaNotCached = errors.New("delta could not be found in cache")
)
// Parser decodes a packfile and calls any observer associated with it. It is
// used to generate indexes.
type Parser struct {
storage storer.EncodedObjectStorer
cache *parserCache
lowMemoryMode bool
scanner *Scanner
observers []Observer
hasher plumbing.Hasher
checksum plumbing.Hash
m stdsync.Mutex
}
// LowMemoryCapable is implemented by storage types that are capable of
// operating in low-memory mode.
type LowMemoryCapable interface {
// LowMemoryMode reports whether the storage is able and willing to let
// the parser operate in low-memory mode.
LowMemoryMode() bool
}
// NewParser creates a new Parser.
// When a storage is set, the objects are written to storage as they
// are parsed.
func NewParser(data io.Reader, opts ...ParserOption) *Parser {
p := &Parser{
hasher: plumbing.NewHasher(format.SHA1, plumbing.AnyObject, 0),
}
for _, opt := range opts {
if opt != nil {
opt(p)
}
}
p.scanner = NewScanner(data)
if p.storage != nil {
p.scanner.storage = p.storage
lm, ok := p.storage.(LowMemoryCapable)
p.lowMemoryMode = ok && lm.LowMemoryMode()
}
if p.scanner.seeker == nil {
p.lowMemoryMode = false
}
p.scanner.lowMemoryMode = p.lowMemoryMode
p.cache = newParserCache()
return p
}
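// An illustrative sketch (the pack file name is hypothetical): parsing a
// packfile to obtain its trailer checksum, writing objects to `storage`
// as they are parsed.
//
//	f, err := os.Open("pack-0123abcd.pack")
//	if err != nil {
//		// handle error
//	}
//	defer f.Close()
//	parser := NewParser(f, WithStorage(storage))
//	checksum, err := parser.Parse()
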
func (p *Parser) storeOrCache(oh *ObjectHeader) error {
// Only need to store deltas, as the scanner already stored non-delta
// objects.
if p.storage != nil && oh.diskType.IsDelta() {
w, err := p.storage.RawObjectWriter(oh.Type, oh.Size)
if err != nil {
return err
}
defer w.Close()
_, err = ioutil.CopyBufferPool(w, oh.content)
if err != nil {
return err
}
}
if p.cache != nil {
o := oh
for p.lowMemoryMode && o.content != nil {
sync.PutBytesBuffer(o.content)
o.content = nil
if o.parent == nil || o.parent.content == nil {
break
}
o = o.parent
}
p.cache.Add(oh)
}
if err := p.onInflatedObjectHeader(oh.Type, oh.Size, oh.Offset); err != nil {
return err
}
if err := p.onInflatedObjectContent(oh.Hash, oh.Offset, oh.Crc32, nil); err != nil {
return err
}
return nil
}
func (p *Parser) resetCache(qty int) {
if p.cache != nil {
p.cache.Reset(qty)
}
}
// Parse starts the decoding phase of the packfile.
func (p *Parser) Parse() (plumbing.Hash, error) {
p.m.Lock()
defer p.m.Unlock()
var pendingDeltas []*ObjectHeader
var pendingDeltaREFs []*ObjectHeader
for p.scanner.Scan() {
data := p.scanner.Data()
switch data.Section {
case HeaderSection:
header := data.Value().(Header)
p.resetCache(int(header.ObjectsQty))
if err := p.onHeader(header.ObjectsQty); err != nil {
return plumbing.ZeroHash, err
}
case ObjectSection:
oh := data.Value().(ObjectHeader)
if oh.Type.IsDelta() {
switch oh.Type {
case plumbing.OFSDeltaObject:
pendingDeltas = append(pendingDeltas, &oh)
case plumbing.REFDeltaObject:
pendingDeltaREFs = append(pendingDeltaREFs, &oh)
}
continue
}
if p.lowMemoryMode && oh.content != nil {
sync.PutBytesBuffer(oh.content)
oh.content = nil
}
if err := p.storeOrCache(&oh); err != nil {
return plumbing.ZeroHash, err
}
case FooterSection:
p.checksum = data.Value().(plumbing.Hash)
}
}
if err := p.scanner.Error(); err != nil {
return plumbing.ZeroHash, err
}
if p.scanner.objects == 0 {
return plumbing.ZeroHash, ErrEmptyPackfile
}
for _, oh := range pendingDeltaREFs {
err := p.processDelta(oh)
if err != nil {
return plumbing.ZeroHash, fmt.Errorf("processing ref-delta at offset %v: %w", oh.Offset, err)
}
}
for _, oh := range pendingDeltas {
err := p.processDelta(oh)
if err != nil {
return plumbing.ZeroHash, fmt.Errorf("processing ofs-delta at offset %v: %w", oh.Offset, err)
}
}
// Return all used buffers to the pool.
go func() {
for _, oh := range p.cache.oi {
if oh.content != nil {
sync.PutBytesBuffer(oh.content)
oh.content = nil
}
}
}()
return p.checksum, p.onFooter(p.checksum)
}
func (p *Parser) ensureContent(oh *ObjectHeader) error {
// Skip if this object already has the correct content.
if oh.content != nil && oh.content.Len() == int(oh.Size) && !oh.Hash.IsZero() {
return nil
}
if oh.content == nil {
oh.content = sync.GetBytesBuffer()
}
var err error
if !p.lowMemoryMode && oh.content != nil && oh.content.Len() > 0 {
source := oh.content
oh.content = sync.GetBytesBuffer()
defer sync.PutBytesBuffer(source)
err = p.applyPatchBaseHeader(oh, source, oh.content, nil)
} else if p.scanner.seeker != nil {
deltaData := sync.GetBytesBuffer()
defer sync.PutBytesBuffer(deltaData)
err = p.scanner.inflateContent(oh.ContentOffset, deltaData)
if err != nil {
return fmt.Errorf("inflating content at offset %v: %w", oh.ContentOffset, err)
}
err = p.applyPatchBaseHeader(oh, deltaData, oh.content, nil)
} else {
return fmt.Errorf("can't ensure content: %w", plumbing.ErrObjectNotFound)
}
if err != nil {
return fmt.Errorf("apply delta patch: %w", err)
}
return nil
}
func (p *Parser) processDelta(oh *ObjectHeader) error {
switch oh.Type {
case plumbing.OFSDeltaObject:
pa, ok := p.cache.oiByOffset[oh.OffsetReference]
if !ok {
return plumbing.ErrObjectNotFound
}
oh.parent = pa
case plumbing.REFDeltaObject:
pa, ok := p.cache.oiByHash[oh.Reference]
if !ok {
// The referenced object can't be found in this pack file;
// this must be a "thin" pack.
oh.parent = &ObjectHeader{ // Placeholder parent
Hash: oh.Reference,
externalRef: true, // mark as an external reference that must be resolved
Type: plumbing.AnyObject,
diskType: plumbing.AnyObject,
}
} else {
oh.parent = pa
}
p.cache.oiByHash[oh.Reference] = oh.parent
default:
return fmt.Errorf("unsupported delta type: %v", oh.Type)
}
if err := p.ensureContent(oh); err != nil {
return err
}
return p.storeOrCache(oh)
}
// parentReader returns an [io.ReaderAt] for the decompressed contents
// of the parent.
func (p *Parser) parentReader(parent *ObjectHeader) (io.ReaderAt, error) {
if parent.content != nil && parent.content.Len() > 0 {
return bytes.NewReader(parent.content.Bytes()), nil
}
// If parent is a Delta object, the inflated object must come
// from either cache or storage, else we would need to inflate
// it to then inflate the current object, which could go on
// indefinitely.
if p.storage != nil && parent.Hash != plumbing.ZeroHash {
obj, err := p.storage.EncodedObject(parent.Type, parent.Hash)
if err == nil {
// Ensure that external references have the correct type and size.
parent.Type = obj.Type()
parent.Size = obj.Size()
r, err := obj.Reader()
if err == nil {
defer r.Close()
if parent.content == nil {
parent.content = sync.GetBytesBuffer()
}
parent.content.Grow(int(parent.Size))
_, err = ioutil.CopyBufferPool(parent.content, r)
if err == nil {
return bytes.NewReader(parent.content.Bytes()), nil
}
}
}
}
// If the parent is not an external ref and we don't have the
// content offset, we won't be able to inflate via seeking through
// the packfile.
if !parent.externalRef && parent.ContentOffset == 0 {
return nil, plumbing.ErrObjectNotFound
}
// Not a seeker data source, so avoid seeking the content.
if p.scanner.seeker == nil {
return nil, plumbing.ErrObjectNotFound
}
if parent.content == nil {
parent.content = sync.GetBytesBuffer()
}
parent.content.Grow(int(parent.Size))
err := p.scanner.inflateContent(parent.ContentOffset, parent.content)
if err != nil {
return nil, ErrReferenceDeltaNotFound
}
return bytes.NewReader(parent.content.Bytes()), nil
}
func (p *Parser) applyPatchBaseHeader(ota *ObjectHeader, delta io.Reader, target io.Writer, wh objectHeaderWriter) error {
if target == nil {
return fmt.Errorf("cannot apply patch against nil target")
}
parentContents, err := p.parentReader(ota.parent)
if err != nil {
return err
}
typ := ota.Type
if ota.Hash == plumbing.ZeroHash {
typ = ota.parent.Type
}
sz, h, err := patchDeltaWriter(target, parentContents, delta, typ, wh)
if err != nil {
return err
}
if ota.Hash == plumbing.ZeroHash {
ota.Type = typ
ota.Size = int64(sz)
ota.Hash = h
}
return nil
}
func (p *Parser) forEachObserver(f func(o Observer) error) error {
for _, o := range p.observers {
if err := f(o); err != nil {
return err
}
}
return nil
}
func (p *Parser) onHeader(count uint32) error {
return p.forEachObserver(func(o Observer) error {
return o.OnHeader(count)
})
}
func (p *Parser) onInflatedObjectHeader(
t plumbing.ObjectType,
objSize int64,
pos int64,
) error {
return p.forEachObserver(func(o Observer) error {
return o.OnInflatedObjectHeader(t, objSize, pos)
})
}
func (p *Parser) onInflatedObjectContent(
h plumbing.Hash,
pos int64,
crc uint32,
content []byte,
) error {
return p.forEachObserver(func(o Observer) error {
return o.OnInflatedObjectContent(h, pos, crc, content)
})
}
func (p *Parser) onFooter(h plumbing.Hash) error {
return p.forEachObserver(func(o Observer) error {
return o.OnFooter(h)
})
}
package packfile
import (
"slices"
"github.com/go-git/go-git/v6/plumbing"
)
func newParserCache() *parserCache {
c := &parserCache{}
return c
}
// parserCache defines the cache used within the parser.
// This is not thread safe by itself, and relies on the parser to
// enforce thread-safety.
type parserCache struct {
oi []*ObjectHeader
oiByHash map[plumbing.Hash]*ObjectHeader
oiByOffset map[int64]*ObjectHeader
}
func (c *parserCache) Add(oh *ObjectHeader) {
c.oiByHash[oh.Hash] = oh
c.oiByOffset[oh.Offset] = oh
c.oi = append(c.oi, oh)
}
func (c *parserCache) Reset(n int) {
if c.oi == nil {
c.oi = make([]*ObjectHeader, 0, n)
c.oiByHash = make(map[plumbing.Hash]*ObjectHeader, n)
c.oiByOffset = make(map[int64]*ObjectHeader, n)
} else {
c.oi = c.oi[:0]
c.oi = slices.Grow(c.oi, n)
clear(c.oiByHash)
clear(c.oiByOffset)
}
}
package packfile
import (
"github.com/go-git/go-git/v6/plumbing/storer"
)
// ParserOption configures a Parser.
type ParserOption func(*Parser)
// WithStorage sets the storage to be used while parsing a pack file.
func WithStorage(storage storer.EncodedObjectStorer) ParserOption {
return func(p *Parser) {
p.storage = storage
}
}
// WithScannerObservers sets the observers to be notified during the
// scanning or parsing of a pack file. The scanner is responsible for
// notifying observers around general pack file information, such as
// header and footer. The scanner also notifies object headers for
// non-delta objects.
//
// Delta objects are notified as part of the parser logic.
func WithScannerObservers(ob ...Observer) ParserOption {
return func(p *Parser) {
p.observers = ob
}
}
// WithHighMemoryMode optimises the parser for speed rather than for
// memory consumption, making the Parser faster from an execution
// time perspective, but yielding many more allocations, which in the
// long run could make the application slower due to GC pressure.
//
// When the parser is used without a storage, this is enabled
// automatically, as low-memory mode can't operate without one. Some
// storage types may not support low-memory mode (e.g. memory storage);
// for storage types that do support it, this becomes an opt-in feature.
//
// When enabled, the inflated content of all delta objects (ofs and ref)
// will be loaded into cache, making it faster to navigate through them.
// If the reader provided to the parser does not implement io.Seeker,
// full objects may also be loaded into memory.
func WithHighMemoryMode() ParserOption {
return func(p *Parser) {
p.lowMemoryMode = false
}
}
package packfile
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"math"
"github.com/go-git/go-git/v6/plumbing"
format "github.com/go-git/go-git/v6/plumbing/format/config"
"github.com/go-git/go-git/v6/utils/ioutil"
"github.com/go-git/go-git/v6/utils/sync"
)
// See https://github.com/git/git/blob/49fa3dc76179e04b0833542fa52d0f287a4955ac/delta.h
// https://github.com/git/git/blob/c2c5f6b1e479f2c38e0e01345350620944e3527f/patch-delta.c,
// and https://github.com/tarruda/node-git-core/blob/master/src/js/delta.js
// for details about the delta format.
// Delta errors.
var (
ErrInvalidDelta = errors.New("invalid delta")
ErrDeltaCmd = errors.New("wrong delta command")
)
const (
payload = 0x7f // 0111 1111
continuation = 0x80 // 1000 0000
// maxPatchPreemptionSize defines the maximum number of bytes to be
// preemptively made available for a patch operation.
maxPatchPreemptionSize uint = 65536
// minDeltaSize defines the smallest size for a delta.
minDeltaSize = 4
)
type offset struct {
mask byte
shift uint
}
var offsets = []offset{
{mask: 0x01, shift: 0},
{mask: 0x02, shift: 8},
{mask: 0x04, shift: 16},
{mask: 0x08, shift: 24},
}
var sizes = []offset{
{mask: 0x10, shift: 0},
{mask: 0x20, shift: 8},
{mask: 0x40, shift: 16},
}
// ApplyDelta writes to target the result of applying the modification deltas in delta to base.
func ApplyDelta(target, base plumbing.EncodedObject, delta *bytes.Buffer) (err error) {
r, err := base.Reader()
if err != nil {
return err
}
defer ioutil.CheckClose(r, &err)
w, err := target.Writer()
if err != nil {
return err
}
defer ioutil.CheckClose(w, &err)
buf := sync.GetBytesBuffer()
defer sync.PutBytesBuffer(buf)
_, err = buf.ReadFrom(r)
if err != nil {
return err
}
src := buf.Bytes()
dst := sync.GetBytesBuffer()
defer sync.PutBytesBuffer(dst)
err = patchDelta(dst, src, delta.Bytes())
if err != nil {
return err
}
target.SetSize(int64(dst.Len()))
_, err = ioutil.CopyBufferPool(w, dst)
return err
}
// PatchDelta returns the result of applying the modification deltas in delta to src.
// An error will be returned if delta is corrupted (ErrInvalidDelta) or an action command
// is not copy from source or copy from delta (ErrDeltaCmd).
func PatchDelta(src, delta []byte) ([]byte, error) {
if len(src) == 0 || len(delta) < minDeltaSize {
return nil, ErrInvalidDelta
}
b := &bytes.Buffer{}
if err := patchDelta(b, src, delta); err != nil {
return nil, err
}
return b.Bytes(), nil
}
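// A minimal worked example of the delta wire format, patching "hello"
// into "hello world":
//
//	src := []byte("hello")
//	delta := []byte{
//		0x05,       // source size = 5 (LEB128)
//		0x0b,       // target size = 11 (LEB128)
//		0x90, 0x05, // copy from src: offset 0, size 5 ("hello")
//		0x06,       // insert the next 6 bytes taken from the delta
//		' ', 'w', 'o', 'r', 'l', 'd',
//	}
//	out, err := PatchDelta(src, delta)
//	// out == []byte("hello world"), err == nil
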
// ReaderFromDelta returns a reader that applies a delta to a base object.
func ReaderFromDelta(base plumbing.EncodedObject, deltaRC io.Reader) (io.ReadCloser, error) {
deltaBuf := bufio.NewReaderSize(deltaRC, 1024)
srcSz, err := decodeLEB128ByteReader(deltaBuf)
if err != nil {
if err == io.EOF {
return nil, ErrInvalidDelta
}
return nil, err
}
if srcSz != uint(base.Size()) {
return nil, ErrInvalidDelta
}
targetSz, err := decodeLEB128ByteReader(deltaBuf)
if err != nil {
if err == io.EOF {
return nil, ErrInvalidDelta
}
return nil, err
}
remainingTargetSz := targetSz
dstRd, dstWr := io.Pipe()
go func() {
baseRd, err := base.Reader()
if err != nil {
_ = dstWr.CloseWithError(ErrInvalidDelta)
return
}
defer baseRd.Close()
baseBuf := bufio.NewReader(baseRd)
basePos := uint(0)
for {
cmd, err := deltaBuf.ReadByte()
if err == io.EOF {
_ = dstWr.CloseWithError(ErrInvalidDelta)
return
}
if err != nil {
_ = dstWr.CloseWithError(err)
return
}
switch {
case isCopyFromSrc(cmd):
offset, err := decodeOffsetByteReader(cmd, deltaBuf)
if err != nil {
_ = dstWr.CloseWithError(err)
return
}
sz, err := decodeSizeByteReader(cmd, deltaBuf)
if err != nil {
_ = dstWr.CloseWithError(err)
return
}
if invalidSize(sz, targetSz) ||
invalidOffsetSize(offset, sz, srcSz) {
_ = dstWr.CloseWithError(ErrInvalidDelta)
return
}
discard := offset - basePos
if basePos > offset {
_ = baseRd.Close()
baseRd, err = base.Reader()
if err != nil {
_ = dstWr.CloseWithError(ErrInvalidDelta)
return
}
baseBuf.Reset(baseRd)
discard = offset
}
for discard > math.MaxInt32 {
n, err := baseBuf.Discard(math.MaxInt32)
if err != nil {
_ = dstWr.CloseWithError(err)
return
}
basePos += uint(n)
discard -= uint(n)
}
for discard > 0 {
n, err := baseBuf.Discard(int(discard))
if err != nil {
_ = dstWr.CloseWithError(err)
return
}
basePos += uint(n)
discard -= uint(n)
}
if _, err := ioutil.CopyBufferPool(dstWr, io.LimitReader(baseBuf, int64(sz))); err != nil {
_ = dstWr.CloseWithError(err)
return
}
remainingTargetSz -= sz
basePos += sz
case isCopyFromDelta(cmd):
sz := uint(cmd) // cmd is the size itself
if invalidSize(sz, targetSz) {
_ = dstWr.CloseWithError(ErrInvalidDelta)
return
}
if _, err := ioutil.CopyBufferPool(dstWr, io.LimitReader(deltaBuf, int64(sz))); err != nil {
_ = dstWr.CloseWithError(err)
return
}
remainingTargetSz -= sz
default:
_ = dstWr.CloseWithError(ErrDeltaCmd)
return
}
if remainingTargetSz <= 0 {
_ = dstWr.Close()
return
}
}
}()
return dstRd, nil
}
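// Usage sketch: streaming a deltified object without buffering the whole
// result. `base` and `deltaRC` are assumptions: the base object and a
// reader over the raw delta bytes.
//
//	rc, err := ReaderFromDelta(base, deltaRC)
//	if err != nil {
//		// handle error
//	}
//	defer rc.Close()
//	_, err = io.Copy(dst, rc) // dst is any io.Writer
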
func patchDelta(dst *bytes.Buffer, src, delta []byte) error {
if len(delta) < minCopySize {
return ErrInvalidDelta
}
srcSz, delta := decodeLEB128(delta)
if srcSz != uint(len(src)) {
return ErrInvalidDelta
}
targetSz, delta := decodeLEB128(delta)
remainingTargetSz := targetSz
var cmd byte
growSz := min(targetSz, maxPatchPreemptionSize)
dst.Grow(int(growSz))
for {
if len(delta) == 0 {
return ErrInvalidDelta
}
cmd = delta[0]
delta = delta[1:]
switch {
case isCopyFromSrc(cmd):
var offset, sz uint
var err error
offset, delta, err = decodeOffset(cmd, delta)
if err != nil {
return err
}
sz, delta, err = decodeSize(cmd, delta)
if err != nil {
return err
}
if invalidSize(sz, targetSz) ||
invalidOffsetSize(offset, sz, srcSz) {
break
}
dst.Write(src[offset : offset+sz])
remainingTargetSz -= sz
case isCopyFromDelta(cmd):
sz := uint(cmd) // cmd is the size itself
if invalidSize(sz, targetSz) {
return ErrInvalidDelta
}
if uint(len(delta)) < sz {
return ErrInvalidDelta
}
dst.Write(delta[0:sz])
remainingTargetSz -= sz
delta = delta[sz:]
default:
return ErrDeltaCmd
}
if remainingTargetSz <= 0 {
break
}
}
return nil
}
func patchDeltaWriter(dst io.Writer, base io.ReaderAt, delta io.Reader,
typ plumbing.ObjectType, writeHeader objectHeaderWriter,
) (uint, plumbing.Hash, error) {
deltaBuf := bufio.NewReader(delta)
srcSz, err := decodeLEB128ByteReader(deltaBuf)
if err != nil {
if err == io.EOF {
return 0, plumbing.ZeroHash, ErrInvalidDelta
}
return 0, plumbing.ZeroHash, err
}
if r, ok := base.(*bytes.Reader); ok && srcSz != uint(r.Size()) {
return 0, plumbing.ZeroHash, ErrInvalidDelta
}
targetSz, err := decodeLEB128ByteReader(deltaBuf)
if err != nil {
if err == io.EOF {
return 0, plumbing.ZeroHash, ErrInvalidDelta
}
return 0, plumbing.ZeroHash, err
}
// Avoid several iterations expanding the buffer, which can be quite
// inefficient on large deltas.
if b, ok := dst.(*bytes.Buffer); ok {
b.Grow(int(targetSz))
}
// If header still needs to be written, caller will provide
// a LazyObjectWriterHeader. This seems to be the case when
// dealing with thin-packs.
if writeHeader != nil {
err = writeHeader(typ, int64(targetSz))
if err != nil {
return 0, plumbing.ZeroHash, fmt.Errorf("could not lazy write header: %w", err)
}
}
remainingTargetSz := targetSz
hasher := plumbing.NewHasher(format.SHA1, typ, int64(targetSz))
mw := io.MultiWriter(dst, hasher)
bufp := sync.GetByteSlice()
defer sync.PutByteSlice(bufp)
sr := io.NewSectionReader(base, int64(0), int64(srcSz))
// Keep both the io.LimitedReader types, so we can reset N.
baselr := io.LimitReader(sr, 0).(*io.LimitedReader)
deltalr := io.LimitReader(deltaBuf, 0).(*io.LimitedReader)
for {
buf := *bufp
cmd, err := deltaBuf.ReadByte()
if err == io.EOF {
return 0, plumbing.ZeroHash, ErrInvalidDelta
}
if err != nil {
return 0, plumbing.ZeroHash, err
}
if isCopyFromSrc(cmd) {
offset, err := decodeOffsetByteReader(cmd, deltaBuf)
if err != nil {
return 0, plumbing.ZeroHash, err
}
sz, err := decodeSizeByteReader(cmd, deltaBuf)
if err != nil {
return 0, plumbing.ZeroHash, err
}
if invalidSize(sz, targetSz) ||
invalidOffsetSize(offset, sz, srcSz) {
return 0, plumbing.ZeroHash, ErrInvalidDelta
}
if _, err := sr.Seek(int64(offset), io.SeekStart); err != nil {
return 0, plumbing.ZeroHash, err
}
baselr.N = int64(sz)
if _, err := io.CopyBuffer(mw, baselr, buf); err != nil {
return 0, plumbing.ZeroHash, err
}
remainingTargetSz -= sz
} else if isCopyFromDelta(cmd) {
sz := uint(cmd) // cmd is the size itself
if invalidSize(sz, targetSz) {
return 0, plumbing.ZeroHash, ErrInvalidDelta
}
deltalr.N = int64(sz)
if _, err := io.CopyBuffer(mw, deltalr, buf); err != nil {
return 0, plumbing.ZeroHash, err
}
remainingTargetSz -= sz
} else {
// Unknown delta command.
return 0, plumbing.ZeroHash, ErrDeltaCmd
}
if remainingTargetSz <= 0 {
break
}
}
return targetSz, hasher.Sum(), nil
}
// Decodes a number encoded as an unsigned LEB128 at the start of some
// binary data and returns the decoded number and the rest of the
// stream.
//
// This must be called twice on the delta data buffer, first to get the
// expected source buffer size, and again to get the target buffer size.
func decodeLEB128(input []byte) (uint, []byte) {
if len(input) == 0 {
return 0, input
}
var num, sz uint
var b byte
for {
b = input[sz]
num |= (uint(b) & payload) << (sz * 7) // concatenate 7-bit chunks
sz++
if uint(b)&continuation == 0 || sz == uint(len(input)) {
break
}
}
return num, input[sz:]
}
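// Worked example: decodeLEB128([]byte{0xE5, 0x8E, 0x26}) yields 624485.
// Each byte contributes its low 7 bits, least-significant chunk first,
// and the unset high bit of 0x26 terminates the sequence:
//
//	0x65<<0 + 0x0E<<7 + 0x26<<14 = 101 + 1792 + 622592 = 624485
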
func decodeLEB128ByteReader(input io.ByteReader) (uint, error) {
var num, sz uint
for {
b, err := input.ReadByte()
if err != nil {
return 0, err
}
num |= (uint(b) & payload) << (sz * 7) // concatenate 7-bit chunks
sz++
if uint(b)&continuation == 0 {
break
}
}
return num, nil
}
func isCopyFromSrc(cmd byte) bool {
return (cmd & continuation) != 0
}
func isCopyFromDelta(cmd byte) bool {
return (cmd&continuation) == 0 && cmd != 0
}
func decodeOffsetByteReader(cmd byte, delta io.ByteReader) (uint, error) {
var offset uint
for _, o := range offsets {
if (cmd & o.mask) != 0 {
next, err := delta.ReadByte()
if err != nil {
return 0, err
}
offset |= uint(next) << o.shift
}
}
return offset, nil
}
func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) {
var offset uint
for _, o := range offsets {
if (cmd & o.mask) != 0 {
if len(delta) == 0 {
return 0, nil, ErrInvalidDelta
}
offset |= uint(delta[0]) << o.shift
delta = delta[1:]
}
}
return offset, delta, nil
}
func decodeSizeByteReader(cmd byte, delta io.ByteReader) (uint, error) {
var sz uint
for _, s := range sizes {
if (cmd & s.mask) != 0 {
next, err := delta.ReadByte()
if err != nil {
return 0, err
}
sz |= uint(next) << s.shift
}
}
if sz == 0 {
sz = maxCopySize
}
return sz, nil
}
func decodeSize(cmd byte, delta []byte) (uint, []byte, error) {
var sz uint
for _, s := range sizes {
if (cmd & s.mask) != 0 {
if len(delta) == 0 {
return 0, nil, ErrInvalidDelta
}
sz |= uint(delta[0]) << s.shift
delta = delta[1:]
}
}
if sz == 0 {
sz = maxCopySize
}
return sz, delta, nil
}
func invalidSize(sz, targetSz uint) bool {
return sz > targetSz
}
func invalidOffsetSize(offset, sz, srcSz uint) bool {
return sumOverflows(offset, sz) ||
offset+sz > srcSz
}
func sumOverflows(a, b uint) bool {
return a+b < a
}
package packfile
import (
"bufio"
"bytes"
"crypto"
"encoding/hex"
"fmt"
"hash"
"hash/crc32"
"io"
"sync"
"github.com/go-git/go-git/v6/plumbing"
format "github.com/go-git/go-git/v6/plumbing/format/config"
gogithash "github.com/go-git/go-git/v6/plumbing/hash"
"github.com/go-git/go-git/v6/plumbing/storer"
"github.com/go-git/go-git/v6/utils/binary"
"github.com/go-git/go-git/v6/utils/ioutil"
gogitsync "github.com/go-git/go-git/v6/utils/sync"
)
var (
// ErrEmptyPackfile is returned by ReadHeader when no data is found in the packfile.
ErrEmptyPackfile = NewError("empty packfile")
// ErrBadSignature is returned by ReadHeader when the signature in the packfile is incorrect.
ErrBadSignature = NewError("malformed pack file signature")
// ErrMalformedPackfile is returned when the packfile format is incorrect.
ErrMalformedPackfile = NewError("malformed pack file")
// ErrUnsupportedVersion is returned by ReadHeader when the packfile version is
// different than VersionSupported.
ErrUnsupportedVersion = NewError("unsupported packfile version")
// ErrSeekNotSupported is returned when seeking is not supported.
ErrSeekNotSupported = NewError("seek not supported")
)
// Scanner provides sequential access to the data stored in a Git packfile.
//
// A Git packfile is a compressed binary format that stores multiple Git objects,
// such as commits, trees, delta objects and blobs. These packfiles are used to
// reduce the size of data when transferring or storing Git repositories.
//
// A Git packfile is structured as follows:
//
// +----------------------------------------------------+
// | PACK File Header |
// +----------------------------------------------------+
// | "PACK" | Version Number | Number of Objects |
// | (4 bytes) | (4 bytes) | (4 bytes) |
// +----------------------------------------------------+
// | Object Entry #1 |
// +----------------------------------------------------+
// | Object Header | Compressed Object Data / Delta |
// | (type + size) | (var-length, zlib compressed) |
// +----------------------------------------------------+
// | ... |
// +----------------------------------------------------+
// | PACK File Footer |
// +----------------------------------------------------+
// | SHA-1 Checksum (20 bytes) |
// +----------------------------------------------------+
//
// For upstream docs, refer to https://git-scm.com/docs/gitformat-pack.
type Scanner struct {
// version holds the packfile version.
version Version
// objects holds the quantity of objects within the packfile.
objects uint32
// objIndex is the current index when going through the packfile objects.
objIndex int
// hasher is used to hash non-delta objects.
hasher plumbing.Hasher
// hasher256 is optional and used to hash the non-delta objects using SHA256.
hasher256 *plumbing.Hasher
// crc is used to generate the CRC-32 checksum of each object's content.
crc hash.Hash32
// packhash hashes the pack contents so that at the end it is able to
// validate the packfile's footer checksum against the calculated hash.
packhash gogithash.Hash
// objectIdSize holds the object ID size.
objectIDSize int
// nextFn holds the state function to be executed on the next
// call to Scan().
nextFn stateFn
// packData holds the data for the last successful call to Scan().
packData PackData
// err holds the first error that occurred.
err error
m sync.Mutex
// storage is optional, and when set is used to store full objects found.
// Note that delta objects are not stored.
storage storer.EncodedObjectStorer
*scannerReader
rbuf *bufio.Reader
lowMemoryMode bool
}
// NewScanner creates a new instance of Scanner.
func NewScanner(rs io.Reader, opts ...ScannerOption) *Scanner {
crc := crc32.NewIEEE()
packhash := gogithash.New(crypto.SHA1)
r := &Scanner{
objIndex: -1,
hasher: plumbing.NewHasher(format.SHA1, plumbing.AnyObject, 0),
crc: crc,
packhash: packhash,
nextFn: packHeaderSignature,
// Set the default size, which can be overridden by opts.
objectIDSize: packhash.Size(),
}
for _, opt := range opts {
opt(r)
}
r.scannerReader = newScannerReader(rs, io.MultiWriter(crc, packhash), r.rbuf)
return r
}
// Scan scans a Packfile sequentially. Each call will navigate from a section
// to the next, until the entire file is read.
//
// The section data can be accessed via calls to Data(). Example:
//
// for scanner.Scan() {
// v := scanner.Data().Value()
//
// switch scanner.Data().Section {
// case HeaderSection:
// header := v.(Header)
// fmt.Println("[Header] Objects Qty:", header.ObjectsQty)
// case ObjectSection:
// oh := v.(ObjectHeader)
// fmt.Println("[Object] Object Type:", oh.Type)
// case FooterSection:
// checksum := v.(plumbing.Hash)
// fmt.Println("[Footer] Checksum:", checksum)
// }
// }
func (r *Scanner) Scan() bool {
r.m.Lock()
defer r.m.Unlock()
if r.err != nil || r.nextFn == nil {
return false
}
if err := scan(r); err != nil {
r.err = err
return false
}
return true
}
// Reset resets the current scanner, enabling it to be used to scan the
// same Packfile again.
func (r *Scanner) Reset() {
r.Flush()
r.Seek(0, io.SeekStart)
r.packhash.Reset()
r.objIndex = -1
r.version = 0
r.objects = 0
r.packData = PackData{}
r.err = nil
r.nextFn = packHeaderSignature
}
// Data returns the pack data based on the last call to Scan().
func (r *Scanner) Data() PackData {
return r.packData
}
// Error returns the first error that occurred on the last call to Scan().
// Once an error occurs, calls to Scan() become a no-op.
func (r *Scanner) Error() error {
return r.err
}
// SeekFromStart seeks to the given offset from the start of the packfile.
func (r *Scanner) SeekFromStart(offset int64) error {
r.Reset()
if !r.Scan() {
return fmt.Errorf("failed to reset and read header")
}
_, err := r.Seek(offset, io.SeekStart)
return err
}
// WriteObject writes the content of the given ObjectHeader to the provided writer.
func (r *Scanner) WriteObject(oh *ObjectHeader, writer io.Writer) error {
if oh.content != nil && oh.content.Len() > 0 {
_, err := ioutil.CopyBufferPool(writer, oh.content)
return err
}
// If oh is an external ref and we don't have the content offset,
// we won't be able to inflate it by seeking through the packfile.
if oh.externalRef && oh.ContentOffset == 0 {
return plumbing.ErrObjectNotFound
}
// Not a seeker data source.
if r.seeker == nil {
return plumbing.ErrObjectNotFound
}
err := r.inflateContent(oh.ContentOffset, writer)
if err != nil {
return ErrReferenceDeltaNotFound
}
return nil
}
func (r *Scanner) inflateContent(contentOffset int64, writer io.Writer) error {
_, err := r.Seek(contentOffset, io.SeekStart)
if err != nil {
return err
}
zr, err := gogitsync.GetZlibReader(r.scannerReader)
if err != nil {
return fmt.Errorf("zlib reset error: %s", err)
}
defer gogitsync.PutZlibReader(zr)
_, err = ioutil.CopyBufferPool(writer, zr)
return err
}
// scan goes through the next stateFn.
//
// State functions are chained by returning a non-nil value for stateFn.
// In such cases, the returned stateFn will be called immediately after
// the current func.
func scan(r *Scanner) error {
var err error
for state := r.nextFn; state != nil; {
state, err = state(r)
if err != nil {
return err
}
}
return nil
}
// stateFn defines each individual state within the state machine that
// represents a packfile.
type stateFn func(*Scanner) (stateFn, error)
// packHeaderSignature validates the packfile's header signature and
// returns [ErrBadSignature] if the value provided is invalid.
//
// This is always the first state of a packfile and starts the chain
// that handles the entire packfile header.
func packHeaderSignature(r *Scanner) (stateFn, error) {
start := make([]byte, 4)
_, err := r.Read(start)
if err != nil {
return nil, fmt.Errorf("%w: %w", ErrBadSignature, err)
}
if bytes.Equal(start, signature) {
return packVersion, nil
}
return nil, ErrBadSignature
}
// packVersion parses the packfile version. It returns [ErrMalformedPackfile]
// when the version cannot be parsed. If a valid version is parsed, but it is
// not currently supported, it returns [ErrUnsupportedVersion] instead.
func packVersion(r *Scanner) (stateFn, error) {
version, err := binary.ReadUint32(r.scannerReader)
if err != nil {
return nil, fmt.Errorf("%w: cannot read version", ErrMalformedPackfile)
}
v := Version(version)
if !v.Supported() {
return nil, ErrUnsupportedVersion
}
r.version = v
return packObjectsQty, nil
}
// packObjectsQty parses the quantity of objects that the packfile contains.
// If the value cannot be parsed, [ErrMalformedPackfile] is returned.
//
// This state ends the packfile header chain.
func packObjectsQty(r *Scanner) (stateFn, error) {
qty, err := binary.ReadUint32(r.scannerReader)
if err != nil {
return nil, fmt.Errorf("%w: cannot read number of objects", ErrMalformedPackfile)
}
if qty == 0 {
return packFooter, nil
}
r.objects = qty
r.packData = PackData{
Section: HeaderSection,
header: Header{Version: r.version, ObjectsQty: r.objects},
}
r.nextFn = objectEntry
return nil, nil
}
// objectEntry handles the object entries within a packfile. This is generally
// split between object headers and their contents.
//
// The object header contains the object type and size. If the type cannot be parsed,
// [ErrMalformedPackfile] is returned.
//
// When SHA256 is enabled, the scanner will also calculate the SHA256 for each object.
func objectEntry(r *Scanner) (stateFn, error) {
if r.objIndex+1 >= int(r.objects) {
return packFooter, nil
}
r.objIndex++
offset := r.offset
r.Flush()
r.crc.Reset()
b := []byte{0}
_, err := r.Read(b)
if err != nil {
return nil, err
}
typ := parseType(b[0])
if !typ.Valid() {
return nil, fmt.Errorf("%w: invalid object type: %v", ErrMalformedPackfile, b[0])
}
size, err := readVariableLengthSize(b[0], r)
if err != nil {
return nil, err
}
oh := ObjectHeader{
Offset: offset,
Type: typ,
diskType: typ,
Size: int64(size),
}
switch oh.Type {
case plumbing.OFSDeltaObject, plumbing.REFDeltaObject:
// For delta objects, read the base reference.
if oh.Type == plumbing.OFSDeltaObject {
no, err := binary.ReadVariableWidthInt(r.scannerReader)
if err != nil {
return nil, err
}
oh.OffsetReference = oh.Offset - no
} else {
oh.Reference.ResetBySize(r.objectIDSize)
_, err := oh.Reference.ReadFrom(r.scannerReader)
if err != nil {
return nil, err
}
}
}
oh.ContentOffset = r.offset
zr, err := gogitsync.GetZlibReader(r.scannerReader)
if err != nil {
return nil, fmt.Errorf("zlib reset error: %s", err)
}
defer gogitsync.PutZlibReader(zr)
if !oh.Type.IsDelta() {
r.hasher.Reset(oh.Type, oh.Size)
var mw io.Writer = r.hasher
if r.storage != nil {
w, err := r.storage.RawObjectWriter(oh.Type, oh.Size)
if err != nil {
return nil, err
}
defer w.Close()
mw = io.MultiWriter(r.hasher, w)
}
// If the reader isn't seekable and low-memory mode isn't enabled,
// keep the contents of the object in memory.
if !r.lowMemoryMode && r.seeker == nil {
oh.content = gogitsync.GetBytesBuffer()
mw = io.MultiWriter(mw, oh.content)
}
if r.hasher256 != nil {
r.hasher256.Reset(oh.Type, oh.Size)
mw = io.MultiWriter(mw, r.hasher256)
}
// For non delta objects, simply calculate the hash of each object.
_, err = ioutil.CopyBufferPool(mw, zr)
if err != nil {
return nil, err
}
oh.Hash = r.hasher.Sum()
if r.hasher256 != nil {
h := r.hasher256.Sum()
oh.Hash256 = &h
}
} else {
// When not in low-memory mode, keep the delta content in memory,
// so that it can be accessed by the Parser.
if !r.lowMemoryMode {
oh.content = gogitsync.GetBytesBuffer()
_, err = oh.content.ReadFrom(zr)
if err != nil {
return nil, err
}
} else {
// We don't know the compressed length, so we can't seek to
// the next object; we must discard the data instead.
_, err = ioutil.CopyBufferPool(io.Discard, zr)
if err != nil {
return nil, err
}
}
}
r.Flush()
oh.Crc32 = r.crc.Sum32()
r.packData.Section = ObjectSection
r.packData.objectHeader = oh
return nil, nil
}
// packFooter parses the packfile checksum.
// If the checksum cannot be parsed, or it does not match the checksum
// calculated during the scanning process, an [ErrMalformedPackfile] is
// returned.
func packFooter(r *Scanner) (stateFn, error) {
r.Flush()
actual := r.packhash.Sum(nil)
var checksum plumbing.Hash
_, err := checksum.ReadFrom(r.scannerReader)
if err != nil {
return nil, fmt.Errorf("cannot read PACK checksum: %w", ErrMalformedPackfile)
}
if checksum.Compare(actual) != 0 {
return nil, fmt.Errorf("checksum mismatch expected %q but found %q: %w",
hex.EncodeToString(actual), checksum, ErrMalformedPackfile)
}
r.packData.Section = FooterSection
r.packData.checksum = checksum
r.nextFn = nil
return nil, nil
}
func readVariableLengthSize(first byte, reader io.ByteReader) (uint64, error) {
// Extract the first part of the size (low 4 bits of the first byte).
size := uint64(first & 0x0F)
// | Ctttssss | Cxxxxxxx | Cxxxxxxx | ...
//
// C: continuation bit, ttt: 3-bit object type, ssss: size part 1,
// xxxxxxx: subsequent 7-bit size parts, least significant first.
//
// Check if more bytes are needed to fully determine the size.
if first&maskContinue != 0 {
shift := uint(4)
for {
b, err := reader.ReadByte()
if err != nil {
return 0, err
}
// Add the next 7 bits to the size.
size |= uint64(b&0x7F) << shift
// Check if the continuation bit is set.
if b&maskContinue == 0 {
break
}
// Prepare for the next byte.
shift += 7
}
}
return size, nil
}
func parseType(b byte) plumbing.ObjectType {
return plumbing.ObjectType((b & maskType) >> firstLengthBits)
}
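// Worked example, the inverse of the encoder's entryHead: the header bytes
// [0xB4, 0x06] decode to a blob of size 100. 0xB4 is 1011_0100: the top bit
// flags continuation, 011 is the type (blob), and 0100 holds the low size
// bits; 0x06 then contributes 6 << 4 = 96, so size = 4 + 96 = 100.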
package packfile
import (
"bufio"
"github.com/go-git/go-git/v6/plumbing"
format "github.com/go-git/go-git/v6/plumbing/format/config"
)
// ScannerOption configures a Scanner.
type ScannerOption func(*Scanner)
// WithSHA256 enables the SHA256 hashing while scanning a pack file.
func WithSHA256() ScannerOption {
return func(s *Scanner) {
h := plumbing.NewHasher(format.SHA256, plumbing.AnyObject, 0)
s.objectIDSize = format.SHA256Size
s.hasher256 = &h
}
}
// WithBufioReader passes a bufio.Reader for the scanner to use.
// It is used for reusing the buffer across multiple scanner instances.
func WithBufioReader(buf *bufio.Reader) ScannerOption {
return func(s *Scanner) {
s.rbuf = buf
}
}
package packfile
import (
"bufio"
"io"
)
// scannerReader has the following characteristics:
// - Provides an io.ReadSeeker implementation on top of bufio.Reader, when
// the underlying reader supports it.
// - Keeps track of the current read position, for when the underlying reader
// isn't an io.ReadSeeker, but we still want to know the current offset.
// - Writes what it reads to the hash writer, with the aid of a small buffer.
// The buffer helps avoid a performance penalty for performing small writes
// to the crc32 hash writer.
//
// Note that this is passed on to zlib, and it must support io.ByteReader, else
// it won't be able to just read the content of the current object, but rather it
// will read the entire packfile.
//
// scannerReader is not thread-safe.
type scannerReader struct {
reader io.Reader
crc io.Writer
rbuf *bufio.Reader
wbuf *bufio.Writer
offset int64
seeker io.Seeker
}
func newScannerReader(r io.Reader, h io.Writer, rbuf *bufio.Reader) *scannerReader {
if rbuf == nil {
rbuf = bufio.NewReader(nil)
}
sr := &scannerReader{
rbuf: rbuf,
wbuf: bufio.NewWriterSize(nil, 64),
crc: h,
}
sr.Reset(r)
return sr
}
func (r *scannerReader) Reset(reader io.Reader) {
r.reader = reader
r.rbuf.Reset(r.reader)
r.wbuf.Reset(r.crc)
r.offset = 0
seeker, ok := r.reader.(io.ReadSeeker)
r.seeker = seeker
if ok {
r.offset, _ = seeker.Seek(0, io.SeekStart)
}
}
func (r *scannerReader) Read(p []byte) (n int, err error) {
n, err = r.rbuf.Read(p)
r.offset += int64(n)
if _, err := r.wbuf.Write(p[:n]); err != nil {
return n, err
}
return n, err
}
func (r *scannerReader) ReadByte() (b byte, err error) {
b, err = r.rbuf.ReadByte()
if err == nil {
r.offset++
return b, r.wbuf.WriteByte(b)
}
return b, err
}
func (r *scannerReader) Flush() error {
return r.wbuf.Flush()
}
// Seek seeks to a location. If the underlying reader is not an io.ReadSeeker,
// only whence=io.SeekCurrent with offset 0 is supported; any other operation
// fails.
func (r *scannerReader) Seek(offset int64, whence int) (int64, error) {
var err error
if r.seeker == nil {
if whence != io.SeekCurrent || offset != 0 {
return -1, ErrSeekNotSupported
}
}
if whence == io.SeekCurrent && offset == 0 {
return r.offset, nil
}
r.offset, err = r.seeker.Seek(offset, whence)
r.rbuf.Reset(r.reader)
return r.offset, err
}
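// Illustrative sketch (hypothetical internal usage): offset tracking works
// even when the underlying reader is not an io.ReadSeeker:
//
//	sr := newScannerReader(bytes.NewBufferString("PACK"), crc32.NewIEEE(), nil)
//	buf := make([]byte, 4)
//	_, _ = sr.Read(buf)                  // reads "PACK"; the bytes also feed the CRC
//	pos, _ := sr.Seek(0, io.SeekCurrent) // pos == 4, no seeker required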
package packfile
import (
"bytes"
"github.com/go-git/go-git/v6/plumbing"
)
// Version represents the packfile version.
type Version uint32
// Packfile versions.
const (
V2 Version = 2
)
// Supported returns true if the version is supported.
func (v Version) Supported() bool {
switch v {
case V2:
return true
default:
return false
}
}
// ObjectHeader contains the information related to the object. This
// information is collected from the bytes that precede the object's content.
type ObjectHeader struct {
Type plumbing.ObjectType
Offset int64
ContentOffset int64
Size int64
Reference plumbing.Hash
OffsetReference int64
Crc32 uint32
Hash plumbing.Hash
Hash256 *plumbing.Hash
content *bytes.Buffer
parent *ObjectHeader
diskType plumbing.ObjectType
externalRef bool
}
// ID returns the preferred object ID.
func (oh *ObjectHeader) ID() plumbing.Hash {
if oh.Hash256 != nil {
return *oh.Hash256
}
return oh.Hash
}
// SectionType represents the type of section in a packfile.
type SectionType int
// Section types.
const (
HeaderSection SectionType = iota
ObjectSection
FooterSection
)
// Header represents the packfile header.
type Header struct {
Version Version
ObjectsQty uint32
}
// PackData represents the data returned by the scanner.
type PackData struct {
Section SectionType
header Header
objectHeader ObjectHeader
checksum plumbing.Hash
}
// Value returns the value of the PackData based on its section type.
func (p PackData) Value() any {
switch p.Section {
case HeaderSection:
return p.header
case ObjectSection:
return p.objectHeader
case FooterSection:
return p.checksum
default:
return nil
}
}
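// Illustrative sketch of dispatching on a PackData value (hypothetical
// consumer loop; `data` stands for a PackData emitted by the scanner):
//
//	switch v := data.Value().(type) {
//	case Header:
//		fmt.Println("version:", v.Version, "objects:", v.ObjectsQty)
//	case ObjectHeader:
//		fmt.Println("object:", v.Type, "size:", v.Size)
//	case plumbing.Hash:
//		fmt.Println("pack checksum:", v)
//	}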
package object
import (
"io"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/storer"
"github.com/go-git/go-git/v6/utils/ioutil"
)
// Blob is used to store arbitrary data - it is generally a file.
type Blob struct {
// Hash of the blob.
Hash plumbing.Hash
// Size of the (uncompressed) blob.
Size int64
obj plumbing.EncodedObject
}
// GetBlob gets a blob from an object storer and decodes it.
func GetBlob(s storer.EncodedObjectStorer, h plumbing.Hash) (*Blob, error) {
o, err := s.EncodedObject(plumbing.BlobObject, h)
if err != nil {
return nil, err
}
return DecodeBlob(o)
}
// DecodeBlob decodes an encoded object into a *Blob.
func DecodeBlob(o plumbing.EncodedObject) (*Blob, error) {
b := &Blob{}
if err := b.Decode(o); err != nil {
return nil, err
}
return b, nil
}
// ID returns the object ID of the blob. The returned value will always match
// the current value of Blob.Hash.
//
// ID is present to fulfill the Object interface.
func (b *Blob) ID() plumbing.Hash {
return b.Hash
}
// Type returns the type of object. It always returns plumbing.BlobObject.
//
// Type is present to fulfill the Object interface.
func (b *Blob) Type() plumbing.ObjectType {
return plumbing.BlobObject
}
// Decode transforms a plumbing.EncodedObject into a Blob struct.
func (b *Blob) Decode(o plumbing.EncodedObject) error {
if o.Type() != plumbing.BlobObject {
return ErrUnsupportedObject
}
b.Hash = o.Hash()
b.Size = o.Size()
b.obj = o
return nil
}
// Encode transforms a Blob into a plumbing.EncodedObject.
func (b *Blob) Encode(o plumbing.EncodedObject) (err error) {
o.SetType(plumbing.BlobObject)
w, err := o.Writer()
if err != nil {
return err
}
defer ioutil.CheckClose(w, &err)
r, err := b.Reader()
if err != nil {
return err
}
defer ioutil.CheckClose(r, &err)
_, err = ioutil.CopyBufferPool(w, r)
return err
}
// Reader returns a reader that allows access to the content of the blob.
func (b *Blob) Reader() (io.ReadCloser, error) {
return b.obj.Reader()
}
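// Illustrative sketch: reading a blob's content in full (assumes a
// storer.EncodedObjectStorer `s` and a blob hash `h` are in scope):
//
//	b, err := GetBlob(s, h)
//	if err != nil {
//		return err
//	}
//	r, err := b.Reader()
//	if err != nil {
//		return err
//	}
//	defer r.Close()
//	data, err := io.ReadAll(r)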
// BlobIter provides an iterator for a set of blobs.
type BlobIter struct {
storer.EncodedObjectIter
s storer.EncodedObjectStorer
}
// NewBlobIter takes a storer.EncodedObjectStorer and a
// storer.EncodedObjectIter and returns a *BlobIter that iterates over all
// blobs contained in the storer.EncodedObjectIter.
//
// Any non-blob object returned by the storer.EncodedObjectIter is skipped.
func NewBlobIter(s storer.EncodedObjectStorer, iter storer.EncodedObjectIter) *BlobIter {
return &BlobIter{iter, s}
}
// Next moves the iterator to the next blob and returns a pointer to it. If
// there are no more blobs, it returns io.EOF.
func (iter *BlobIter) Next() (*Blob, error) {
for {
obj, err := iter.EncodedObjectIter.Next()
if err != nil {
return nil, err
}
if obj.Type() != plumbing.BlobObject {
continue
}
return DecodeBlob(obj)
}
}
// ForEach calls the cb function for each blob contained in this iter until
// an error happens or the end of the iter is reached. If ErrStop is sent
// the iteration is stopped but no error is returned. The iterator is closed.
func (iter *BlobIter) ForEach(cb func(*Blob) error) error {
return iter.EncodedObjectIter.ForEach(func(obj plumbing.EncodedObject) error {
if obj.Type() != plumbing.BlobObject {
return nil
}
b, err := DecodeBlob(obj)
if err != nil {
return err
}
return cb(b)
})
}
package object
import (
"bytes"
"context"
"fmt"
"strings"
"github.com/go-git/go-git/v6/utils/merkletrie"
)
// Change values represent a detected change between two git trees. For
// modifications, From is the original status of the node and To is its
// final status. For insertions, From is the zero value and for
// deletions To is the zero value.
type Change struct {
From ChangeEntry
To ChangeEntry
}
var empty ChangeEntry
// Action returns the kind of action represented by the change, an
// insertion, a deletion or a modification.
func (c *Change) Action() (merkletrie.Action, error) {
if c.From == empty && c.To == empty {
return merkletrie.Action(0),
fmt.Errorf("malformed change: empty from and to")
}
if c.From == empty {
return merkletrie.Insert, nil
}
if c.To == empty {
return merkletrie.Delete, nil
}
return merkletrie.Modify, nil
}
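// Illustrative sketch: classifying a change (assumes a *Change value `c`):
//
//	action, err := c.Action()
//	if err != nil {
//		return err
//	}
//	switch action {
//	case merkletrie.Insert: // file added in To
//	case merkletrie.Delete: // file removed from From
//	case merkletrie.Modify: // content or mode changed
//	}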
// Files returns the files before and after a change.
// For insertions from will be nil. For deletions to will be nil.
func (c *Change) Files() (from, to *File, err error) {
action, err := c.Action()
if err != nil {
return from, to, err
}
if action == merkletrie.Insert || action == merkletrie.Modify {
to, err = c.To.Tree.TreeEntryFile(&c.To.TreeEntry)
if !c.To.TreeEntry.Mode.IsFile() {
return nil, nil, nil
}
if err != nil {
return from, to, err
}
}
if action == merkletrie.Delete || action == merkletrie.Modify {
from, err = c.From.Tree.TreeEntryFile(&c.From.TreeEntry)
if !c.From.TreeEntry.Mode.IsFile() {
return nil, nil, nil
}
if err != nil {
return from, to, err
}
}
return from, to, err
}
func (c *Change) String() string {
action, err := c.Action()
if err != nil {
return "malformed change"
}
return fmt.Sprintf("<Action: %s, Path: %s>", action, c.name())
}
// Patch returns a Patch with all the file changes in chunks. This
// representation can be used to create several diff outputs.
func (c *Change) Patch() (*Patch, error) {
return c.PatchContext(context.Background())
}
// PatchContext returns a Patch with all the file changes in chunks. This
// representation can be used to create several diff outputs.
// If the context expires, a non-nil error will be returned.
// Provided context must be non-nil.
func (c *Change) PatchContext(ctx context.Context) (*Patch, error) {
return getPatchContext(ctx, "", c)
}
func (c *Change) name() string {
if c.From != empty {
return c.From.Name
}
return c.To.Name
}
// ChangeEntry values represent a node that has suffered a change.
type ChangeEntry struct {
// Full path of the node using "/" as separator.
Name string
// Parent tree of the node that has changed.
Tree *Tree
// The entry of the node.
TreeEntry TreeEntry
}
// Changes represents a collection of changes between two git trees.
// Implements sort.Interface lexicographically over the path of the
// changed files.
type Changes []*Change
func (c Changes) Len() int {
return len(c)
}
func (c Changes) Swap(i, j int) {
c[i], c[j] = c[j], c[i]
}
func (c Changes) Less(i, j int) bool {
return strings.Compare(c[i].name(), c[j].name()) < 0
}
func (c Changes) String() string {
var buffer bytes.Buffer
buffer.WriteString("[")
comma := ""
for _, v := range c {
buffer.WriteString(comma)
buffer.WriteString(v.String())
comma = ", "
}
buffer.WriteString("]")
return buffer.String()
}
// Patch returns a Patch with all the changes in chunks. This
// representation can be used to create several diff outputs.
func (c Changes) Patch() (*Patch, error) {
return c.PatchContext(context.Background())
}
// PatchContext returns a Patch with all the changes in chunks. This
// representation can be used to create several diff outputs.
// If the context expires, a non-nil error will be returned.
// Provided context must be non-nil.
func (c Changes) PatchContext(ctx context.Context) (*Patch, error) {
return getPatchContext(ctx, "", c...)
}
package object
import (
"errors"
"fmt"
"github.com/go-git/go-git/v6/utils/merkletrie"
"github.com/go-git/go-git/v6/utils/merkletrie/noder"
)
// The following functions transform change types from the merkletrie
// package to change types from this package.
func newChange(c merkletrie.Change) (*Change, error) {
ret := &Change{}
var err error
if ret.From, err = newChangeEntry(c.From); err != nil {
return nil, fmt.Errorf("from field: %s", err)
}
if ret.To, err = newChangeEntry(c.To); err != nil {
return nil, fmt.Errorf("to field: %s", err)
}
return ret, nil
}
func newChangeEntry(p noder.Path) (ChangeEntry, error) {
if p == nil {
return empty, nil
}
asTreeNoder, ok := p.Last().(*treeNoder)
if !ok {
return ChangeEntry{}, errors.New("cannot transform non-TreeNoders")
}
return ChangeEntry{
Name: p.String(),
Tree: asTreeNoder.parent,
TreeEntry: TreeEntry{
Name: asTreeNoder.name,
Mode: asTreeNoder.mode,
Hash: asTreeNoder.hash,
},
}, nil
}
func newChanges(src merkletrie.Changes) (Changes, error) {
ret := make(Changes, len(src))
var err error
for i, e := range src {
ret[i], err = newChange(e)
if err != nil {
return nil, fmt.Errorf("change #%d: %s", i, err)
}
}
return ret, nil
}
package object
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"strings"
"github.com/ProtonMail/go-crypto/openpgp"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/storer"
"github.com/go-git/go-git/v6/utils/ioutil"
"github.com/go-git/go-git/v6/utils/sync"
)
const (
beginpgp string = "-----BEGIN PGP SIGNATURE-----"
endpgp string = "-----END PGP SIGNATURE-----"
headerpgp string = "gpgsig"
headerencoding string = "encoding"
// https://github.com/git/git/blob/bcb6cae2966cc407ca1afc77413b3ef11103c175/Documentation/gitformat-signature.txt#L153
// When a merge commit is created from a signed tag, the tag is embedded in
// the commit with the "mergetag" header.
headermergetag string = "mergetag"
defaultUtf8CommitMessageEncoding MessageEncoding = "UTF-8"
)
// Hash represents the hash of an object
type Hash plumbing.Hash
// MessageEncoding represents the encoding of a commit
type MessageEncoding string
// Commit points to a single tree, marking it as what the project looked like
// at a certain point in time. It contains meta-information about that point
// in time, such as a timestamp, the author of the changes since the last
// commit, a pointer to the previous commit(s), etc.
// http://shafiulazam.com/gitbook/1_the_git_object_model.html
type Commit struct {
// Hash of the commit object.
Hash plumbing.Hash
// Author is the original author of the commit.
Author Signature
// Committer is the one performing the commit, might be different from
// Author.
Committer Signature
// MergeTag is the embedded tag object when a merge commit is created by
// merging a signed tag.
MergeTag string
// PGPSignature is the PGP signature of the commit.
PGPSignature string
// Message is the commit message, contains arbitrary text.
Message string
// TreeHash is the hash of the root tree of the commit.
TreeHash plumbing.Hash
// ParentHashes are the hashes of the parent commits of the commit.
ParentHashes []plumbing.Hash
// Encoding is the encoding of the commit.
Encoding MessageEncoding
// List of extra headers of the commit
ExtraHeaders []ExtraHeader
s storer.EncodedObjectStorer
}
// ExtraHeader holds any non-standard header
type ExtraHeader struct {
// Header name
Key string
// Value of the header
Value string
}
// Format implements fmt.Formatter for ExtraHeader.
func (h ExtraHeader) Format(f fmt.State, verb rune) {
switch verb {
case 'v':
fmt.Fprintf(f, "ExtraHeader{Key: %v, Value: %v}", h.Key, h.Value)
default:
fmt.Fprintf(f, "%s", h.Key)
if len(h.Value) > 0 {
fmt.Fprint(f, " ")
// Content may be spread on multiple lines, if so we need to
// prepend each of them with a space for "continuation".
value := strings.TrimSuffix(h.Value, "\n")
lines := strings.Split(value, "\n")
fmt.Fprint(f, strings.Join(lines, "\n "))
}
}
}
// parseExtraHeader parses an extra header and reports whether it may continue
// on the next line.
func parseExtraHeader(line []byte) (ExtraHeader, bool) {
split := bytes.SplitN(line, []byte{' '}, 2)
out := ExtraHeader{
Key: string(bytes.TrimRight(split[0], "\n")),
Value: "",
}
if len(split) == 2 {
out.Value += string(split[1])
return out, true
}
return out, false
}
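// Illustrative note: a multi-line header value is continued by prefixing
// follow-up lines with a single space, e.g.
//
//	somekey first line
//	 second line
//
// parseExtraHeader on the first line yields {Key: "somekey", Value: "first line\n"}
// and reports true, so Decode keeps appending the continuation lines with the
// leading space stripped.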
// GetCommit gets a commit from an object storer and decodes it.
func GetCommit(s storer.EncodedObjectStorer, h plumbing.Hash) (*Commit, error) {
o, err := s.EncodedObject(plumbing.CommitObject, h)
if err != nil {
return nil, err
}
return DecodeCommit(s, o)
}
// DecodeCommit decodes an encoded object into a *Commit and associates it to
// the given object storer.
func DecodeCommit(s storer.EncodedObjectStorer, o plumbing.EncodedObject) (*Commit, error) {
c := &Commit{s: s}
if err := c.Decode(o); err != nil {
return nil, err
}
return c, nil
}
// Tree returns the Tree from the commit.
func (c *Commit) Tree() (*Tree, error) {
return GetTree(c.s, c.TreeHash)
}
// PatchContext returns the Patch between the actual commit and the provided one.
// An error will be returned if the context expires. Provided context must be non-nil.
//
// NOTE: Since version 5.1.0 the renames are correctly handled, the settings
// used are the recommended options DefaultDiffTreeOptions.
func (c *Commit) PatchContext(ctx context.Context, to *Commit) (*Patch, error) {
fromTree, err := c.Tree()
if err != nil {
return nil, err
}
var toTree *Tree
if to != nil {
toTree, err = to.Tree()
if err != nil {
return nil, err
}
}
return fromTree.PatchContext(ctx, toTree)
}
// Patch returns the Patch between the actual commit and the provided one.
//
// NOTE: Since version 5.1.0 the renames are correctly handled, the settings
// used are the recommended options DefaultDiffTreeOptions.
func (c *Commit) Patch(to *Commit) (*Patch, error) {
return c.PatchContext(context.Background(), to)
}
// Parents return a CommitIter to the parent Commits.
func (c *Commit) Parents() CommitIter {
return NewCommitIter(c.s,
storer.NewEncodedObjectLookupIter(c.s, plumbing.CommitObject, c.ParentHashes),
)
}
// NumParents returns the number of parents in a commit.
func (c *Commit) NumParents() int {
return len(c.ParentHashes)
}
// ErrParentNotFound is returned when the parent commit is not found.
var ErrParentNotFound = errors.New("commit parent not found")
// Parent returns the ith parent of a commit.
func (c *Commit) Parent(i int) (*Commit, error) {
if len(c.ParentHashes) == 0 || i > len(c.ParentHashes)-1 {
return nil, ErrParentNotFound
}
return GetCommit(c.s, c.ParentHashes[i])
}
// File returns the file with the specified "path" in the commit and a
// nil error if the file exists. If the file does not exist, it returns
// a nil file and the ErrFileNotFound error.
func (c *Commit) File(path string) (*File, error) {
tree, err := c.Tree()
if err != nil {
return nil, err
}
return tree.File(path)
}
// Files returns a FileIter allowing to iterate over the Tree
func (c *Commit) Files() (*FileIter, error) {
tree, err := c.Tree()
if err != nil {
return nil, err
}
return tree.Files(), nil
}
// ID returns the object ID of the commit. The returned value will always match
// the current value of Commit.Hash.
//
// ID is present to fulfill the Object interface.
func (c *Commit) ID() plumbing.Hash {
return c.Hash
}
// Type returns the type of object. It always returns plumbing.CommitObject.
//
// Type is present to fulfill the Object interface.
func (c *Commit) Type() plumbing.ObjectType {
return plumbing.CommitObject
}
// Decode transforms a plumbing.EncodedObject into a Commit struct.
func (c *Commit) Decode(o plumbing.EncodedObject) (err error) {
if o.Type() != plumbing.CommitObject {
return ErrUnsupportedObject
}
c.Hash = o.Hash()
c.Encoding = defaultUtf8CommitMessageEncoding
reader, err := o.Reader()
if err != nil {
return err
}
defer ioutil.CheckClose(reader, &err)
r := sync.GetBufioReader(reader)
defer sync.PutBufioReader(r)
var message bool
var mergetag bool
var pgpsig bool
var msgbuf bytes.Buffer
var extraheader *ExtraHeader
for {
line, err := r.ReadBytes('\n')
if err != nil && err != io.EOF {
return err
}
if mergetag {
if len(line) > 0 && line[0] == ' ' {
line = bytes.TrimLeft(line, " ")
c.MergeTag += string(line)
continue
}
mergetag = false
}
if pgpsig {
if len(line) > 0 && line[0] == ' ' {
line = bytes.TrimLeft(line, " ")
c.PGPSignature += string(line)
continue
}
pgpsig = false
}
if extraheader != nil {
if len(line) > 0 && line[0] == ' ' {
extraheader.Value += string(line[1:])
continue
}
extraheader.Value = strings.TrimRight(extraheader.Value, "\n")
c.ExtraHeaders = append(c.ExtraHeaders, *extraheader)
extraheader = nil
}
if !message {
originalLine := line
line = bytes.TrimSpace(line)
if len(line) == 0 {
message = true
continue
}
split := bytes.SplitN(line, []byte{' '}, 2)
var data []byte
if len(split) == 2 {
data = split[1]
}
switch string(split[0]) {
case "tree":
c.TreeHash = plumbing.NewHash(string(data))
case "parent":
c.ParentHashes = append(c.ParentHashes, plumbing.NewHash(string(data)))
case "author":
c.Author.Decode(data)
case "committer":
c.Committer.Decode(data)
case headermergetag:
c.MergeTag += string(data) + "\n"
mergetag = true
case headerencoding:
c.Encoding = MessageEncoding(data)
case headerpgp:
c.PGPSignature += string(data) + "\n"
pgpsig = true
default:
h, maybecontinued := parseExtraHeader(originalLine)
if maybecontinued {
extraheader = &h
} else {
c.ExtraHeaders = append(c.ExtraHeaders, h)
}
}
} else {
msgbuf.Write(line)
}
if err == io.EOF {
break
}
}
c.Message = msgbuf.String()
return nil
}
// Encode transforms a Commit into a plumbing.EncodedObject.
func (c *Commit) Encode(o plumbing.EncodedObject) error {
return c.encode(o, true)
}
// EncodeWithoutSignature exports a Commit into a plumbing.EncodedObject without
// the signature (corresponding to the payload of the PGP signature).
func (c *Commit) EncodeWithoutSignature(o plumbing.EncodedObject) error {
return c.encode(o, false)
}
func (c *Commit) encode(o plumbing.EncodedObject, includeSig bool) (err error) {
o.SetType(plumbing.CommitObject)
w, err := o.Writer()
if err != nil {
return err
}
defer ioutil.CheckClose(w, &err)
if _, err = fmt.Fprintf(w, "tree %s\n", c.TreeHash.String()); err != nil {
return err
}
for _, parent := range c.ParentHashes {
if _, err = fmt.Fprintf(w, "parent %s\n", parent.String()); err != nil {
return err
}
}
if _, err = fmt.Fprint(w, "author "); err != nil {
return err
}
if err = c.Author.Encode(w); err != nil {
return err
}
if _, err = fmt.Fprint(w, "\ncommitter "); err != nil {
return err
}
if err = c.Committer.Encode(w); err != nil {
return err
}
if c.MergeTag != "" {
if _, err = fmt.Fprint(w, "\n"+headermergetag+" "); err != nil {
return err
}
// Split tag information lines and re-write with a left padding and
// newline. Use join for this so it's clear that a newline should not be
// added after this section. The newline will be added either as part of
// the PGP signature or the commit message.
mergetag := strings.TrimSuffix(c.MergeTag, "\n")
lines := strings.Split(mergetag, "\n")
if _, err = fmt.Fprint(w, strings.Join(lines, "\n ")); err != nil {
return err
}
}
if string(c.Encoding) != "" && c.Encoding != defaultUtf8CommitMessageEncoding {
if _, err = fmt.Fprintf(w, "\n%s %s", headerencoding, c.Encoding); err != nil {
return err
}
}
for _, header := range c.ExtraHeaders {
if _, err = fmt.Fprintf(w, "\n%s", header); err != nil {
return err
}
}
if c.PGPSignature != "" && includeSig {
if _, err = fmt.Fprint(w, "\n"+headerpgp+" "); err != nil {
return err
}
// Split all the signature lines and re-write with a left padding and
// newline. Use join for this so it's clear that a newline should not be
// added after this section, as it will be added when the message is
// printed.
signature := strings.TrimSuffix(c.PGPSignature, "\n")
lines := strings.Split(signature, "\n")
if _, err = fmt.Fprint(w, strings.Join(lines, "\n ")); err != nil {
return err
}
}
if _, err = fmt.Fprintf(w, "\n\n%s", c.Message); err != nil {
return err
}
return err
}
// Stats returns the stats of a commit.
func (c *Commit) Stats() (FileStats, error) {
return c.StatsContext(context.Background())
}
// StatsContext returns the stats of a commit. An error will be returned if
// the context expires. Provided context must be non-nil.
func (c *Commit) StatsContext(ctx context.Context) (FileStats, error) {
fromTree, err := c.Tree()
if err != nil {
return nil, err
}
toTree := &Tree{}
if c.NumParents() != 0 {
firstParent, err := c.Parents().Next()
if err != nil {
return nil, err
}
toTree, err = firstParent.Tree()
if err != nil {
return nil, err
}
}
patch, err := toTree.PatchContext(ctx, fromTree)
if err != nil {
return nil, err
}
return getFileStatsFromFilePatches(patch.FilePatches()), nil
}
func (c *Commit) String() string {
return fmt.Sprintf(
"%s %s\nAuthor: %s\nDate: %s\n\n%s\n",
plumbing.CommitObject, c.Hash, c.Author.String(),
c.Author.When.Format(DateFormat), indent(c.Message),
)
}
// Verify performs PGP verification of the commit with a provided armored
// keyring and returns openpgp.Entity associated with verifying key on success.
func (c *Commit) Verify(armoredKeyRing string) (*openpgp.Entity, error) {
keyRingReader := strings.NewReader(armoredKeyRing)
keyring, err := openpgp.ReadArmoredKeyRing(keyRingReader)
if err != nil {
return nil, err
}
// Extract signature.
signature := strings.NewReader(c.PGPSignature)
encoded := &plumbing.MemoryObject{}
// Encode commit components, excluding signature and get a reader object.
if err := c.EncodeWithoutSignature(encoded); err != nil {
return nil, err
}
er, err := encoded.Reader()
if err != nil {
return nil, err
}
return openpgp.CheckArmoredDetachedSignature(keyring, er, signature, nil)
}
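// Illustrative sketch: verifying a signed commit against an armored keyring
// (assumes `commit` and `armoredKeyRing` values are in scope):
//
//	entity, err := commit.Verify(armoredKeyRing)
//	if err != nil {
//		// invalid signature, or the keyring could not be read
//	}
//	fmt.Println("signed by:", entity.PrimaryKey.KeyIdString())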
// Less defines a compare function to determine which commit is 'earlier' by:
// - First use Committer.When
// - If the Committer.When values are equal then use Author.When
// - If the Author.When values are also equal then compare the hashes
func (c *Commit) Less(rhs *Commit) bool {
return c.Committer.When.Before(rhs.Committer.When) ||
(c.Committer.When.Equal(rhs.Committer.When) &&
(c.Author.When.Before(rhs.Author.When) ||
(c.Author.When.Equal(rhs.Author.When) && c.Hash.Compare(rhs.Hash.Bytes()) < 0)))
}
func indent(t string) string {
output := make([]string, 0, strings.Count(t, "\n")+1)
for line := range strings.SplitSeq(t, "\n") {
if len(line) != 0 {
line = " " + line
}
output = append(output, line)
}
return strings.Join(output, "\n")
}
// CommitIter is a generic closable interface for iterating over commits.
type CommitIter interface {
Next() (*Commit, error)
ForEach(func(*Commit) error) error
Close()
}
// storerCommitIter provides an iterator from commits in an EncodedObjectStorer.
type storerCommitIter struct {
storer.EncodedObjectIter
s storer.EncodedObjectStorer
}
// NewCommitIter takes a storer.EncodedObjectStorer and a
// storer.EncodedObjectIter and returns a CommitIter that iterates over all
// commits contained in the storer.EncodedObjectIter.
//
// Any non-commit object returned by the storer.EncodedObjectIter is skipped.
func NewCommitIter(s storer.EncodedObjectStorer, iter storer.EncodedObjectIter) CommitIter {
return &storerCommitIter{iter, s}
}
// Next moves the iterator to the next commit and returns a pointer to it. If
// there are no more commits, it returns io.EOF.
func (iter *storerCommitIter) Next() (*Commit, error) {
obj, err := iter.EncodedObjectIter.Next()
if err != nil {
return nil, err
}
return DecodeCommit(iter.s, obj)
}
// ForEach calls the cb function for each commit contained in this iter until
// an error happens or the end of the iter is reached. If ErrStop is sent
// the iteration is stopped but no error is returned. The iterator is closed.
func (iter *storerCommitIter) ForEach(cb func(*Commit) error) error {
return iter.EncodedObjectIter.ForEach(func(obj plumbing.EncodedObject) error {
c, err := DecodeCommit(iter.s, obj)
if err != nil {
return err
}
return cb(c)
})
}
func (iter *storerCommitIter) Close() {
iter.EncodedObjectIter.Close()
}
package object
import (
"container/list"
"errors"
"io"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/storer"
"github.com/go-git/go-git/v6/storage"
)
type commitPreIterator struct {
seenExternal map[plumbing.Hash]bool
seen map[plumbing.Hash]bool
stack []CommitIter
start *Commit
}
func forEachCommit(next func() (*Commit, error), cb func(*Commit) error) error {
for {
c, err := next()
if errors.Is(err, io.EOF) {
break
}
if err != nil {
return err
}
err = cb(c)
if err == storer.ErrStop {
break
}
if err != nil {
return err
}
}
return nil
}
// NewCommitPreorderIter returns a CommitIter that walks the commit history,
// starting at the given commit and visiting its parents in pre-order.
// Each commit is visited only once. Errors might be returned if the history
// cannot be traversed (e.g. missing objects). Commits in seenExternal are
// treated as already visited, and ignore allows skipping some commits from
// being iterated.
func NewCommitPreorderIter(
c *Commit,
seenExternal map[plumbing.Hash]bool,
ignore []plumbing.Hash,
) CommitIter {
seen := make(map[plumbing.Hash]bool)
for _, h := range ignore {
seen[h] = true
}
return &commitPreIterator{
seenExternal: seenExternal,
seen: seen,
stack: make([]CommitIter, 0),
start: c,
}
}
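// Illustrative sketch: walking the history from a starting commit in
// pre-order (assumes a *Commit value `head`):
//
//	iter := NewCommitPreorderIter(head, nil, nil)
//	err := iter.ForEach(func(c *Commit) error {
//		fmt.Println(c.Hash)
//		return nil // or storer.ErrStop to finish early without an error
//	})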
func (w *commitPreIterator) Next() (*Commit, error) {
var c *Commit
for {
if w.start != nil {
c = w.start
w.start = nil
} else {
current := len(w.stack) - 1
if current < 0 {
return nil, io.EOF
}
var err error
c, err = w.stack[current].Next()
if err == io.EOF {
w.stack = w.stack[:current]
continue
}
if err != nil {
return nil, err
}
}
if w.seen[c.Hash] || w.seenExternal[c.Hash] {
continue
}
w.seen[c.Hash] = true
if c.NumParents() > 0 {
w.stack = append(w.stack, filteredParentIter(c, w.seen))
}
return c, nil
}
}
func filteredParentIter(c *Commit, seen map[plumbing.Hash]bool) CommitIter {
var hashes []plumbing.Hash
for _, h := range c.ParentHashes {
if !seen[h] {
hashes = append(hashes, h)
}
}
return NewCommitIter(c.s,
storer.NewEncodedObjectLookupIter(c.s, plumbing.CommitObject, hashes),
)
}
func (w *commitPreIterator) ForEach(cb func(*Commit) error) error {
return forEachCommit(w.Next, cb)
}
func (w *commitPreIterator) Close() {}
type commitPostIterator struct {
stack []*Commit
seen map[plumbing.Hash]bool
}
// NewCommitPostorderIter returns a CommitIter that walks the commit
// history like WalkCommitHistory but in post-order. This means that after
// walking a merge commit, the merged commit will be walked before the base
// it was merged on. This can be useful if you wish to see the history in
// chronological order. Ignore allows to skip some commits from being iterated.
func NewCommitPostorderIter(c *Commit, ignore []plumbing.Hash) CommitIter {
seen := make(map[plumbing.Hash]bool)
for _, h := range ignore {
seen[h] = true
}
return &commitPostIterator{
stack: []*Commit{c},
seen: seen,
}
}
func (w *commitPostIterator) Next() (*Commit, error) {
for {
if len(w.stack) == 0 {
return nil, io.EOF
}
c := w.stack[len(w.stack)-1]
w.stack = w.stack[:len(w.stack)-1]
if w.seen[c.Hash] {
continue
}
w.seen[c.Hash] = true
return c, c.Parents().ForEach(func(p *Commit) error {
w.stack = append(w.stack, p)
return nil
})
}
}
func (w *commitPostIterator) ForEach(cb func(*Commit) error) error {
return forEachCommit(w.Next, cb)
}
func (w *commitPostIterator) Close() {}
type commitPostIteratorFirstParent struct {
stack []*Commit
seen map[plumbing.Hash]bool
}
// NewCommitPostorderIterFirstParent returns a CommitIter that walks the commit
// history like WalkCommitHistory but in post-order.
//
// This option acts like the git log --first-parent flag, skipping intermediate
// commits that were brought in via a merge commit.
// Ignore allows to skip some commits from being iterated.
func NewCommitPostorderIterFirstParent(c *Commit, ignore []plumbing.Hash) CommitIter {
seen := make(map[plumbing.Hash]bool)
for _, h := range ignore {
seen[h] = true
}
return &commitPostIteratorFirstParent{
stack: []*Commit{c},
seen: seen,
}
}
func (w *commitPostIteratorFirstParent) Next() (*Commit, error) {
for {
if len(w.stack) == 0 {
return nil, io.EOF
}
c := w.stack[len(w.stack)-1]
w.stack = w.stack[:len(w.stack)-1]
if w.seen[c.Hash] {
continue
}
w.seen[c.Hash] = true
return c, c.Parents().ForEach(func(p *Commit) error {
if len(c.ParentHashes) > 0 && p.Hash == c.ParentHashes[0] {
w.stack = append(w.stack, p)
}
return nil
})
}
}
func (w *commitPostIteratorFirstParent) ForEach(cb func(*Commit) error) error {
return forEachCommit(w.Next, cb)
}
func (w *commitPostIteratorFirstParent) Close() {}
// commitAllIterator stands for commit iterator for all refs.
type commitAllIterator struct {
// currCommit points to the current commit.
currCommit *list.Element
}
// NewCommitAllIter returns a new commit iterator for all refs.
// repoStorer is a repo Storer used to get commits and references.
// commitIterFunc is a commit iterator function, used to iterate through ref commits in chosen order
func NewCommitAllIter(repoStorer storage.Storer, commitIterFunc func(*Commit) CommitIter) (CommitIter, error) {
commitsPath := list.New()
commitsLookup := make(map[plumbing.Hash]*list.Element)
head, err := storer.ResolveReference(repoStorer, plumbing.HEAD)
if err == nil {
err = addReference(repoStorer, commitIterFunc, head, commitsPath, commitsLookup)
}
if err != nil && err != plumbing.ErrReferenceNotFound {
return nil, err
}
// add all references along with the HEAD
refIter, err := repoStorer.IterReferences()
if err != nil {
return nil, err
}
defer refIter.Close()
for {
ref, err := refIter.Next()
if err == io.EOF {
break
}
if err == plumbing.ErrReferenceNotFound {
continue
}
if err != nil {
return nil, err
}
if err = addReference(repoStorer, commitIterFunc, ref, commitsPath, commitsLookup); err != nil {
return nil, err
}
}
return &commitAllIterator{commitsPath.Front()}, nil
}
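// Illustrative sketch: iterating the commits reachable from every ref in
// committer-time order (assumes a storage.Storer value `repoStorer`):
//
//	iter, err := NewCommitAllIter(repoStorer, func(c *Commit) CommitIter {
//		return NewCommitIterCTime(c, nil, nil)
//	})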
func addReference(
repoStorer storage.Storer,
commitIterFunc func(*Commit) CommitIter,
ref *plumbing.Reference,
commitsPath *list.List,
commitsLookup map[plumbing.Hash]*list.Element,
) error {
_, exists := commitsLookup[ref.Hash()]
if exists {
// we already have it - skip the reference.
return nil
}
refCommit, _ := GetCommit(repoStorer, ref.Hash())
if refCommit == nil {
// if it's not a commit - skip it.
return nil
}
var (
refCommits []*Commit
parent *list.Element
)
// collect all ref commits to add
commitIter := commitIterFunc(refCommit)
for c, e := commitIter.Next(); e == nil; {
parent, exists = commitsLookup[c.Hash]
if exists {
break
}
refCommits = append(refCommits, c)
c, e = commitIter.Next()
}
commitIter.Close()
if parent == nil {
// common parent - not found
// add all commits to the path from this ref (maybe it's a HEAD and we don't have anything, yet)
for _, c := range refCommits {
parent = commitsPath.PushBack(c)
commitsLookup[c.Hash] = parent
}
} else {
// add ref's commits to the path in reverse order (from the latest)
for i := len(refCommits) - 1; i >= 0; i-- {
c := refCommits[i]
// insert before found common parent
parent = commitsPath.InsertBefore(c, parent)
commitsLookup[c.Hash] = parent
}
}
return nil
}
func (it *commitAllIterator) Next() (*Commit, error) {
if it.currCommit == nil {
return nil, io.EOF
}
c := it.currCommit.Value.(*Commit)
it.currCommit = it.currCommit.Next()
return c, nil
}
func (it *commitAllIterator) ForEach(cb func(*Commit) error) error {
return forEachCommit(it.Next, cb)
}
func (it *commitAllIterator) Close() {
it.currCommit = nil
}
package object
import (
"errors"
"io"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/storer"
)
type bfsCommitIterator struct {
seenExternal map[plumbing.Hash]bool
seen map[plumbing.Hash]bool
queue []*Commit
}
// NewCommitIterBSF returns a CommitIter that walks the commit history,
// starting at the given commit and visiting its parents in breadth-first
// order. Each commit is visited only once. Errors might be returned if the
// history cannot be traversed (e.g. missing objects). Commits in seenExternal
// are treated as already visited, and ignore allows skipping some commits
// from being iterated.
func NewCommitIterBSF(
c *Commit,
seenExternal map[plumbing.Hash]bool,
ignore []plumbing.Hash,
) CommitIter {
seen := make(map[plumbing.Hash]bool)
for _, h := range ignore {
seen[h] = true
}
return &bfsCommitIterator{
seenExternal: seenExternal,
seen: seen,
queue: []*Commit{c},
}
}
func (w *bfsCommitIterator) appendHash(store storer.EncodedObjectStorer, h plumbing.Hash) error {
if w.seen[h] || w.seenExternal[h] {
return nil
}
c, err := GetCommit(store, h)
if err != nil {
return err
}
w.queue = append(w.queue, c)
return nil
}
func (w *bfsCommitIterator) Next() (*Commit, error) {
var c *Commit
for {
if len(w.queue) == 0 {
return nil, io.EOF
}
c = w.queue[0]
w.queue = w.queue[1:]
if w.seen[c.Hash] || w.seenExternal[c.Hash] {
continue
}
w.seen[c.Hash] = true
for _, h := range c.ParentHashes {
err := w.appendHash(c.s, h)
if err != nil {
return nil, err
}
}
return c, nil
}
}
func (w *bfsCommitIterator) ForEach(cb func(*Commit) error) error {
for {
c, err := w.Next()
if err == io.EOF {
break
}
if err != nil {
return err
}
err = cb(c)
if errors.Is(err, storer.ErrStop) {
break
}
if err != nil {
return err
}
}
return nil
}
func (w *bfsCommitIterator) Close() {}
package object
import (
"errors"
"io"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/storer"
)
// NewFilterCommitIter returns a CommitIter that walks the commit history,
// starting at the passed commit and visiting its parents in breadth-first order.
// The commits returned by the CommitIter will validate the passed CommitFilter.
// The history won't be traversed beyond a commit if isLimit is true for it.
// Each commit will be visited only once.
// If the commit history can not be traversed, or the Close() method is called,
// the CommitIter won't return more commits.
// If no isValid is passed, all ancestors of the from commit will be valid.
// If no isLimit is passed, all ancestors of all commits will be visited.
func NewFilterCommitIter(
from *Commit,
isValid *CommitFilter,
isLimit *CommitFilter,
) CommitIter {
var validFilter CommitFilter
if isValid == nil {
validFilter = func(_ *Commit) bool {
return true
}
} else {
validFilter = *isValid
}
var limitFilter CommitFilter
if isLimit == nil {
limitFilter = func(_ *Commit) bool {
return false
}
} else {
limitFilter = *isLimit
}
return &filterCommitIter{
isValid: validFilter,
isLimit: limitFilter,
visited: map[plumbing.Hash]struct{}{},
queue: []*Commit{from},
}
}
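// Illustrative sketch: yielding only merge commits without limiting the
// traversal (assumes a *Commit value `head`):
//
//	var isMerge CommitFilter = func(c *Commit) bool { return c.NumParents() > 1 }
//	iter := NewFilterCommitIter(head, &isMerge, nil)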
// CommitFilter returns a boolean for the passed Commit
type CommitFilter func(*Commit) bool
// filterCommitIter implements CommitIter
type filterCommitIter struct {
isValid CommitFilter
isLimit CommitFilter
visited map[plumbing.Hash]struct{}
queue []*Commit
lastErr error
}
// Next returns the next commit of the CommitIter.
// It will return io.EOF if there are no more commits to visit,
// or an error if the history could not be traversed.
func (w *filterCommitIter) Next() (*Commit, error) {
var commit *Commit
var err error
for {
commit, err = w.popNewFromQueue()
if err != nil {
return nil, w.close(err)
}
w.visited[commit.Hash] = struct{}{}
if !w.isLimit(commit) {
err = w.addToQueue(commit.s, commit.ParentHashes...)
if err != nil {
return nil, w.close(err)
}
}
if w.isValid(commit) {
return commit, nil
}
}
}
// ForEach runs the passed callback over each Commit returned by the CommitIter
// until the callback returns an error or there are no more commits to traverse.
func (w *filterCommitIter) ForEach(cb func(*Commit) error) error {
for {
commit, err := w.Next()
if err == io.EOF {
break
}
if err != nil {
return err
}
if err := cb(commit); errors.Is(err, storer.ErrStop) {
break
} else if err != nil {
return err
}
}
return nil
}
// Error returns the error that caused the CommitIter to stop returning commits.
func (w *filterCommitIter) Error() error {
return w.lastErr
}
// Close closes the CommitIter
func (w *filterCommitIter) Close() {
w.visited = map[plumbing.Hash]struct{}{}
w.queue = []*Commit{}
w.isLimit = nil
w.isValid = nil
}
// close closes the CommitIter with an error
func (w *filterCommitIter) close(err error) error {
w.Close()
w.lastErr = err
return err
}
// popNewFromQueue returns the first new commit from the internal fifo queue,
// or an io.EOF error if the queue is empty
func (w *filterCommitIter) popNewFromQueue() (*Commit, error) {
var first *Commit
for {
if len(w.queue) == 0 {
if w.lastErr != nil {
return nil, w.lastErr
}
return nil, io.EOF
}
first = w.queue[0]
w.queue = w.queue[1:]
if _, ok := w.visited[first.Hash]; ok {
continue
}
return first, nil
}
}
// addToQueue adds the passed commits to the internal fifo queue if they weren't seen
// or returns an error if the passed hashes could not be used to get valid commits
func (w *filterCommitIter) addToQueue(
store storer.EncodedObjectStorer,
hashes ...plumbing.Hash,
) error {
for _, hash := range hashes {
if _, ok := w.visited[hash]; ok {
continue
}
commit, err := GetCommit(store, hash)
if err != nil {
return err
}
w.queue = append(w.queue, commit)
}
return nil
}
package object
import (
"errors"
"io"
"github.com/emirpasic/gods/trees/binaryheap"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/storer"
)
type commitIteratorByCTime struct {
seenExternal map[plumbing.Hash]bool
seen map[plumbing.Hash]bool
heap *binaryheap.Heap
}
// NewCommitIterCTime returns a CommitIter that walks the commit history,
// starting at the given commit and visiting its parents while preserving
// committer time order. This appears to be the closest order to `git log`.
// Each commit is visited only once. Errors might be returned if the history
// cannot be traversed (e.g. missing objects). Commits in seenExternal are
// treated as already visited, and ignore allows skipping some commits from
// being iterated.
func NewCommitIterCTime(
c *Commit,
seenExternal map[plumbing.Hash]bool,
ignore []plumbing.Hash,
) CommitIter {
seen := make(map[plumbing.Hash]bool)
for _, h := range ignore {
seen[h] = true
}
heap := binaryheap.NewWith(func(a, b any) int {
if a.(*Commit).Committer.When.Before(b.(*Commit).Committer.When) {
return 1
}
return -1
})
heap.Push(c)
return &commitIteratorByCTime{
seenExternal: seenExternal,
seen: seen,
heap: heap,
}
}
func (w *commitIteratorByCTime) Next() (*Commit, error) {
var c *Commit
for {
cIn, ok := w.heap.Pop()
if !ok {
return nil, io.EOF
}
c = cIn.(*Commit)
if w.seen[c.Hash] || w.seenExternal[c.Hash] {
continue
}
w.seen[c.Hash] = true
for _, h := range c.ParentHashes {
if w.seen[h] || w.seenExternal[h] {
continue
}
pc, err := GetCommit(c.s, h)
if err != nil {
return nil, err
}
w.heap.Push(pc)
}
return c, nil
}
}
func (w *commitIteratorByCTime) ForEach(cb func(*Commit) error) error {
for {
c, err := w.Next()
if err == io.EOF {
break
}
if err != nil {
return err
}
err = cb(c)
if errors.Is(err, storer.ErrStop) {
break
}
if err != nil {
return err
}
}
return nil
}
func (w *commitIteratorByCTime) Close() {}
package object
import (
"errors"
"io"
"time"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/storer"
)
type commitLimitIter struct {
sourceIter CommitIter
limitOptions LogLimitOptions
}
// LogLimitOptions defines limits for log traversal.
type LogLimitOptions struct {
Since *time.Time
Until *time.Time
TailHash plumbing.Hash
}
// NewCommitLimitIterFromIter creates a new commit iterator with limits applied.
func NewCommitLimitIterFromIter(commitIter CommitIter, limitOptions LogLimitOptions) CommitIter {
iterator := new(commitLimitIter)
iterator.sourceIter = commitIter
iterator.limitOptions = limitOptions
return iterator
}
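// Illustrative sketch: restricting an existing iterator to a date window
// (assumes a CommitIter value `base`):
//
//	since := time.Now().AddDate(0, 0, -30) // only commits from the last 30 days
//	iter := NewCommitLimitIterFromIter(base, LogLimitOptions{Since: &since})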
func (c *commitLimitIter) Next() (*Commit, error) {
for {
commit, err := c.sourceIter.Next()
if err != nil {
return nil, err
}
if c.limitOptions.Since != nil && commit.Committer.When.Before(*c.limitOptions.Since) {
continue
}
if c.limitOptions.Until != nil && commit.Committer.When.After(*c.limitOptions.Until) {
continue
}
if c.limitOptions.TailHash == commit.Hash {
return commit, storer.ErrStop
}
return commit, nil
}
}
func (c *commitLimitIter) ForEach(cb func(*Commit) error) error {
for {
commit, nextErr := c.Next()
if nextErr == io.EOF {
break
}
if nextErr != nil && !errors.Is(nextErr, storer.ErrStop) {
return nextErr
}
err := cb(commit)
if errors.Is(err, storer.ErrStop) || errors.Is(nextErr, storer.ErrStop) {
return nil
} else if err != nil {
return err
}
}
return nil
}
func (c *commitLimitIter) Close() {
c.sourceIter.Close()
}
package object
import (
"errors"
"io"
"slices"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/storer"
)
type commitPathIter struct {
pathFilter func(string) bool
sourceIter CommitIter
currentCommit *Commit
checkParent bool
}
// NewCommitPathIterFromIter returns a commit iterator which performs diffTree
// between successive trees returned from the commit iterator in the argument.
// The purpose of this is to find the commits that explain how the files that
// match the path came to be.
// If checkParent is true then the function double checks if the potential
// parent (next commit in a path) is one of the parents in the tree (it's used
// by `git log --all`).
// pathFilter is a function that takes the path of a file as argument and
// returns true if we want it.
func NewCommitPathIterFromIter(pathFilter func(string) bool, commitIter CommitIter, checkParent bool) CommitIter {
iterator := new(commitPathIter)
iterator.sourceIter = commitIter
iterator.pathFilter = pathFilter
iterator.checkParent = checkParent
return iterator
}
// NewCommitFileIterFromIter is kept for compatibility, can be replaced with NewCommitPathIterFromIter
func NewCommitFileIterFromIter(fileName string, commitIter CommitIter, checkParent bool) CommitIter {
return NewCommitPathIterFromIter(
func(path string) bool {
return path == fileName
},
commitIter,
checkParent,
)
}
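// Illustrative sketch: commits that touched files under "docs/" (assumes a
// CommitIter value `base`):
//
//	iter := NewCommitPathIterFromIter(func(path string) bool {
//		return strings.HasPrefix(path, "docs/")
//	}, base, false)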
func (c *commitPathIter) Next() (*Commit, error) {
if c.currentCommit == nil {
var err error
c.currentCommit, err = c.sourceIter.Next()
if err != nil {
return nil, err
}
}
commit, commitErr := c.getNextFileCommit()
// Setting current-commit to nil to prevent unwanted states when errors are raised
if commitErr != nil {
c.currentCommit = nil
}
return commit, commitErr
}
func (c *commitPathIter) getNextFileCommit() (*Commit, error) {
var parentTree, currentTree *Tree
for {
// Parent-commit can be nil if the current-commit is the initial commit
parentCommit, parentCommitErr := c.sourceIter.Next()
if parentCommitErr != nil {
// If the parent-commit is beyond the initial commit, keep it nil
if parentCommitErr != io.EOF {
return nil, parentCommitErr
}
parentCommit = nil
}
if parentTree == nil {
var currTreeErr error
currentTree, currTreeErr = c.currentCommit.Tree()
if currTreeErr != nil {
return nil, currTreeErr
}
} else {
currentTree = parentTree
parentTree = nil
}
if parentCommit != nil {
var parentTreeErr error
parentTree, parentTreeErr = parentCommit.Tree()
if parentTreeErr != nil {
return nil, parentTreeErr
}
}
// Find diff between current and parent trees
changes, diffErr := DiffTree(currentTree, parentTree)
if diffErr != nil {
return nil, diffErr
}
found := c.hasFileChange(changes, parentCommit)
// Storing the current-commit in-case a change is found, and
// Updating the current-commit for the next-iteration
prevCommit := c.currentCommit
c.currentCommit = parentCommit
if found {
return prevCommit, nil
}
// If no matches were found and the parent-commit is beyond the initial commit, return with EOF
if parentCommit == nil {
return nil, io.EOF
}
}
}
func (c *commitPathIter) hasFileChange(changes Changes, parent *Commit) bool {
for _, change := range changes {
if !c.pathFilter(change.name()) {
continue
}
// filename matches, now check if source iterator contains all commits (from all refs)
if c.checkParent {
// Check if parent is beyond the initial commit
if parent == nil || isParentHash(parent.Hash, c.currentCommit) {
return true
}
continue
}
return true
}
return false
}
func isParentHash(hash plumbing.Hash, commit *Commit) bool {
return slices.Contains(commit.ParentHashes, hash)
}
func (c *commitPathIter) ForEach(cb func(*Commit) error) error {
for {
commit, nextErr := c.Next()
if nextErr == io.EOF {
break
}
if nextErr != nil {
return nextErr
}
err := cb(commit)
if errors.Is(err, storer.ErrStop) {
return nil
} else if err != nil {
return err
}
}
return nil
}
func (c *commitPathIter) Close() {
c.sourceIter.Close()
}
package object
import (
"bytes"
"context"
"errors"
"github.com/go-git/go-git/v6/utils/merkletrie"
"github.com/go-git/go-git/v6/utils/merkletrie/noder"
)
// DiffTree compares the content and mode of the blobs found via two
// tree objects.
// DiffTree does not perform rename detection, use DiffTreeWithOptions
// instead to detect renames.
func DiffTree(a, b *Tree) (Changes, error) {
return DiffTreeContext(context.Background(), a, b)
}
// DiffTreeContext compares the content and mode of the blobs found via two
// tree objects. Provided context must be non-nil.
// An error will be returned if context expires.
func DiffTreeContext(ctx context.Context, a, b *Tree) (Changes, error) {
return DiffTreeWithOptions(ctx, a, b, nil)
}
// DiffTreeOptions are the configurable options when performing a diff tree.
type DiffTreeOptions struct {
// DetectRenames is whether the diff tree will use rename detection.
DetectRenames bool
// RenameScore is the threshold of similarity between files to consider
// that a pair of delete and insert are a rename. The number must be
// between 0 and 100.
RenameScore uint
// RenameLimit is the maximum amount of files that can be compared when
// detecting renames. The number of comparisons that have to be performed
// is equal to the number of deleted files * the number of added files.
// That means that if 100 files were deleted and 50 files were added, 5000
// file comparisons may be needed. So, if the rename limit is 50, the number
// of both deleted and added needs to be equal or less than 50.
// A value of 0 means no limit.
RenameLimit uint
// OnlyExactRenames performs only detection of exact renames and will not perform
// any detection of renames based on file similarity.
OnlyExactRenames bool
}
// DefaultDiffTreeOptions are the default and recommended options for the
// diff tree.
var DefaultDiffTreeOptions = &DiffTreeOptions{
DetectRenames: true,
RenameScore: 60,
RenameLimit: 0,
OnlyExactRenames: false,
}
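// Illustrative sketch: a rename-aware diff between two trees (assumes *Tree
// values `a` and `b`):
//
//	changes, err := DiffTreeWithOptions(context.Background(), a, b, DefaultDiffTreeOptions)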
// DiffTreeWithOptions compares the content and mode of the blobs found
// via two tree objects with the given options. The provided context
// must be non-nil.
// If no options are passed, no rename detection will be performed. The
// recommended options are DefaultDiffTreeOptions.
// An error will be returned if the context expires.
// This function will be deprecated and removed in v6 so the default
// behaviour of DiffTree is to detect renames.
func DiffTreeWithOptions(
ctx context.Context,
a, b *Tree,
opts *DiffTreeOptions,
) (Changes, error) {
from := NewTreeRootNode(a)
to := NewTreeRootNode(b)
hashEqual := func(a, b noder.Hasher) bool {
return bytes.Equal(a.Hash(), b.Hash())
}
merkletrieChanges, err := merkletrie.DiffTreeContext(ctx, from, to, hashEqual)
if err != nil {
if errors.Is(err, merkletrie.ErrCanceled) {
return nil, ErrCanceled
}
return nil, err
}
changes, err := newChanges(merkletrieChanges)
if err != nil {
return nil, err
}
if opts == nil {
opts = new(DiffTreeOptions)
}
if opts.DetectRenames {
return DetectRenames(changes, opts)
}
return changes, nil
}
package object
import (
"bytes"
"errors"
"io"
"strings"
"github.com/go-git/go-git/v6/plumbing/filemode"
"github.com/go-git/go-git/v6/plumbing/storer"
"github.com/go-git/go-git/v6/utils/binary"
"github.com/go-git/go-git/v6/utils/ioutil"
)
// File represents git file objects.
type File struct {
// Name is the path of the file. It might be relative to a tree,
// depending of the function that generates it.
Name string
// Mode is the file mode.
Mode filemode.FileMode
// Blob with the contents of the file.
Blob
}
// NewFile returns a File based on the given blob object
func NewFile(name string, m filemode.FileMode, b *Blob) *File {
return &File{Name: name, Mode: m, Blob: *b}
}
// Contents returns the contents of a file as a string.
func (f *File) Contents() (content string, err error) {
reader, err := f.Reader()
if err != nil {
return "", err
}
defer ioutil.CheckClose(reader, &err)
buf := new(bytes.Buffer)
if _, err := buf.ReadFrom(reader); err != nil {
return "", err
}
return buf.String(), nil
}
// IsBinary reports whether the file is binary.
func (f *File) IsBinary() (bin bool, err error) {
reader, err := f.Reader()
if err != nil {
return false, err
}
defer ioutil.CheckClose(reader, &err)
return binary.IsBinary(reader)
}
// Lines returns a slice of lines from the contents of a file, stripping
// all end of line characters. If the last line is empty (does not end
// in an end of line), it is also stripped.
func (f *File) Lines() ([]string, error) {
content, err := f.Contents()
if err != nil {
return nil, err
}
splits := strings.Split(content, "\n")
// remove the last line if it is empty
if splits[len(splits)-1] == "" {
return splits[:len(splits)-1], nil
}
return splits, nil
}
// FileIter provides an iterator for the files in a tree.
type FileIter struct {
s storer.EncodedObjectStorer
w TreeWalker
}
// NewFileIter takes a storer.EncodedObjectStorer and a Tree and returns a
// *FileIter that iterates over all files contained in the tree, recursively.
func NewFileIter(s storer.EncodedObjectStorer, t *Tree) *FileIter {
return &FileIter{s: s, w: *NewTreeWalker(t, true, nil)}
}
// Next moves the iterator to the next file and returns a pointer to it. If
// there are no more files, it returns io.EOF.
func (iter *FileIter) Next() (*File, error) {
for {
name, entry, err := iter.w.Next()
if err != nil {
return nil, err
}
if entry.Mode == filemode.Dir || entry.Mode == filemode.Submodule {
continue
}
blob, err := GetBlob(iter.s, entry.Hash)
if err != nil {
return nil, err
}
return NewFile(name, entry.Mode, blob), nil
}
}
// ForEach calls the cb function for each file contained in this iter until
// an error happens or the end of the iter is reached. If storer.ErrStop is
// sent the iteration is stopped but no error is returned. The iterator is closed.
func (iter *FileIter) ForEach(cb func(*File) error) error {
defer iter.Close()
for {
f, err := iter.Next()
if err != nil {
if err == io.EOF {
return nil
}
return err
}
if err := cb(f); err != nil {
if errors.Is(err, storer.ErrStop) {
return nil
}
return err
}
}
}
// Close releases resources associated with the iterator.
func (iter *FileIter) Close() {
iter.w.Close()
}
package object
import (
"errors"
"fmt"
"sort"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/storer"
)
// errIsReachable is returned when the first commit is an ancestor of the second
var errIsReachable = fmt.Errorf("first is reachable from second")
// MergeBase mimics the behavior of `git merge-base actual other`, returning the
// best common ancestor between the actual and the passed one.
// The best common ancestors cannot be reached from other common ancestors.
func (c *Commit) MergeBase(other *Commit) ([]*Commit, error) {
// use sortedByCommitDateDesc strategy
sorted := sortByCommitDateDesc(c, other)
newer := sorted[0]
older := sorted[1]
newerHistory, err := ancestorsIndex(older, newer)
if errors.Is(err, errIsReachable) {
return []*Commit{older}, nil
}
if err != nil {
return nil, err
}
var res []*Commit
inNewerHistory := isInIndexCommitFilter(newerHistory)
resIter := NewFilterCommitIter(older, &inNewerHistory, &inNewerHistory)
_ = resIter.ForEach(func(commit *Commit) error {
res = append(res, commit)
return nil
})
return Independents(res)
}
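// Illustrative sketch: finding the best common ancestor of two branch tips
// (assumes *Commit values `ours` and `theirs`):
//
//	bases, err := ours.MergeBase(theirs)
//	// usually a single commit; more than one in criss-cross merge histories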
// IsAncestor returns true if the actual commit is an ancestor of the passed one.
// It returns an error if the history is not traversable.
// It mimics the behavior of `git merge-base --is-ancestor actual other`.
func (c *Commit) IsAncestor(other *Commit) (bool, error) {
found := false
iter := NewCommitPreorderIter(other, nil, nil)
err := iter.ForEach(func(comm *Commit) error {
if comm.Hash != c.Hash {
return nil
}
found = true
return storer.ErrStop
})
return found, err
}
// ancestorsIndex returns a map with the ancestors of the starting commit if the
// excluded one is not one of them. It returns errIsReachable if the excluded commit
// is ancestor of the starting, or another error if the history is not traversable.
func ancestorsIndex(excluded, starting *Commit) (map[plumbing.Hash]struct{}, error) {
if excluded.Hash.String() == starting.Hash.String() {
return nil, errIsReachable
}
startingHistory := map[plumbing.Hash]struct{}{}
startingIter := NewCommitIterBSF(starting, nil, nil)
err := startingIter.ForEach(func(commit *Commit) error {
if commit.Hash == excluded.Hash {
return errIsReachable
}
startingHistory[commit.Hash] = struct{}{}
return nil
})
if err != nil {
return nil, err
}
return startingHistory, nil
}
// Independents returns the subset of the passed commits that are not reachable
// from the others.
// It mimics the behavior of `git merge-base --independent commit...`.
func Independents(commits []*Commit) ([]*Commit, error) {
// use sortedByCommitDateDesc strategy
candidates := sortByCommitDateDesc(commits...)
candidates = removeDuplicated(candidates)
seen := map[plumbing.Hash]struct{}{}
var isLimit CommitFilter = func(commit *Commit) bool {
_, ok := seen[commit.Hash]
return ok
}
if len(candidates) < 2 {
return candidates, nil
}
pos := 0
for {
from := candidates[pos]
others := remove(candidates, from)
fromHistoryIter := NewFilterCommitIter(from, nil, &isLimit)
err := fromHistoryIter.ForEach(func(fromAncestor *Commit) error {
for _, other := range others {
if fromAncestor.Hash == other.Hash {
candidates = remove(candidates, other)
others = remove(others, other)
}
}
if len(candidates) == 1 {
return storer.ErrStop
}
seen[fromAncestor.Hash] = struct{}{}
return nil
})
if err != nil {
return nil, err
}
nextPos := indexOf(candidates, from) + 1
if nextPos >= len(candidates) {
break
}
pos = nextPos
}
return candidates, nil
}
// sortByCommitDateDesc returns the passed commits, sorted by `committer.When desc`
//
// This strategy tries to reduce the time needed to walk the history from one
// commit to reach the others. It assumes that ancestors are usually committed
// before their descendants; that way `Independents(A^, A)` will be processed
// as `Independents(A, A^)`, so starting from `A` it will reach `A^` much
// sooner than walking from `A^` to the initial commit, and then from `A` to `A^`.
func sortByCommitDateDesc(commits ...*Commit) []*Commit {
sorted := make([]*Commit, len(commits))
copy(sorted, commits)
sort.Slice(sorted, func(i, j int) bool {
return sorted[i].Committer.When.After(sorted[j].Committer.When)
})
return sorted
}
// indexOf returns the first position where target was found in the passed
// commits, or -1 if it is not present
func indexOf(commits []*Commit, target *Commit) int {
for i, commit := range commits {
if target.Hash == commit.Hash {
return i
}
}
return -1
}
// remove returns the passed commits excluding the commit toDelete
func remove(commits []*Commit, toDelete *Commit) []*Commit {
res := make([]*Commit, len(commits))
j := 0
for _, commit := range commits {
if commit.Hash == toDelete.Hash {
continue
}
res[j] = commit
j++
}
return res[:j]
}
// removeDuplicated removes duplicated commits from the passed slice of commits
func removeDuplicated(commits []*Commit) []*Commit {
seen := make(map[plumbing.Hash]struct{}, len(commits))
res := make([]*Commit, len(commits))
j := 0
for _, commit := range commits {
if _, ok := seen[commit.Hash]; ok {
continue
}
seen[commit.Hash] = struct{}{}
res[j] = commit
j++
}
return res[:j]
}
// isInIndexCommitFilter returns a commitFilter that returns true
// if the commit is in the passed index.
func isInIndexCommitFilter(index map[plumbing.Hash]struct{}) CommitFilter {
return func(c *Commit) bool {
_, ok := index[c.Hash]
return ok
}
}
// Package object contains implementations of all Git objects and utility
// functions to work with them.
package object
import (
"bytes"
"errors"
"fmt"
"io"
"strconv"
"time"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/storer"
)
// ErrUnsupportedObject is returned when a non-supported object is being decoded.
var ErrUnsupportedObject = errors.New("unsupported object type")
// Object is a generic representation of any git object. It is implemented by
// Commit, Tree, Blob, and Tag, and includes the functions that are common to
// them.
//
// Object is returned when an object can be of any type. It is frequently used
// with a type switch to acquire the specific type of object:
//
// func process(obj Object) {
// switch o := obj.(type) {
// case *Commit:
// // o is a Commit
// case *Tree:
// // o is a Tree
// case *Blob:
// // o is a Blob
// case *Tag:
// // o is a Tag
// }
// }
//
// This interface is intentionally different from plumbing.EncodedObject, which
// is a lower level interface used by storage implementations to read and write
// objects in its encoded form.
type Object interface {
ID() plumbing.Hash
Type() plumbing.ObjectType
Decode(plumbing.EncodedObject) error
Encode(plumbing.EncodedObject) error
}
// GetObject gets an object from an object storer and decodes it.
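//
// A minimal usage sketch (s and h are hypothetical; any
// storer.EncodedObjectStorer and object hash will do):
//
//	obj, err := GetObject(s, h)
//	if err == nil {
//		if commit, ok := obj.(*Commit); ok {
//			_ = commit // the object was decoded as a *Commit
//		}
//	}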
func GetObject(s storer.EncodedObjectStorer, h plumbing.Hash) (Object, error) {
o, err := s.EncodedObject(plumbing.AnyObject, h)
if err != nil {
return nil, err
}
return DecodeObject(s, o)
}
// DecodeObject decodes an encoded object into an Object and associates it to
// the given object storer.
func DecodeObject(s storer.EncodedObjectStorer, o plumbing.EncodedObject) (Object, error) {
switch o.Type() {
case plumbing.CommitObject:
return DecodeCommit(s, o)
case plumbing.TreeObject:
return DecodeTree(s, o)
case plumbing.BlobObject:
return DecodeBlob(o)
case plumbing.TagObject:
return DecodeTag(s, o)
default:
return nil, plumbing.ErrInvalidType
}
}
// DateFormat is the format being used in the original git implementation
const DateFormat = "Mon Jan 02 15:04:05 2006 -0700"
// Signature is used to identify who and when created a commit or tag.
type Signature struct {
// Name represents a person name. It is an arbitrary string.
Name string
// Email is an email, but it cannot be assumed to be well-formed.
Email string
// When is the timestamp of the signature.
When time.Time
}
// Decode decodes a byte slice into a signature.
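//
// For example (the identity line below is made up):
//
//	var sig Signature
//	sig.Decode([]byte("John Doe <john@example.com> 1257894000 +0100"))
//	// sig.Name == "John Doe", sig.Email == "john@example.com" and
//	// sig.When holds the Unix timestamp 1257894000 in the +0100 zone.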
func (s *Signature) Decode(b []byte) {
open := bytes.LastIndexByte(b, '<')
closeBracket := bytes.LastIndexByte(b, '>')
if open == -1 || closeBracket == -1 {
return
}
if closeBracket < open {
return
}
s.Name = string(bytes.Trim(b[:open], " "))
s.Email = string(b[open+1 : closeBracket])
hasTime := closeBracket+2 < len(b)
if hasTime {
s.decodeTimeAndTimeZone(b[closeBracket+2:])
}
}
// Encode encodes a Signature into a writer.
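//
// A minimal sketch of the output format (identity made up):
//
//	var buf bytes.Buffer
//	sig := Signature{Name: "John Doe", Email: "john@example.com", When: time.Now()}
//	_ = sig.Encode(&buf)
//	// buf now holds: "John Doe <john@example.com> <unix-seconds> <zone>"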
func (s *Signature) Encode(w io.Writer) error {
if _, err := fmt.Fprintf(w, "%s <%s> ", s.Name, s.Email); err != nil {
return err
}
if err := s.encodeTimeAndTimeZone(w); err != nil {
return err
}
return nil
}
const timeZoneLength = 5
func (s *Signature) decodeTimeAndTimeZone(b []byte) {
space := bytes.IndexByte(b, ' ')
if space == -1 {
space = len(b)
}
ts, err := strconv.ParseInt(string(b[:space]), 10, 64)
if err != nil {
return
}
s.When = time.Unix(ts, 0).In(time.UTC)
tzStart := space + 1
if tzStart >= len(b) || tzStart+timeZoneLength > len(b) {
return
}
timezone := string(b[tzStart : tzStart+timeZoneLength])
tzhours, err1 := strconv.ParseInt(timezone[0:3], 10, 64)
tzmins, err2 := strconv.ParseInt(timezone[3:], 10, 64)
if err1 != nil || err2 != nil {
return
}
if tzhours < 0 {
tzmins *= -1
}
tz := time.FixedZone("", int(tzhours*60*60+tzmins*60))
s.When = s.When.In(tz)
}
func (s *Signature) encodeTimeAndTimeZone(w io.Writer) error {
u := max(s.When.Unix(), 0)
_, err := fmt.Fprintf(w, "%d %s", u, s.When.Format("-0700"))
return err
}
func (s *Signature) String() string {
return fmt.Sprintf("%s <%s>", s.Name, s.Email)
}
// ObjectIter provides an iterator for a set of objects.
type ObjectIter struct {
storer.EncodedObjectIter
s storer.EncodedObjectStorer
}
// NewObjectIter takes a storer.EncodedObjectStorer and a
// storer.EncodedObjectIter and returns an *ObjectIter that iterates over all
// objects contained in the storer.EncodedObjectIter.
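//
// A minimal usage sketch (s is a hypothetical storer.EncodedObjectStorer):
//
//	ei, _ := s.IterEncodedObjects(plumbing.AnyObject)
//	iter := NewObjectIter(s, ei)
//	_ = iter.ForEach(func(o Object) error {
//		// inspect o
//		return nil
//	})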
func NewObjectIter(s storer.EncodedObjectStorer, iter storer.EncodedObjectIter) *ObjectIter {
return &ObjectIter{iter, s}
}
// Next moves the iterator to the next object and returns a pointer to it. If
// there are no more objects, it returns io.EOF.
func (iter *ObjectIter) Next() (Object, error) {
for {
obj, err := iter.EncodedObjectIter.Next()
if err != nil {
return nil, err
}
o, err := iter.toObject(obj)
if errors.Is(err, plumbing.ErrInvalidType) {
continue
}
if err != nil {
return nil, err
}
return o, nil
}
}
// ForEach calls the cb function for each object contained in this iter until
// an error occurs or the end of the iter is reached. If ErrStop is returned
// by cb, the iteration stops and no error is returned. The iterator is closed.
func (iter *ObjectIter) ForEach(cb func(Object) error) error {
return iter.EncodedObjectIter.ForEach(func(obj plumbing.EncodedObject) error {
o, err := iter.toObject(obj)
if errors.Is(err, plumbing.ErrInvalidType) {
return nil
}
if err != nil {
return err
}
return cb(o)
})
}
func (iter *ObjectIter) toObject(obj plumbing.EncodedObject) (Object, error) {
switch obj.Type() {
case plumbing.BlobObject:
blob := &Blob{}
return blob, blob.Decode(obj)
case plumbing.TreeObject:
tree := &Tree{s: iter.s}
return tree, tree.Decode(obj)
case plumbing.CommitObject:
commit := &Commit{}
return commit, commit.Decode(obj)
case plumbing.TagObject:
tag := &Tag{}
return tag, tag.Decode(obj)
default:
return nil, plumbing.ErrInvalidType
}
}
package object
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"strconv"
"strings"
dmp "github.com/sergi/go-diff/diffmatchpatch"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/filemode"
fdiff "github.com/go-git/go-git/v6/plumbing/format/diff"
"github.com/go-git/go-git/v6/utils/diff"
)
// ErrCanceled is returned when the operation is canceled.
var ErrCanceled = errors.New("operation canceled")
func getPatch(message string, changes ...*Change) (*Patch, error) {
ctx := context.Background()
return getPatchContext(ctx, message, changes...)
}
func getPatchContext(ctx context.Context, message string, changes ...*Change) (*Patch, error) {
if len(changes) == 0 {
return &Patch{message: message}, nil
}
filePatches := make([]fdiff.FilePatch, 0, len(changes))
for _, c := range changes {
select {
case <-ctx.Done():
return nil, ErrCanceled
default:
}
fp, err := filePatchWithContext(ctx, c)
if err != nil {
return nil, err
}
filePatches = append(filePatches, fp)
}
return &Patch{message, filePatches}, nil
}
func filePatchWithContext(ctx context.Context, c *Change) (fdiff.FilePatch, error) {
from, to, err := c.Files()
if err != nil {
return nil, err
}
fromContent, fIsBinary, err := fileContent(from)
if err != nil {
return nil, err
}
toContent, tIsBinary, err := fileContent(to)
if err != nil {
return nil, err
}
if fIsBinary || tIsBinary {
return &textFilePatch{from: c.From, to: c.To}, nil
}
diffs := diff.Do(fromContent, toContent)
chunks := make([]fdiff.Chunk, 0, len(diffs))
for _, d := range diffs {
select {
case <-ctx.Done():
return nil, ErrCanceled
default:
}
var op fdiff.Operation
switch d.Type {
case dmp.DiffEqual:
op = fdiff.Equal
case dmp.DiffDelete:
op = fdiff.Delete
case dmp.DiffInsert:
op = fdiff.Add
}
chunks = append(chunks, &textChunk{d.Text, op})
}
return &textFilePatch{
chunks: chunks,
from: c.From,
to: c.To,
}, nil
}
func fileContent(f *File) (content string, isBinary bool, err error) {
if f == nil {
return content, isBinary, err
}
isBinary, err = f.IsBinary()
if err != nil || isBinary {
return content, isBinary, err
}
content, err = f.Contents()
return content, isBinary, err
}
// Patch is an implementation of the fdiff.Patch interface
type Patch struct {
message string
filePatches []fdiff.FilePatch
}
// FilePatches returns the file patches.
func (p *Patch) FilePatches() []fdiff.FilePatch {
return p.filePatches
}
// Message returns the patch message.
func (p *Patch) Message() string {
return p.message
}
// Encode encodes the patch to the given writer.
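//
// A minimal usage sketch (p is a hypothetical *Patch):
//
//	var buf bytes.Buffer
//	if err := p.Encode(&buf); err == nil {
//		// buf holds the unified diff text
//	}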
func (p *Patch) Encode(w io.Writer) error {
ue := fdiff.NewUnifiedEncoder(w, fdiff.DefaultContextLines)
return ue.Encode(p)
}
// Stats returns the file stats.
func (p *Patch) Stats() FileStats {
return getFileStatsFromFilePatches(p.FilePatches())
}
func (p *Patch) String() string {
buf := bytes.NewBuffer(nil)
err := p.Encode(buf)
if err != nil {
return fmt.Sprintf("malformed patch: %s", err.Error())
}
return buf.String()
}
// changeEntryWrapper is an implementation of the fdiff.File interface
type changeEntryWrapper struct {
ce ChangeEntry
}
func (f *changeEntryWrapper) Hash() plumbing.Hash {
if !f.ce.TreeEntry.Mode.IsFile() {
return plumbing.ZeroHash
}
return f.ce.TreeEntry.Hash
}
func (f *changeEntryWrapper) Mode() filemode.FileMode {
return f.ce.TreeEntry.Mode
}
func (f *changeEntryWrapper) Path() string {
if !f.ce.TreeEntry.Mode.IsFile() {
return ""
}
return f.ce.Name
}
func (f *changeEntryWrapper) Empty() bool {
return !f.ce.TreeEntry.Mode.IsFile()
}
// textFilePatch is an implementation of the fdiff.FilePatch interface
type textFilePatch struct {
chunks []fdiff.Chunk
from, to ChangeEntry
}
func (tf *textFilePatch) Files() (from, to fdiff.File) {
f := &changeEntryWrapper{tf.from}
t := &changeEntryWrapper{tf.to}
if !f.Empty() {
from = f
}
if !t.Empty() {
to = t
}
return from, to
}
func (tf *textFilePatch) IsBinary() bool {
return len(tf.chunks) == 0
}
func (tf *textFilePatch) Chunks() []fdiff.Chunk {
return tf.chunks
}
// textChunk is an implementation of the fdiff.Chunk interface
type textChunk struct {
content string
op fdiff.Operation
}
func (t *textChunk) Content() string {
return t.content
}
func (t *textChunk) Type() fdiff.Operation {
return t.op
}
// FileStat stores the status of changes in content of a file.
type FileStat struct {
Name string
Addition int
Deletion int
}
func (fs FileStat) String() string {
return printStat([]FileStat{fs})
}
// FileStats is a collection of FileStat.
type FileStats []FileStat
func (fileStats FileStats) String() string {
return printStat(fileStats)
}
// printStat prints the stats of changes in content of files.
// Original implementation: https://github.com/git/git/blob/1a87c842ece327d03d08096395969aca5e0a6996/diff.c#L2615
// Parts of the output:
// <pad><filename><pad>|<pad><changeNumber><pad><+++/---><newline>
// example: " main.go | 10 +++++++--- "
func printStat(fileStats []FileStat) string {
maxGraphWidth := uint(53)
maxNameLen := 0
maxChangeLen := 0
scaleLinear := func(it, width, maxVal uint) uint {
if it == 0 || maxVal == 0 {
return 0
}
return 1 + (it * (width - 1) / maxVal)
}
for _, fs := range fileStats {
if len(fs.Name) > maxNameLen {
maxNameLen = len(fs.Name)
}
changes := strconv.Itoa(fs.Addition + fs.Deletion)
if len(changes) > maxChangeLen {
maxChangeLen = len(changes)
}
}
var result strings.Builder
for _, fs := range fileStats {
add := uint(fs.Addition)
del := uint(fs.Deletion)
np := maxNameLen - len(fs.Name)
cp := maxChangeLen - len(strconv.Itoa(fs.Addition+fs.Deletion))
total := add + del
if total > maxGraphWidth {
add = scaleLinear(add, maxGraphWidth, total)
del = scaleLinear(del, maxGraphWidth, total)
}
adds := strings.Repeat("+", int(add))
dels := strings.Repeat("-", int(del))
namePad := strings.Repeat(" ", np)
changePad := strings.Repeat(" ", cp)
fmt.Fprintf(&result, " %s%s | %s%d %s%s\n", fs.Name, namePad, changePad, total, adds, dels)
}
return result.String()
}
func getFileStatsFromFilePatches(filePatches []fdiff.FilePatch) FileStats {
fileStats := make(FileStats, 0, len(filePatches))
for _, fp := range filePatches {
// ignore empty patches (binary files, submodule refs updates)
if len(fp.Chunks()) == 0 {
continue
}
cs := FileStat{}
from, to := fp.Files()
if from == nil {
// New File is created.
cs.Name = to.Path()
} else if to == nil {
// File is deleted.
cs.Name = from.Path()
} else if from.Path() != to.Path() {
// File is renamed.
cs.Name = fmt.Sprintf("%s => %s", from.Path(), to.Path())
} else {
cs.Name = from.Path()
}
for _, chunk := range fp.Chunks() {
s := chunk.Content()
if len(s) == 0 {
continue
}
switch chunk.Type() {
case fdiff.Add:
cs.Addition += strings.Count(s, "\n")
if s[len(s)-1] != '\n' {
cs.Addition++
}
case fdiff.Delete:
cs.Deletion += strings.Count(s, "\n")
if s[len(s)-1] != '\n' {
cs.Deletion++
}
}
}
fileStats = append(fileStats, cs)
}
return fileStats
}
package object
import (
"errors"
"io"
"sort"
"strings"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/filemode"
"github.com/go-git/go-git/v6/utils/ioutil"
"github.com/go-git/go-git/v6/utils/merkletrie"
)
// DetectRenames detects the renames in the given changes on two trees with
// the given options. It will return the given changes grouping additions and
// deletions into modifications when possible.
// If options is nil, the default diff tree options will be used.
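//
// A minimal usage sketch (changes is assumed to come from a tree diff such
// as Tree.Diff or DiffTree):
//
//	withRenames, err := DetectRenames(changes, nil) // nil selects the defaults
//	if err == nil {
//		// matching additions/deletions are now reported as modifications
//	}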
func DetectRenames(
changes Changes,
opts *DiffTreeOptions,
) (Changes, error) {
if opts == nil {
opts = DefaultDiffTreeOptions
}
detector := &renameDetector{
renameScore: int(opts.RenameScore),
renameLimit: int(opts.RenameLimit),
onlyExact: opts.OnlyExactRenames,
}
for _, c := range changes {
action, err := c.Action()
if err != nil {
return nil, err
}
switch action {
case merkletrie.Insert:
detector.added = append(detector.added, c)
case merkletrie.Delete:
detector.deleted = append(detector.deleted, c)
default:
detector.modified = append(detector.modified, c)
}
}
return detector.detect()
}
// renameDetector will detect and resolve renames in a set of changes.
// see: https://github.com/eclipse/jgit/blob/master/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java
type renameDetector struct {
added []*Change
deleted []*Change
modified []*Change
renameScore int
renameLimit int
onlyExact bool
}
// detectExactRenames matches files that were deleted with files that were
// added where the hash is the same on both. If there are multiple targets,
// the one with the most similar path will be chosen as the rename and the
// rest left as either deletions or additions.
func (d *renameDetector) detectExactRenames() {
added := groupChangesByHash(d.added)
deletes := groupChangesByHash(d.deleted)
var uniqueAdds []*Change
var nonUniqueAdds [][]*Change
var addedLeft []*Change
for _, cs := range added {
if len(cs) == 1 {
uniqueAdds = append(uniqueAdds, cs[0])
} else {
nonUniqueAdds = append(nonUniqueAdds, cs)
}
}
for _, c := range uniqueAdds {
hash := changeHash(c)
deleted := deletes[hash]
if len(deleted) == 1 {
if sameMode(c, deleted[0]) {
d.modified = append(d.modified, &Change{From: deleted[0].From, To: c.To})
delete(deletes, hash)
} else {
addedLeft = append(addedLeft, c)
}
} else if len(deleted) > 1 {
bestMatch := bestNameMatch(c, deleted)
if bestMatch != nil && sameMode(c, bestMatch) {
d.modified = append(d.modified, &Change{From: bestMatch.From, To: c.To})
delete(deletes, hash)
newDeletes := make([]*Change, 0, len(deleted)-1)
for _, d := range deleted {
if d != bestMatch {
newDeletes = append(newDeletes, d)
}
}
deletes[hash] = newDeletes
}
} else {
addedLeft = append(addedLeft, c)
}
}
for _, added := range nonUniqueAdds {
hash := changeHash(added[0])
deleted := deletes[hash]
if len(deleted) == 1 {
deleted := deleted[0]
bestMatch := bestNameMatch(deleted, added)
if bestMatch != nil && sameMode(deleted, bestMatch) {
d.modified = append(d.modified, &Change{From: deleted.From, To: bestMatch.To})
delete(deletes, hash)
for _, c := range added {
if c != bestMatch {
addedLeft = append(addedLeft, c)
}
}
} else {
addedLeft = append(addedLeft, added...)
}
} else if len(deleted) > 1 {
maxSize := len(deleted) * len(added)
if d.renameLimit > 0 && d.renameLimit < maxSize {
maxSize = d.renameLimit
}
matrix := make(similarityMatrix, 0, maxSize)
for delIdx, del := range deleted {
deletedName := changeName(del)
for addIdx, add := range added {
addedName := changeName(add)
score := nameSimilarityScore(addedName, deletedName)
matrix = append(matrix, similarityPair{added: addIdx, deleted: delIdx, score: score})
if len(matrix) >= maxSize {
break
}
}
if len(matrix) >= maxSize {
break
}
}
sort.Stable(matrix)
usedAdds := make(map[*Change]struct{})
usedDeletes := make(map[*Change]struct{})
for i := len(matrix) - 1; i >= 0; i-- {
del := deleted[matrix[i].deleted]
add := added[matrix[i].added]
if add == nil || del == nil {
// it was already matched
continue
}
usedAdds[add] = struct{}{}
usedDeletes[del] = struct{}{}
d.modified = append(d.modified, &Change{From: del.From, To: add.To})
added[matrix[i].added] = nil
deleted[matrix[i].deleted] = nil
}
for _, c := range added {
if _, ok := usedAdds[c]; !ok && c != nil {
addedLeft = append(addedLeft, c)
}
}
newDeletes := make([]*Change, 0, len(deleted)-len(usedDeletes))
for _, c := range deleted {
if _, ok := usedDeletes[c]; !ok && c != nil {
newDeletes = append(newDeletes, c)
}
}
deletes[hash] = newDeletes
} else {
addedLeft = append(addedLeft, added...)
}
}
d.added = addedLeft
d.deleted = nil
for _, dels := range deletes {
d.deleted = append(d.deleted, dels...)
}
}
// detectContentRenames detects renames based on the similarity of the content
// in the files by building a matrix of pairs between sources and destinations
// and matching by the highest score.
// see: https://github.com/eclipse/jgit/blob/master/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
func (d *renameDetector) detectContentRenames() error {
cnt := max(len(d.added), len(d.deleted))
if d.renameLimit > 0 && cnt > d.renameLimit {
return nil
}
srcs, dsts := d.deleted, d.added
matrix, err := buildSimilarityMatrix(srcs, dsts, d.renameScore)
if err != nil {
return err
}
renames := make([]*Change, 0, min(len(matrix), len(dsts)))
// Match rename pairs on a first come, first served basis until
// we have looked at everything that is above the minimum score.
for i := len(matrix) - 1; i >= 0; i-- {
pair := matrix[i]
src := srcs[pair.deleted]
dst := dsts[pair.added]
if dst == nil || src == nil {
// It was already matched before
continue
}
renames = append(renames, &Change{From: src.From, To: dst.To})
// Claim destination and source as matched
dsts[pair.added] = nil
srcs[pair.deleted] = nil
}
d.modified = append(d.modified, renames...)
d.added = compactChanges(dsts)
d.deleted = compactChanges(srcs)
return nil
}
func (d *renameDetector) detect() (Changes, error) {
if len(d.added) > 0 && len(d.deleted) > 0 {
d.detectExactRenames()
if !d.onlyExact {
if err := d.detectContentRenames(); err != nil {
return nil, err
}
}
}
result := make(Changes, 0, len(d.added)+len(d.deleted)+len(d.modified))
result = append(result, d.added...)
result = append(result, d.deleted...)
result = append(result, d.modified...)
sort.Stable(result)
return result, nil
}
func bestNameMatch(change *Change, changes []*Change) *Change {
var best *Change
var bestScore int
cname := changeName(change)
for _, c := range changes {
score := nameSimilarityScore(cname, changeName(c))
if score > bestScore {
bestScore = score
best = c
}
}
return best
}
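// nameSimilarityScore returns a score in the range [0, 100] estimating how
// similar two paths are: 50% of the weight comes from the directory part
// (the average of the shared-prefix and shared-suffix scores) and 50% from
// the shared suffix of the file names.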
func nameSimilarityScore(a, b string) int {
aDirLen := strings.LastIndexByte(a, '/') + 1
bDirLen := strings.LastIndexByte(b, '/') + 1
dirMin := min(aDirLen, bDirLen)
dirMax := max(aDirLen, bDirLen)
var dirScoreLtr, dirScoreRtl int
if dirMax == 0 {
dirScoreLtr = 100
dirScoreRtl = 100
} else {
var dirSim int
for ; dirSim < dirMin; dirSim++ {
if a[dirSim] != b[dirSim] {
break
}
}
dirScoreLtr = dirSim * 100 / dirMax
if dirScoreLtr == 100 {
dirScoreRtl = 100
} else {
for dirSim = 0; dirSim < dirMin; dirSim++ {
if a[aDirLen-1-dirSim] != b[bDirLen-1-dirSim] {
break
}
}
dirScoreRtl = dirSim * 100 / dirMax
}
}
fileMin := min(len(a)-aDirLen, len(b)-bDirLen)
fileMax := max(len(a)-aDirLen, len(b)-bDirLen)
fileSim := 0
for ; fileSim < fileMin; fileSim++ {
if a[len(a)-1-fileSim] != b[len(b)-1-fileSim] {
break
}
}
fileScore := fileSim * 100 / fileMax
return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100
}
func changeName(c *Change) string {
if c.To != empty {
return c.To.Name
}
return c.From.Name
}
func changeHash(c *Change) plumbing.Hash {
if c.To != empty {
return c.To.TreeEntry.Hash
}
return c.From.TreeEntry.Hash
}
func changeMode(c *Change) filemode.FileMode {
if c.To != empty {
return c.To.TreeEntry.Mode
}
return c.From.TreeEntry.Mode
}
func sameMode(a, b *Change) bool {
return changeMode(a) == changeMode(b)
}
func groupChangesByHash(changes []*Change) map[plumbing.Hash][]*Change {
result := make(map[plumbing.Hash][]*Change)
for _, c := range changes {
hash := changeHash(c)
result[hash] = append(result[hash], c)
}
return result
}
type similarityMatrix []similarityPair
func (m similarityMatrix) Len() int { return len(m) }
func (m similarityMatrix) Swap(i, j int) { m[i], m[j] = m[j], m[i] }
func (m similarityMatrix) Less(i, j int) bool {
if m[i].score == m[j].score {
if m[i].added == m[j].added {
return m[i].deleted < m[j].deleted
}
return m[i].added < m[j].added
}
return m[i].score < m[j].score
}
type similarityPair struct {
// index of the added file
added int
// index of the deleted file
deleted int
// similarity score
score int
}
const maxMatrixSize = 10000
func buildSimilarityMatrix(srcs, dsts []*Change, renameScore int) (similarityMatrix, error) {
// Allocate for the worst-case scenario where every pair has a score
// that we need to consider. We might not need that many.
matrixSize := min(len(srcs)*len(dsts), maxMatrixSize)
matrix := make(similarityMatrix, 0, matrixSize)
srcSizes := make([]int64, len(srcs))
dstSizes := make([]int64, len(dsts))
dstTooLarge := make(map[int]bool)
// Consider each pair of files, if the score is above the minimum
// threshold we need to record that scoring in the matrix so we can
// later find the best matches.
outerLoop:
for srcIdx, src := range srcs {
if changeMode(src) != filemode.Regular {
continue
}
// Declare the from file and the similarity index here so they can be
// reused inside the inner loop. They are deliberately not initialized
// at this point, so the initialization can be skipped when they turn
// out not to be needed. They are initialized inside the inner loop if
// and only if they're needed, and reused in subsequent passes.
var from *File
var s *similarityIndex
var err error
for dstIdx, dst := range dsts {
if changeMode(dst) != filemode.Regular {
continue
}
if dstTooLarge[dstIdx] {
continue
}
var to *File
srcSize := srcSizes[srcIdx]
if srcSize == 0 {
from, _, err = src.Files()
if err != nil {
return nil, err
}
srcSize = from.Size + 1
srcSizes[srcIdx] = srcSize
}
dstSize := dstSizes[dstIdx]
if dstSize == 0 {
_, to, err = dst.Files()
if err != nil {
return nil, err
}
dstSize = to.Size + 1
dstSizes[dstIdx] = dstSize
}
minSize, maxSize := srcSize, dstSize
if dstSize < srcSize {
minSize, maxSize = dstSize, srcSize
}
if int(minSize*100/maxSize) < renameScore {
// File sizes are too different to be a match
continue
}
if s == nil {
s, err = fileSimilarityIndex(from)
if err != nil {
if errors.Is(err, errIndexFull) {
continue outerLoop
}
return nil, err
}
}
if to == nil {
_, to, err = dst.Files()
if err != nil {
return nil, err
}
}
di, err := fileSimilarityIndex(to)
if err != nil {
if errors.Is(err, errIndexFull) {
dstTooLarge[dstIdx] = true
}
return nil, err
}
contentScore := s.score(di, 10000)
// The name score returns a value between 0 and 100, so we need to
// convert it to the same range as the content score.
nameScore := nameSimilarityScore(src.From.Name, dst.To.Name) * 100
score := (contentScore*99 + nameScore*1) / 10000
if score < renameScore {
continue
}
matrix = append(matrix, similarityPair{added: dstIdx, deleted: srcIdx, score: score})
}
}
sort.Stable(matrix)
return matrix, nil
}
func compactChanges(changes []*Change) []*Change {
var result []*Change
for _, c := range changes {
if c != nil {
result = append(result, c)
}
}
return result
}
const (
keyShift = 32
maxCountValue = (1 << keyShift) - 1
)
var errIndexFull = errors.New("index is full")
// similarityIndex is an index structure of lines/blocks in one file.
// This structure can be used to compute an approximation of the similarity
// between two files.
// To save space in memory, this index uses a space efficient encoding which
// will not exceed 1MiB per instance. The index starts out at a smaller size
// (closer to 2KiB), but may grow as more distinct blocks within the scanned
// file are discovered.
// see: https://github.com/eclipse/jgit/blob/master/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
type similarityIndex struct {
hashed uint64
// number of non-zero entries in hashes
numHashes int
growAt int
hashes []keyCountPair
hashBits int
}
func fileSimilarityIndex(f *File) (*similarityIndex, error) {
idx := newSimilarityIndex()
if err := idx.hash(f); err != nil {
return nil, err
}
sort.Stable(keyCountPairs(idx.hashes))
return idx, nil
}
func newSimilarityIndex() *similarityIndex {
return &similarityIndex{
hashBits: 8,
hashes: make([]keyCountPair, 1<<8),
growAt: shouldGrowAt(8),
}
}
func (i *similarityIndex) hash(f *File) error {
isBin, err := f.IsBinary()
if err != nil {
return err
}
r, err := f.Reader()
if err != nil {
return err
}
defer ioutil.CheckClose(r, &err)
return i.hashContent(r, f.Size, isBin)
}
func (i *similarityIndex) hashContent(r io.Reader, size int64, isBin bool) error {
buf := make([]byte, 4096)
var ptr, cnt int
remaining := size
for remaining > 0 {
hash := 5381
var blockHashedCnt uint64
// Hash one line or block, whatever happens first
n := int64(0)
for {
if ptr == cnt {
ptr = 0
var err error
cnt, err = io.ReadFull(r, buf)
if err != nil && !errors.Is(err, io.ErrUnexpectedEOF) {
return err
}
if cnt == 0 {
return io.EOF
}
}
n++
c := buf[ptr] & 0xff
ptr++
// Ignore CR in CRLF sequence if it's text
if !isBin && c == '\r' && ptr < cnt && buf[ptr] == '\n' {
continue
}
blockHashedCnt++
if c == '\n' {
break
}
hash = (hash << 5) + hash + int(c)
if n >= 64 || n >= remaining {
break
}
}
i.hashed += blockHashedCnt
if err := i.add(hash, blockHashedCnt); err != nil {
return err
}
remaining -= n
}
return nil
}
// score computes the similarity score between this index and another one.
// A region of a file is defined as a line in a text file or a fixed-size
// block in a binary file. To prepare an index, each region in the file is
// hashed; the values and counts of hashes are retained in a sorted table.
// Define the similarity fraction F as the count of matching regions between
// the two files divided by the maximum count of regions in either file.
// The similarity score is F multiplied by the maxScore constant, yielding a
// range [0, maxScore]. It is defined as maxScore for the degenerate case of
// two empty files.
// The similarity score is symmetrical; i.e. a.score(b) == b.score(a).
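//
// For example, with maxScore = 100: if the file with more regions hashed
// into 10 of them and regions amounting to 6 of those match, the score is
// 6*100/10 = 60.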
func (i *similarityIndex) score(other *similarityIndex, maxScore int) int {
maxHashed := max(i.hashed, other.hashed)
if maxHashed == 0 {
return maxScore
}
return int(i.common(other) * uint64(maxScore) / maxHashed)
}
func (i *similarityIndex) common(dst *similarityIndex) uint64 {
srcIdx, dstIdx := 0, 0
if i.numHashes == 0 || dst.numHashes == 0 {
return 0
}
var common uint64
srcKey, dstKey := i.hashes[srcIdx].key(), dst.hashes[dstIdx].key()
for {
if srcKey == dstKey {
srcCnt, dstCnt := i.hashes[srcIdx].count(), dst.hashes[dstIdx].count()
if srcCnt < dstCnt {
common += srcCnt
} else {
common += dstCnt
}
srcIdx++
if srcIdx == len(i.hashes) {
break
}
srcKey = i.hashes[srcIdx].key()
dstIdx++
if dstIdx == len(dst.hashes) {
break
}
dstKey = dst.hashes[dstIdx].key()
} else if srcKey < dstKey {
// Region of src that is not in dst
srcIdx++
if srcIdx == len(i.hashes) {
break
}
srcKey = i.hashes[srcIdx].key()
} else {
// Region of dst that is not in src
dstIdx++
if dstIdx == len(dst.hashes) {
break
}
dstKey = dst.hashes[dstIdx].key()
}
}
return common
}
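// add folds the key, then inserts it into the open-addressed hash table,
// incrementing the count if the key is already present and growing the
// table once the load threshold (growAt) is reached.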
func (i *similarityIndex) add(key int, cnt uint64) error {
key = int(uint32(key) * 0x9e370001 >> 1)
j := i.slot(key)
for {
v := i.hashes[j]
if v == 0 {
// It's an empty slot, so we can store it here.
if i.growAt <= i.numHashes {
if err := i.grow(); err != nil {
return err
}
j = i.slot(key)
continue
}
var err error
i.hashes[j], err = newKeyCountPair(key, cnt)
if err != nil {
return err
}
i.numHashes++
return nil
} else if v.key() == key {
// It's the same key, so increment the counter.
var err error
i.hashes[j], err = newKeyCountPair(key, v.count()+cnt)
return err
} else if j+1 >= len(i.hashes) {
j = 0
} else {
j++
}
}
}
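// keyCountPair packs a hash key and its occurrence count into a single
// uint64: the key lives in the upper 32 bits (above keyShift) and the count
// in the lower 32 bits; newKeyCountPair fails with errIndexFull when the
// count exceeds maxCountValue.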
type keyCountPair uint64
func newKeyCountPair(key int, cnt uint64) (keyCountPair, error) {
if cnt > maxCountValue {
return 0, errIndexFull
}
return keyCountPair((uint64(key) << keyShift) | cnt), nil
}
func (p keyCountPair) key() int {
return int(p >> keyShift)
}
func (p keyCountPair) count() uint64 {
return uint64(p) & maxCountValue
}
func (i *similarityIndex) slot(key int) int {
// We use 31 - hashBits because the upper bit was already forced
// to be 0 and we want the remaining high bits to be used as the
// table slot.
return int(uint32(key) >> uint(31-i.hashBits))
}
func shouldGrowAt(hashBits int) int {
return (1 << uint(hashBits)) * (hashBits - 3) / hashBits
}
func (i *similarityIndex) grow() error {
if i.hashBits == 30 {
return errIndexFull
}
old := i.hashes
i.hashBits++
i.growAt = shouldGrowAt(i.hashBits)
// TODO(erizocosmico): find a way to check if it will OOM and return
// errIndexFull instead.
i.hashes = make([]keyCountPair, 1<<uint(i.hashBits))
for _, v := range old {
if v != 0 {
j := i.slot(v.key())
for i.hashes[j] != 0 {
j++
if j >= len(i.hashes) {
j = 0
}
}
i.hashes[j] = v
}
}
return nil
}
type keyCountPairs []keyCountPair
func (p keyCountPairs) Len() int { return len(p) }
func (p keyCountPairs) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func (p keyCountPairs) Less(i, j int) bool { return p[i] < p[j] }
package object
import "bytes"
const (
signatureTypeUnknown signatureType = iota
signatureTypeOpenPGP
signatureTypeX509
signatureTypeSSH
)
var (
// openPGPSignatureFormat is the format of an OpenPGP signature.
openPGPSignatureFormat = signatureFormat{
[]byte("-----BEGIN PGP SIGNATURE-----"),
[]byte("-----BEGIN PGP MESSAGE-----"),
}
// x509SignatureFormat is the format of an X509 signature, which is
// a PKCS#7 (S/MIME) signature.
x509SignatureFormat = signatureFormat{
[]byte("-----BEGIN CERTIFICATE-----"),
[]byte("-----BEGIN SIGNED MESSAGE-----"),
}
// sshSignatureFormat is the format of an SSH signature.
sshSignatureFormat = signatureFormat{
[]byte("-----BEGIN SSH SIGNATURE-----"),
}
)
// knownSignatureFormats is a map of known signature formats, indexed by
// their signatureType.
var knownSignatureFormats = map[signatureType]signatureFormat{
signatureTypeOpenPGP: openPGPSignatureFormat,
signatureTypeX509: x509SignatureFormat,
signatureTypeSSH: sshSignatureFormat,
}
// signatureType represents the type of the signature.
type signatureType int8
// signatureFormat represents the beginning of a signature.
type signatureFormat [][]byte
// typeForSignature returns the type of the signature based on its format.
func typeForSignature(b []byte) signatureType {
for t, i := range knownSignatureFormats {
for _, begin := range i {
if bytes.HasPrefix(b, begin) {
return t
}
}
}
return signatureTypeUnknown
}
// parseSignedBytes returns the position of the last signature block found in
// the given bytes. If no signature block is found, it returns -1.
//
// When multiple signature blocks are found, the position of the last one is
// returned. Any trailing bytes after this signature block start should be
// considered part of the signature.
//
// Given this, it would be safe to use the returned position to split the bytes
// into two parts: the first part containing the message, the second part
// containing the signature.
//
// Example:
//
// message := []byte(`Message with signature
//
// -----BEGIN SSH SIGNATURE-----
// ...`)
//
// var signature string
// if pos, _ := parseSignedBytes(message); pos != -1 {
// signature = string(message[pos:])
// message = message[:pos]
// }
//
// This logic is on par with git's gpg-interface.c:parse_signed_buffer().
// https://github.com/git/git/blob/7c2ef319c52c4997256f5807564523dfd4acdfc7/gpg-interface.c#L668
func parseSignedBytes(b []byte) (int, signatureType) {
n, match := 0, -1
var t signatureType
for n < len(b) {
i := b[n:]
if st := typeForSignature(i); st != signatureTypeUnknown {
match = n
t = st
}
if eol := bytes.IndexByte(i, '\n'); eol >= 0 {
n += eol + 1
continue
}
// If we reach this point, we've reached the end.
break
}
return match, t
}
package object
import (
"bytes"
"fmt"
"io"
"strings"
"github.com/ProtonMail/go-crypto/openpgp"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/storer"
"github.com/go-git/go-git/v6/utils/ioutil"
"github.com/go-git/go-git/v6/utils/sync"
)
// Tag represents an annotated tag object. It points to a single git object of
// any type, but tags typically are applied to commit or blob objects. It
// provides a reference that associates the target with a tag name. It also
// contains meta-information about the tag, including the tagger, tag date and
// message.
//
// Note that this is not used for lightweight tags.
//
// https://git-scm.com/book/en/v2/Git-Internals-Git-References#Tags
type Tag struct {
// Hash of the tag.
Hash plumbing.Hash
// Name of the tag.
Name string
// Tagger is the one who created the tag.
Tagger Signature
// Message is an arbitrary text message.
Message string
// PGPSignature is the PGP signature of the tag.
PGPSignature string
// TargetType is the object type of the target.
TargetType plumbing.ObjectType
// Target is the hash of the target object.
Target plumbing.Hash
s storer.EncodedObjectStorer
}
// GetTag gets a tag from an object storer and decodes it.
func GetTag(s storer.EncodedObjectStorer, h plumbing.Hash) (*Tag, error) {
o, err := s.EncodedObject(plumbing.TagObject, h)
if err != nil {
return nil, err
}
return DecodeTag(s, o)
}
// DecodeTag decodes an encoded object into a *Tag and associates it to the
// given object storer.
func DecodeTag(s storer.EncodedObjectStorer, o plumbing.EncodedObject) (*Tag, error) {
t := &Tag{s: s}
if err := t.Decode(o); err != nil {
return nil, err
}
return t, nil
}
// ID returns the object ID of the tag, not the object that the tag references.
// The returned value will always match the current value of Tag.Hash.
//
// ID is present to fulfill the Object interface.
func (t *Tag) ID() plumbing.Hash {
return t.Hash
}
// Type returns the type of object. It always returns plumbing.TagObject.
//
// Type is present to fulfill the Object interface.
func (t *Tag) Type() plumbing.ObjectType {
return plumbing.TagObject
}
// Decode transforms a plumbing.EncodedObject into a Tag struct.
func (t *Tag) Decode(o plumbing.EncodedObject) (err error) {
if o.Type() != plumbing.TagObject {
return ErrUnsupportedObject
}
t.Hash = o.Hash()
reader, err := o.Reader()
if err != nil {
return err
}
defer ioutil.CheckClose(reader, &err)
r := sync.GetBufioReader(reader)
defer sync.PutBufioReader(r)
for {
var line []byte
line, err = r.ReadBytes('\n')
if err != nil && err != io.EOF {
return err
}
line = bytes.TrimSpace(line)
if len(line) == 0 {
break // Start of message
}
split := bytes.SplitN(line, []byte{' '}, 2)
switch string(split[0]) {
case "object":
t.Target = plumbing.NewHash(string(split[1]))
case "type":
t.TargetType, err = plumbing.ParseObjectType(string(split[1]))
if err != nil {
return err
}
case "tag":
t.Name = string(split[1])
case "tagger":
t.Tagger.Decode(split[1])
}
if err == io.EOF {
return nil
}
}
data, err := io.ReadAll(r)
if err != nil {
return err
}
if sm, _ := parseSignedBytes(data); sm >= 0 {
t.PGPSignature = string(data[sm:])
data = data[:sm]
}
t.Message = string(data)
return nil
}
// Encode transforms a Tag into a plumbing.EncodedObject.
func (t *Tag) Encode(o plumbing.EncodedObject) error {
return t.encode(o, true)
}
// EncodeWithoutSignature exports a Tag into a plumbing.EncodedObject without the signature (corresponds to the payload of the PGP signature).
func (t *Tag) EncodeWithoutSignature(o plumbing.EncodedObject) error {
return t.encode(o, false)
}
func (t *Tag) encode(o plumbing.EncodedObject, includeSig bool) (err error) {
o.SetType(plumbing.TagObject)
w, err := o.Writer()
if err != nil {
return err
}
defer ioutil.CheckClose(w, &err)
if _, err = fmt.Fprintf(w,
"object %s\ntype %s\ntag %s\ntagger ",
t.Target.String(), t.TargetType.Bytes(), t.Name); err != nil {
return err
}
if err = t.Tagger.Encode(w); err != nil {
return err
}
if _, err = fmt.Fprint(w, "\n\n"); err != nil {
return err
}
if _, err = fmt.Fprint(w, t.Message); err != nil {
return err
}
// Note that this is highly sensitive to what is sent along in the message.
// The message *always* needs to end with a newline, or else the message and
// the signature will be concatenated into a corrupt object. Since this is a
// lower-level method, we assume you know what you are doing and have already
// prepared the message accordingly in the caller.
if includeSig {
if _, err = fmt.Fprint(w, t.PGPSignature); err != nil {
return err
}
}
return err
}
// Commit returns the commit pointed to by the tag. If the tag points to a
// different type of object ErrUnsupportedObject will be returned.
func (t *Tag) Commit() (*Commit, error) {
if t.TargetType != plumbing.CommitObject {
return nil, ErrUnsupportedObject
}
o, err := t.s.EncodedObject(plumbing.CommitObject, t.Target)
if err != nil {
return nil, err
}
return DecodeCommit(t.s, o)
}
// Tree returns the tree pointed to by the tag. If the tag points to a commit
// object the tree of that commit will be returned. If the tag does not point
// to a commit or tree object ErrUnsupportedObject will be returned.
func (t *Tag) Tree() (*Tree, error) {
switch t.TargetType {
case plumbing.CommitObject:
c, err := t.Commit()
if err != nil {
return nil, err
}
return c.Tree()
case plumbing.TreeObject:
return GetTree(t.s, t.Target)
default:
return nil, ErrUnsupportedObject
}
}
// Blob returns the blob pointed to by the tag. If the tag points to a
// different type of object ErrUnsupportedObject will be returned.
func (t *Tag) Blob() (*Blob, error) {
if t.TargetType != plumbing.BlobObject {
return nil, ErrUnsupportedObject
}
return GetBlob(t.s, t.Target)
}
// Object returns the object pointed to by the tag.
func (t *Tag) Object() (Object, error) {
o, err := t.s.EncodedObject(t.TargetType, t.Target)
if err != nil {
return nil, err
}
return DecodeObject(t.s, o)
}
// String returns the meta information contained in the tag as a formatted
// string.
func (t *Tag) String() string {
obj, _ := t.Object()
return fmt.Sprintf(
"%s %s\nTagger: %s\nDate: %s\n\n%s\n%s",
plumbing.TagObject, t.Name, t.Tagger.String(), t.Tagger.When.Format(DateFormat),
t.Message, objectAsString(obj),
)
}
// Verify performs PGP verification of the tag with a provided armored
// keyring and returns openpgp.Entity associated with verifying key on success.
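//
// A minimal usage sketch (armoredKeyRing is a hypothetical armored public
// key string matching the key that signed the tag):
//
//	entity, err := t.Verify(armoredKeyRing)
//	if err == nil {
//		// entity identifies the verifying key
//	}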
func (t *Tag) Verify(armoredKeyRing string) (*openpgp.Entity, error) {
keyRingReader := strings.NewReader(armoredKeyRing)
keyring, err := openpgp.ReadArmoredKeyRing(keyRingReader)
if err != nil {
return nil, err
}
// Extract signature.
signature := strings.NewReader(t.PGPSignature)
encoded := &plumbing.MemoryObject{}
// Encode tag components, excluding signature and get a reader object.
if err := t.EncodeWithoutSignature(encoded); err != nil {
return nil, err
}
er, err := encoded.Reader()
if err != nil {
return nil, err
}
return openpgp.CheckArmoredDetachedSignature(keyring, er, signature, nil)
}
// TagIter provides an iterator for a set of tags.
type TagIter struct {
storer.EncodedObjectIter
s storer.EncodedObjectStorer
}
// NewTagIter takes a storer.EncodedObjectStorer and a
// storer.EncodedObjectIter and returns a *TagIter that iterates over all
// tags contained in the storer.EncodedObjectIter.
//
// Any non-tag object returned by the storer.EncodedObjectIter is skipped.
func NewTagIter(s storer.EncodedObjectStorer, iter storer.EncodedObjectIter) *TagIter {
return &TagIter{iter, s}
}
// Next moves the iterator to the next tag and returns a pointer to it. If
// there are no more tags, it returns io.EOF.
func (iter *TagIter) Next() (*Tag, error) {
obj, err := iter.EncodedObjectIter.Next()
if err != nil {
return nil, err
}
return DecodeTag(iter.s, obj)
}
// ForEach calls the cb function for each tag contained in this iter until
// an error occurs or the end of the iter is reached. If ErrStop is returned
// by cb, the iteration stops and no error is returned. The iterator is closed.
func (iter *TagIter) ForEach(cb func(*Tag) error) error {
return iter.EncodedObjectIter.ForEach(func(obj plumbing.EncodedObject) error {
t, err := DecodeTag(iter.s, obj)
if err != nil {
return err
}
return cb(t)
})
}
func objectAsString(obj Object) string {
switch o := obj.(type) {
case *Commit:
return o.String()
default:
return ""
}
}
package object
import (
"context"
"errors"
"fmt"
"io"
"path"
"path/filepath"
"sort"
"strings"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/filemode"
"github.com/go-git/go-git/v6/plumbing/storer"
"github.com/go-git/go-git/v6/utils/ioutil"
"github.com/go-git/go-git/v6/utils/sync"
)
const (
maxTreeDepth = 1024
startingStackSize = 8
)
// New errors defined by this package.
var (
ErrMaxTreeDepth = errors.New("maximum tree depth exceeded")
ErrFileNotFound = errors.New("file not found")
ErrDirectoryNotFound = errors.New("directory not found")
ErrEntryNotFound = errors.New("entry not found")
ErrEntriesNotSorted = errors.New("entries in tree are not sorted")
)
// Tree is basically like a directory - it references a bunch of other trees
// and/or blobs (i.e. files and sub-directories)
type Tree struct {
Entries []TreeEntry
Hash plumbing.Hash
s storer.EncodedObjectStorer
m map[string]*TreeEntry
t map[string]*Tree // tree path cache
}
// GetTree gets a tree from an object storer and decodes it.
func GetTree(s storer.EncodedObjectStorer, h plumbing.Hash) (*Tree, error) {
o, err := s.EncodedObject(plumbing.TreeObject, h)
if err != nil {
return nil, err
}
return DecodeTree(s, o)
}
// DecodeTree decodes an encoded object into a *Tree and associates it to the
// given object storer.
func DecodeTree(s storer.EncodedObjectStorer, o plumbing.EncodedObject) (*Tree, error) {
t := &Tree{s: s}
if err := t.Decode(o); err != nil {
return nil, err
}
return t, nil
}
// TreeEntry represents a file
type TreeEntry struct {
Name string
Mode filemode.FileMode
Hash plumbing.Hash
}
// File returns the file identified by the `path` argument.
// The path is interpreted as relative to the tree receiver.
func (t *Tree) File(path string) (*File, error) {
e, err := t.FindEntry(path)
if err != nil {
return nil, ErrFileNotFound
}
blob, err := GetBlob(t.s, e.Hash)
if err != nil {
if errors.Is(err, plumbing.ErrObjectNotFound) {
return nil, ErrFileNotFound
}
return nil, err
}
return NewFile(path, e.Mode, blob), nil
}
// Size returns the plaintext size of an object, without reading it
// into memory.
func (t *Tree) Size(path string) (int64, error) {
e, err := t.FindEntry(path)
if err != nil {
return 0, ErrEntryNotFound
}
return t.s.EncodedObjectSize(e.Hash)
}
// Tree returns the tree identified by the `path` argument.
// The path is interpreted as relative to the tree receiver.
func (t *Tree) Tree(path string) (*Tree, error) {
e, err := t.FindEntry(path)
if err != nil {
return nil, ErrDirectoryNotFound
}
tree, err := GetTree(t.s, e.Hash)
if errors.Is(err, plumbing.ErrObjectNotFound) {
return nil, ErrDirectoryNotFound
}
return tree, err
}
// TreeEntryFile returns the *File for a given *TreeEntry.
func (t *Tree) TreeEntryFile(e *TreeEntry) (*File, error) {
blob, err := GetBlob(t.s, e.Hash)
if err != nil {
return nil, err
}
return NewFile(e.Name, e.Mode, blob), nil
}
// FindEntry searches for a TreeEntry in this tree or any of its subtrees.
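//
// For example (path made up):
//
//	entry, err := t.FindEntry("dir/subdir/file.go")
//	if err == nil {
//		_ = entry.Hash // hash of the blob or subtree at that path
//	}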
func (t *Tree) FindEntry(path string) (*TreeEntry, error) {
if t.t == nil {
t.t = make(map[string]*Tree)
}
pathParts := strings.Split(path, "/")
startingTree := t
pathCurrent := ""
// search for the longest path in the tree path cache
for i := len(pathParts) - 1; i > 1; i-- {
path := filepath.Join(pathParts[:i]...)
tree, ok := t.t[path]
if ok {
startingTree = tree
pathParts = pathParts[i:]
pathCurrent = path
break
}
}
var tree *Tree
var err error
for tree = startingTree; len(pathParts) > 1; pathParts = pathParts[1:] {
if tree, err = tree.dir(pathParts[0]); err != nil {
return nil, err
}
pathCurrent = filepath.Join(pathCurrent, pathParts[0])
t.t[pathCurrent] = tree
}
return tree.entry(pathParts[0])
}
func (t *Tree) dir(baseName string) (*Tree, error) {
entry, err := t.entry(baseName)
if err != nil {
return nil, ErrDirectoryNotFound
}
obj, err := t.s.EncodedObject(plumbing.TreeObject, entry.Hash)
if err != nil {
return nil, err
}
tree := &Tree{s: t.s}
err = tree.Decode(obj)
return tree, err
}
func (t *Tree) entry(baseName string) (*TreeEntry, error) {
if t.m == nil {
t.buildMap()
}
entry, ok := t.m[baseName]
if !ok {
return nil, ErrEntryNotFound
}
return entry, nil
}
// Files returns a FileIter that allows iterating over the files in the Tree
func (t *Tree) Files() *FileIter {
return NewFileIter(t.s, t)
}
// ID returns the object ID of the tree. The returned value will always match
// the current value of Tree.Hash.
//
// ID is present to fulfill the Object interface.
func (t *Tree) ID() plumbing.Hash {
return t.Hash
}
// Type returns the type of object. It always returns plumbing.TreeObject.
func (t *Tree) Type() plumbing.ObjectType {
return plumbing.TreeObject
}
// Decode transforms a plumbing.EncodedObject into a Tree struct.
func (t *Tree) Decode(o plumbing.EncodedObject) (err error) {
if o.Type() != plumbing.TreeObject {
return ErrUnsupportedObject
}
t.Hash = o.Hash()
if o.Size() == 0 {
return nil
}
t.Entries = nil
t.m = nil
reader, err := o.Reader()
if err != nil {
return err
}
defer ioutil.CheckClose(reader, &err)
r := sync.GetBufioReader(reader)
defer sync.PutBufioReader(r)
for {
str, err := r.ReadString(' ')
if err != nil {
if err == io.EOF {
break
}
return err
}
str = str[:len(str)-1] // strip last byte (' ')
mode, err := filemode.New(str)
if err != nil {
return err
}
name, err := r.ReadString(0)
if err != nil && err != io.EOF {
return err
}
var hash plumbing.Hash
if _, err = hash.ReadFrom(r); err != nil {
return err
}
baseName := name[:len(name)-1]
t.Entries = append(t.Entries, TreeEntry{
Hash: hash,
Mode: mode,
Name: baseName,
})
}
return nil
}
// TreeEntrySorter is a helper type for sorting TreeEntry slices.
type TreeEntrySorter []TreeEntry
func (s TreeEntrySorter) Len() int {
return len(s)
}
func (s TreeEntrySorter) Less(i, j int) bool {
name1 := s[i].Name
name2 := s[j].Name
if s[i].Mode == filemode.Dir {
name1 += "/"
}
if s[j].Mode == filemode.Dir {
name2 += "/"
}
return name1 < name2
}
func (s TreeEntrySorter) Swap(i, j int) {
s[i], s[j] = s[j], s[i]
}
// Encode transforms a Tree into a plumbing.EncodedObject.
// The tree entries must be sorted by name.
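//
// A minimal sketch of preparing and encoding the entries (o is a
// hypothetical plumbing.EncodedObject, e.g. a *plumbing.MemoryObject):
//
//	sort.Sort(TreeEntrySorter(t.Entries))
//	err := t.Encode(o)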
func (t *Tree) Encode(o plumbing.EncodedObject) (err error) {
o.SetType(plumbing.TreeObject)
w, err := o.Writer()
if err != nil {
return err
}
defer ioutil.CheckClose(w, &err)
if !sort.IsSorted(TreeEntrySorter(t.Entries)) {
return ErrEntriesNotSorted
}
for _, entry := range t.Entries {
if strings.IndexByte(entry.Name, 0) != -1 {
return fmt.Errorf("malformed filename %q", entry.Name)
}
if _, err = fmt.Fprintf(w, "%o %s", entry.Mode, entry.Name); err != nil {
return err
}
if _, err = w.Write([]byte{0x00}); err != nil {
return err
}
if _, err = entry.Hash.WriteTo(w); err != nil {
return err
}
}
return err
}
func (t *Tree) buildMap() {
t.m = make(map[string]*TreeEntry)
for i := 0; i < len(t.Entries); i++ {
t.m[t.Entries[i].Name] = &t.Entries[i]
}
}
// Diff returns a list of changes between this tree and the provided one.
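//
// A minimal usage sketch (from and to are hypothetical *Tree values):
//
//	changes, err := from.Diff(to)
//	if err == nil {
//		// changes lists insertions, deletions and modifications
//	}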
func (t *Tree) Diff(to *Tree) (Changes, error) {
return t.DiffContext(context.Background(), to)
}
// DiffContext returns a list of changes between this tree and the provided
// one. An error will be returned if the context expires. The provided context
// must be non-nil.
//
// NOTE: Since version 5.1.0 the renames are correctly handled, the settings
// used are the recommended options DefaultDiffTreeOptions.
func (t *Tree) DiffContext(ctx context.Context, to *Tree) (Changes, error) {
return DiffTreeWithOptions(ctx, t, to, DefaultDiffTreeOptions)
}
// Patch returns a Patch with all the changes between the trees, in chunks.
// This representation can be used to create several diff outputs.
func (t *Tree) Patch(to *Tree) (*Patch, error) {
return t.PatchContext(context.Background(), to)
}
// PatchContext returns a Patch with all the changes between the trees, in
// chunks. This representation can be used to create several diff outputs.
// If the context expires, an error will be returned. The provided context
// must be non-nil.
//
// NOTE: Since version 5.1.0 the renames are correctly handled, the settings
// used are the recommended options DefaultDiffTreeOptions.
func (t *Tree) PatchContext(ctx context.Context, to *Tree) (*Patch, error) {
changes, err := t.DiffContext(ctx, to)
if err != nil {
return nil, err
}
return changes.PatchContext(ctx)
}
// treeEntryIter facilitates iterating through the TreeEntry objects in a Tree.
type treeEntryIter struct {
t *Tree
pos int
}
func (iter *treeEntryIter) Next() (TreeEntry, error) {
if iter.pos >= len(iter.t.Entries) {
return TreeEntry{}, io.EOF
}
iter.pos++
return iter.t.Entries[iter.pos-1], nil
}
// TreeWalker provides a means of walking through all of the entries in a Tree.
type TreeWalker struct {
stack []*treeEntryIter
base string
recursive bool
seen map[plumbing.Hash]bool
s storer.EncodedObjectStorer
t *Tree
}
// NewTreeWalker returns a new TreeWalker for the given tree.
//
// It is the caller's responsibility to call Close() when finished with the
// tree walker.
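//
// A minimal usage sketch (t is a hypothetical *Tree):
//
//	w := NewTreeWalker(t, true, nil) // recursive, no seen map
//	defer w.Close()
//	for {
//		name, entry, err := w.Next()
//		if err == io.EOF {
//			break
//		}
//		if err != nil {
//			// handle the error
//			break
//		}
//		_, _ = name, entry // full path and TreeEntry
//	}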
func NewTreeWalker(t *Tree, recursive bool, seen map[plumbing.Hash]bool) *TreeWalker {
stack := make([]*treeEntryIter, 0, startingStackSize)
stack = append(stack, &treeEntryIter{t, 0})
return &TreeWalker{
stack: stack,
recursive: recursive,
seen: seen,
s: t.s,
t: t,
}
}
// Next returns the next object from the tree. Objects are returned in order
// and subtrees are included. After the last object has been returned further
// calls to Next() will return io.EOF.
//
// In the current implementation any objects which cannot be found in the
// underlying repository will be skipped automatically. It is possible that this
// may change in future versions.
func (w *TreeWalker) Next() (name string, entry TreeEntry, err error) {
var obj *Tree
for {
current := len(w.stack) - 1
if current < 0 {
// Nothing left on the stack so we're finished
err = io.EOF
return name, entry, err
}
if current > maxTreeDepth {
// We're probably following bad data or some self-referencing tree
err = ErrMaxTreeDepth
return name, entry, err
}
entry, err = w.stack[current].Next()
if err == io.EOF {
// Finished with the current tree, move back up to the parent
w.stack = w.stack[:current]
w.base, _ = path.Split(w.base)
w.base = strings.TrimSuffix(w.base, "/")
continue
}
if err != nil {
return name, entry, err
}
if w.seen[entry.Hash] {
continue
}
if entry.Mode == filemode.Dir {
obj, err = GetTree(w.s, entry.Hash)
}
name = simpleJoin(w.base, entry.Name)
if err != nil {
err = io.EOF
return name, entry, err
}
break
}
if !w.recursive {
return name, entry, err
}
if obj != nil {
w.stack = append(w.stack, &treeEntryIter{obj, 0})
w.base = simpleJoin(w.base, entry.Name)
}
return name, entry, err
}
// Tree returns the tree that the tree walker most recently operated on.
func (w *TreeWalker) Tree() *Tree {
current := len(w.stack) - 1
if w.stack[current].pos == 0 {
current--
}
if current < 0 {
return nil
}
return w.stack[current].t
}
// Close releases any resources used by the TreeWalker.
func (w *TreeWalker) Close() {
w.stack = nil
}
// TreeIter provides an iterator for a set of trees.
type TreeIter struct {
storer.EncodedObjectIter
s storer.EncodedObjectStorer
}
// NewTreeIter takes a storer.EncodedObjectStorer and a
// storer.EncodedObjectIter and returns a *TreeIter that iterates over all
// trees contained in the storer.EncodedObjectIter.
//
// Any non-tree object returned by the storer.EncodedObjectIter is skipped.
func NewTreeIter(s storer.EncodedObjectStorer, iter storer.EncodedObjectIter) *TreeIter {
return &TreeIter{iter, s}
}
// Next moves the iterator to the next tree and returns a pointer to it. If
// there are no more trees, it returns io.EOF.
func (iter *TreeIter) Next() (*Tree, error) {
for {
obj, err := iter.EncodedObjectIter.Next()
if err != nil {
return nil, err
}
if obj.Type() != plumbing.TreeObject {
continue
}
return DecodeTree(iter.s, obj)
}
}
// ForEach calls the cb function for each tree contained in this iter until
// an error occurs or the end of the iter is reached. If ErrStop is returned
// by cb, the iteration stops and no error is returned. The iterator is closed.
func (iter *TreeIter) ForEach(cb func(*Tree) error) error {
return iter.EncodedObjectIter.ForEach(func(obj plumbing.EncodedObject) error {
if obj.Type() != plumbing.TreeObject {
return nil
}
t, err := DecodeTree(iter.s, obj)
if err != nil {
return err
}
return cb(t)
})
}
func simpleJoin(parent, child string) string {
if len(parent) > 0 {
return parent + "/" + child
}
return child
}
package object
import (
"io"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/filemode"
"github.com/go-git/go-git/v6/utils/merkletrie/noder"
)
// A treenoder is a helper type that wraps git trees into merkletrie
// noders.
//
// As a merkletrie noder doesn't understand the concept of modes (e.g.
// file permissions), the treenoder includes the mode of the git tree in
// the hash, so changes in the modes will be detected as modifications
// to the file contents by the merkletrie difftree algorithm. This is
// consistent with how the "git diff-tree" command works.
type treeNoder struct {
parent *Tree // the root node is its own parent
name string // empty string for the root node
mode filemode.FileMode
hash plumbing.Hash
children []noder.Noder // memoized
}
// NewTreeRootNode returns the root node of a Tree
func NewTreeRootNode(t *Tree) noder.Noder {
if t == nil {
return &treeNoder{}
}
return &treeNoder{
parent: t,
name: "",
mode: filemode.Dir,
hash: t.Hash,
}
}
func (t *treeNoder) Skip() bool {
return false
}
func (t *treeNoder) isRoot() bool {
return t.name == ""
}
func (t *treeNoder) String() string {
return "treeNoder <" + t.name + ">"
}
func (t *treeNoder) Hash() []byte {
if t.mode == filemode.Deprecated {
return append(t.hash.Bytes(), filemode.Regular.Bytes()...)
}
return append(t.hash.Bytes(), t.mode.Bytes()...)
}
func (t *treeNoder) Name() string {
return t.name
}
func (t *treeNoder) IsDir() bool {
return t.mode == filemode.Dir
}
// Children will return the children of a treenoder as treenoders,
// building them from the children of the wrapped git tree.
func (t *treeNoder) Children() ([]noder.Noder, error) {
if t.mode != filemode.Dir {
return noder.NoChildren, nil
}
// children are memoized for efficiency
if t.children != nil {
return t.children, nil
}
// the parent of the returned children will be ourself as a tree if
// we are not the root treenoder. The root is special as it is its
// own parent.
parent := t.parent
if !t.isRoot() {
var err error
if parent, err = t.parent.Tree(t.name); err != nil {
return nil, err
}
}
var err error
t.children, err = transformChildren(parent)
return t.children, err
}
// transformChildren returns the children of a tree as treenoders.
// Efficiency is key here.
func transformChildren(t *Tree) ([]noder.Noder, error) {
var err error
var e TreeEntry
// there will be more tree entries than children in the tree,
// due to submodules and empty directories, but it is still
// worth pre-allocating the whole slice now, even if it is
// sometimes bigger than needed.
ret := make([]noder.Noder, 0, len(t.Entries))
walker := NewTreeWalker(t, false, nil) // don't recurse
// don't defer walker.Close() for efficiency reasons.
for {
_, e, err = walker.Next()
if err == io.EOF {
break
}
if err != nil {
walker.Close()
return nil, err
}
ret = append(ret, &treeNoder{
parent: t,
name: e.Name,
mode: e.Mode,
hash: e.Hash,
})
}
walker.Close()
return ret, nil
}
// NumChildren returns the number of children. Note that len(t.tree.Entries)
// does not match the number of elements walked by the treewalker, because of
// empty directories, submodules, etc., so we have to walk and count here.
func (t *treeNoder) NumChildren() (int, error) {
children, err := t.Children()
if err != nil {
return 0, err
}
return len(children), nil
}
// Package transport implements the git pack protocol with a pluggable
// transport.
//
// This is a low-level package to implement new transports. Use a concrete
// implementation instead (e.g. http, file, ssh).
//
// A simple example of usage can be found in the file package.
package transport
import (
"context"
"errors"
"io"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/protocol"
"github.com/go-git/go-git/v6/plumbing/protocol/packp"
"github.com/go-git/go-git/v6/plumbing/protocol/packp/capability"
"github.com/go-git/go-git/v6/plumbing/protocol/packp/sideband"
"github.com/go-git/go-git/v6/storage"
)
var (
// ErrUnsupportedVersion is returned when the protocol version is not
// supported.
ErrUnsupportedVersion = errors.New("unsupported protocol version")
// ErrUnsupportedService is returned when the service is not supported.
ErrUnsupportedService = errors.New("unsupported service")
// ErrInvalidResponse is returned when the response is invalid.
ErrInvalidResponse = errors.New("invalid response")
// ErrTimeoutExceeded is returned when the timeout is exceeded.
ErrTimeoutExceeded = errors.New("timeout exceeded")
// ErrPackedObjectsNotSupported is returned when the server does not support
// packed objects.
ErrPackedObjectsNotSupported = errors.New("packed objects not supported")
)
// RemoteError represents an error returned by the remote.
// TODO: embed error
type RemoteError struct {
Reason string
}
// Error implements the error interface.
func (e *RemoteError) Error() string {
return e.Reason
}
// NewRemoteError creates a new RemoteError.
func NewRemoteError(reason string) error {
return &RemoteError{Reason: reason}
}
// Connection represents a session endpoint connection.
type Connection interface {
// Close closes the connection.
Close() error
// Capabilities returns the list of capabilities supported by the server.
Capabilities() *capability.List
// Version returns the Git protocol version the server supports.
Version() protocol.Version
// StatelessRPC indicates that the connection is half-duplex and should
// operate in half-duplex mode, i.e. perform a single read-write cycle. This
// fits the HTTP POST request model, where a session may read the request,
// write a response, and exit.
StatelessRPC() bool
// GetRemoteRefs returns the references advertised by the remote.
// Using protocol v0 or v1, this returns the references advertised by the
// remote during the handshake. Using protocol v2, this runs the ls-refs
// command on the remote.
// This will error if the session is not already established using
// Handshake.
GetRemoteRefs(ctx context.Context) ([]*plumbing.Reference, error)
// Fetch sends a fetch-pack request to the server.
Fetch(ctx context.Context, req *FetchRequest) error
// Push sends a send-pack request to the server.
Push(ctx context.Context, req *PushRequest) error
}
var _ io.Closer = Connection(nil)
// FetchRequest contains the parameters for a fetch-pack request.
// This is used during the pack negotiation phase of the fetch operation.
// See https://git-scm.com/docs/pack-protocol#_packfile_negotiation
type FetchRequest struct {
// Progress is the progress sideband.
Progress sideband.Progress
// Wants is the list of object hashes to fetch.
// TODO: Build this slice in the transport package.
Wants []plumbing.Hash
// Haves is the list of object hashes the client already has.
// TODO: Build this slice in the transport package.
Haves []plumbing.Hash
// Depth is the depth of the fetch.
Depth int
// Filter holds the filters to be applied when deciding what
// objects will be added to the packfile.
Filter packp.Filter
// IncludeTags indicates whether tags should be fetched.
IncludeTags bool
}
// PushRequest contains the parameters for a push request.
type PushRequest struct {
// Packfile is the packfile reader.
Packfile io.ReadCloser
// Commands is the list of push commands to be sent to the server.
// TODO: build the Commands slice in the transport package.
Commands []*packp.Command
// Progress is the progress sideband.
Progress sideband.Progress
// Options is a set of push-options to be sent to the server during push.
Options []string
// Atomic indicates an atomic push.
// If the server supports atomic push, it will update the refs in one
// atomic transaction. Either all refs are updated or none.
Atomic bool
// Quiet indicates whether the server should suppress human-readable
// output.
Quiet bool
}
// Session is a Git protocol transfer session.
// This is used by all protocols.
type Session interface {
// Handshake starts the negotiation with the remote to determine the
// protocol version, if not already connected.
// Params are the optional extra parameters to be sent to the server. Use
// this to send the protocol version of the client and any other extra parameters.
Handshake(ctx context.Context, service Service, params ...string) (Connection, error)
}
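// Illustrative sketch, not part of the original source: a minimal fetch over
// a Session. The session sess and context ctx are assumed to be already set
// up; wants would normally be derived from the refs returned by
// GetRemoteRefs.
//
//	conn, err := sess.Handshake(ctx, UploadPackService)
//	if err != nil {
//		return err
//	}
//	defer conn.Close()
//	refs, err := conn.GetRemoteRefs(ctx)
//	if err != nil {
//		return err
//	}
//	wants := []plumbing.Hash{refs[0].Hash()}
//	err = conn.Fetch(ctx, &FetchRequest{Wants: wants})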
// Commander creates Command instances. This is the main entry point for
// transport implementations.
type Commander interface {
// Command creates a new Command for the given git command and
// endpoint. cmd can be git-upload-pack or git-receive-pack. An
// error should be returned if the endpoint is not supported or the
// command cannot be created (e.g. binary does not exist, connection
// cannot be established).
Command(ctx context.Context, cmd string, ep *Endpoint, auth AuthMethod, params ...string) (Command, error)
}
// Command is used for a single command execution.
// This interface is modeled after exec.Cmd and ssh.Session in the standard
// library.
type Command interface {
// StderrPipe returns a pipe that will be connected to the command's
// standard error when the command starts. It should not be called after
// Start.
StderrPipe() (io.Reader, error)
// StdinPipe returns a pipe that will be connected to the command's
// standard input when the command starts. It should not be called after
// Start. The pipe should be closed when no more input is expected.
StdinPipe() (io.WriteCloser, error)
// StdoutPipe returns a pipe that will be connected to the command's
// standard output when the command starts. It should not be called after
// Start.
StdoutPipe() (io.Reader, error)
// Start starts the specified command. It does not wait for it to
// complete.
Start() error
// Close closes the command and releases any resources used by it. It
// will block until the command exits.
Close() error
}
// CommandKiller extends the Command interface, allowing the command to be
// killed.
type CommandKiller interface {
// Kill kills and closes the session regardless of its state. It will block
// until the command is terminated.
Kill() error
}
type client struct {
cmdr Commander
}
// NewPackTransport creates a new client using the given Commander.
func NewPackTransport(runner Commander) Transport {
return &client{runner}
}
// NewSession returns a new session for an endpoint.
func (c *client) NewSession(st storage.Storer, ep *Endpoint, auth AuthMethod) (Session, error) {
return NewPackSession(st, ep, auth, c.cmdr)
}
// SupportedProtocols returns the Git protocol versions supported by the
// transport client.
func (c *client) SupportedProtocols() []protocol.Version {
return []protocol.Version{
protocol.V0,
protocol.V1,
}
}
package transport
import (
"context"
"io"
"github.com/go-git/go-git/v6/plumbing/format/packfile"
"github.com/go-git/go-git/v6/plumbing/protocol/packp"
"github.com/go-git/go-git/v6/plumbing/protocol/packp/capability"
"github.com/go-git/go-git/v6/plumbing/protocol/packp/sideband"
"github.com/go-git/go-git/v6/storage"
"github.com/go-git/go-git/v6/utils/ioutil"
)
// FetchPack fetches a packfile from the remote connection into the given
// storage repository and updates the shallow information.
func FetchPack(
ctx context.Context,
st storage.Storer,
conn Connection,
packf io.ReadCloser,
shallowInfo *packp.ShallowUpdate,
req *FetchRequest,
) (err error) {
packf = ioutil.NewContextReadCloser(ctx, packf)
// Do we have sideband enabled?
var demuxer *sideband.Demuxer
var reader io.Reader = packf
caps := conn.Capabilities()
if caps.Supports(capability.Sideband64k) {
demuxer = sideband.NewDemuxer(sideband.Sideband64k, reader)
} else if caps.Supports(capability.Sideband) {
demuxer = sideband.NewDemuxer(sideband.Sideband, reader)
}
if demuxer != nil && req.Progress != nil {
demuxer.Progress = req.Progress
reader = demuxer
}
if err := packfile.UpdateObjectStorage(st, reader); err != nil {
return err
}
if err := packf.Close(); err != nil {
return err
}
// Update shallow
if shallowInfo != nil {
if err := updateShallow(st, shallowInfo); err != nil {
return err
}
}
return nil
}
func updateShallow(st storage.Storer, shallowInfo *packp.ShallowUpdate) error {
shallows, err := st.Shallow()
if err != nil {
return err
}
outer:
for _, s := range shallowInfo.Shallows {
for _, oldS := range shallows {
if s == oldS {
continue outer
}
}
shallows = append(shallows, s)
}
// unshallow commits
for _, s := range shallowInfo.Unshallows {
for i, oldS := range shallows {
if s == oldS {
shallows = append(shallows[:i], shallows[i+1:]...)
break
}
}
}
return st.SetShallow(shallows)
}
package transport
import (
"path/filepath"
"github.com/go-git/go-billy/v6"
"github.com/go-git/go-billy/v6/osfs"
"github.com/go-git/go-git/v6/plumbing/cache"
"github.com/go-git/go-git/v6/plumbing/storer"
"github.com/go-git/go-git/v6/storage"
"github.com/go-git/go-git/v6/storage/filesystem"
)
// DefaultLoader is a filesystem loader ignoring host and resolving paths to /.
var DefaultLoader = NewFilesystemLoader(osfs.New(""), false)
// Loader loads a repository's storage.Storer based on an optional host and a path.
type Loader interface {
// Load loads a storer.Storer given a transport.Endpoint.
// Returns transport.ErrRepositoryNotFound if the repository does not
// exist.
Load(ep *Endpoint) (storage.Storer, error)
}
// FilesystemLoader is a Loader that uses a billy.Filesystem to load
// repositories from the file system. It ignores the host and resolves paths to
// the given base filesystem.
type FilesystemLoader struct {
base billy.Filesystem
strict bool
}
// NewFilesystemLoader creates a Loader that ignores host and resolves paths
// with a given base filesystem.
func NewFilesystemLoader(base billy.Filesystem, strict bool) Loader {
return &FilesystemLoader{base, strict}
}
// Load looks up the endpoint's path in the base file system and returns a
// storer for it. Returns transport.ErrRepositoryNotFound if a repository does
// not exist in the given path.
func (l *FilesystemLoader) Load(ep *Endpoint) (storage.Storer, error) {
return l.load(ep.Path, false)
}
func (l *FilesystemLoader) load(path string, tried bool) (storage.Storer, error) {
fs, err := l.base.Chroot(path)
if err != nil {
return nil, err
}
if _, err := fs.Stat("config"); err != nil {
if !l.strict && !tried {
tried = true
if fi, err := fs.Stat(".git"); err == nil && fi.IsDir() {
path = filepath.Join(path, ".git")
} else {
path = path + ".git"
}
return l.load(path, tried)
}
return nil, ErrRepositoryNotFound
}
return filesystem.NewStorageWithOptions(fs, cache.NewObjectLRUDefault(), filesystem.Options{}), nil
}
// MapLoader is a Loader that uses a lookup map of storage.Storer by
// transport.Endpoint.
type MapLoader map[string]storage.Storer
// Load returns a storage.Storer for a given transport.Endpoint by looking it
// up in the map. Returns transport.ErrRepositoryNotFound if the endpoint does
// not exist.
func (l MapLoader) Load(ep *Endpoint) (storage.Storer, error) {
s, ok := l[ep.String()]
if !ok {
return nil, ErrRepositoryNotFound
}
return s, nil
}
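// Illustrative sketches, not part of the original source. Serving
// repositories from a directory tree, with /srv/git as an assumed base path:
//
//	loader := NewFilesystemLoader(osfs.New("/srv/git"), false)
//	st, err := loader.Load(&Endpoint{URL: url.URL{Path: "/project.git"}})
//
// Or serving fixed in-memory storers keyed by endpoint string:
//
//	loader := MapLoader{"file:///project.git": memory.NewStorage()}
//	st, err := loader.Load(ep)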
package transport
import (
"bytes"
"context"
"io"
"github.com/go-git/go-git/v6/utils/ioutil"
)
type mockCommand struct {
stdin bytes.Buffer
stdout bytes.Buffer
stderr bytes.Buffer
}
// Pointer receivers ensure the pipes operate on the command's own buffers
// rather than on copies.
func (c *mockCommand) StderrPipe() (io.Reader, error) {
return &c.stderr, nil
}
func (c *mockCommand) StdinPipe() (io.WriteCloser, error) {
return ioutil.WriteNopCloser(&c.stdin), nil
}
func (c *mockCommand) StdoutPipe() (io.Reader, error) {
return &c.stdout, nil
}
func (c *mockCommand) Start() error {
return nil
}
func (c *mockCommand) Close() error {
return nil
}
type mockCommander struct {
stderr string
}
func (c mockCommander) Command(_ context.Context, _ string, _ *Endpoint, _ AuthMethod, _ ...string) (Command, error) {
return &mockCommand{
stderr: *bytes.NewBufferString(c.stderr),
}, nil
}
package transport
import (
"context"
"errors"
"fmt"
"io"
"slices"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/format/pktline"
"github.com/go-git/go-git/v6/plumbing/protocol/packp"
"github.com/go-git/go-git/v6/plumbing/protocol/packp/capability"
"github.com/go-git/go-git/v6/storage"
"github.com/go-git/go-git/v6/utils/ioutil"
)
// Negotiation errors.
var (
ErrFilterNotSupported = errors.New("server does not support filters")
ErrShallowNotSupported = errors.New("server does not support shallow clients")
)
// NegotiatePack returns the result of the pack negotiation phase of the fetch operation.
// See https://git-scm.com/docs/pack-protocol#_packfile_negotiation
func NegotiatePack(
ctx context.Context,
st storage.Storer,
conn Connection,
reader io.Reader,
writer io.WriteCloser,
req *FetchRequest,
) (shallowInfo *packp.ShallowUpdate, err error) {
reader = ioutil.NewContextReader(ctx, reader)
writer = ioutil.NewContextWriteCloser(ctx, writer)
caps := conn.Capabilities()
// Create upload-request
upreq := packp.NewUploadRequest()
multiAck := caps.Supports(capability.MultiACK)
multiAckDetailed := caps.Supports(capability.MultiACKDetailed)
if multiAckDetailed {
upreq.Capabilities.Set(capability.MultiACKDetailed) //nolint:errcheck
} else if multiAck {
upreq.Capabilities.Set(capability.MultiACK) //nolint:errcheck
}
if req.Progress != nil {
if caps.Supports(capability.Sideband64k) {
upreq.Capabilities.Set(capability.Sideband64k) //nolint:errcheck
} else if caps.Supports(capability.Sideband) {
upreq.Capabilities.Set(capability.Sideband) //nolint:errcheck
}
} else if caps.Supports(capability.NoProgress) {
upreq.Capabilities.Set(capability.NoProgress) //nolint:errcheck
}
// TODO: support thin-pack
// if caps.Supports(capability.ThinPack) {
// upreq.Capabilities.Set(capability.ThinPack) // nolint: errcheck
// }
if caps.Supports(capability.OFSDelta) {
upreq.Capabilities.Set(capability.OFSDelta) //nolint:errcheck
}
if caps.Supports(capability.Agent) {
upreq.Capabilities.Set(capability.Agent, capability.DefaultAgent()) //nolint:errcheck
}
if req.IncludeTags && caps.Supports(capability.IncludeTag) {
upreq.Capabilities.Set(capability.IncludeTag) //nolint:errcheck
}
if req.Filter != "" {
if caps.Supports(capability.Filter) {
upreq.Filter = req.Filter
if err := upreq.Capabilities.Set(capability.Filter); err != nil {
return nil, err
}
} else {
return nil, ErrFilterNotSupported
}
}
upreq.Wants = req.Wants
if req.Depth > 0 {
if !caps.Supports(capability.Shallow) {
return nil, ErrShallowNotSupported
}
upreq.Depth = packp.DepthCommits(req.Depth)
upreq.Shallows, err = st.Shallow()
if err != nil {
return nil, err
}
}
// Note: when every want is already among our haves, we have everything we
// asked for; flush, close the writer, and return ErrNoChange.
if isSubset(req.Wants, req.Haves) && len(upreq.Shallows) == 0 {
if err := pktline.WriteFlush(writer); err != nil {
return nil, err
}
// Close the writer to signal the end of the request
if err := writer.Close(); err != nil {
return nil, fmt.Errorf("closing writer: %s", err)
}
return nil, ErrNoChange
}
// Create upload-haves
common := map[plumbing.Hash]struct{}{}
var inVein int
var done bool
var gotContinue bool // whether we got a continue from the server
firstRound := true
for !done {
// Pop up to 32 "have" commits from the end of the pending list.
// TODO: Properly build and implement haves negotiation (walking parents
// into the pending list), and move it from remote.go to this package.
var uphav packp.UploadHaves
for i := 0; i < 32 && len(req.Haves) > 0; i++ {
uphav.Haves = append(uphav.Haves, req.Haves[len(req.Haves)-1])
req.Haves = req.Haves[:len(req.Haves)-1]
inVein++
}
// Let the server know we're done
const maxInVein = 256
done = len(req.Haves) == 0 || (gotContinue && inVein >= maxInVein)
uphav.Done = done
// Note: when every want is already among our haves, we have everything we
// asked for; flush, close the writer, and return ErrNoChange.
if isSubset(req.Wants, uphav.Haves) && len(upreq.Shallows) == 0 {
if err := pktline.WriteFlush(writer); err != nil {
return nil, err
}
// Close the writer to signal the end of the request
if err := writer.Close(); err != nil {
return nil, fmt.Errorf("closing writer: %s", err)
}
return nil, ErrNoChange
}
// Begin the upload-pack negotiation
if firstRound || conn.StatelessRPC() {
if err := upreq.Encode(writer); err != nil {
return nil, fmt.Errorf("sending upload-request: %w", err)
}
}
readc := make(chan error)
if !conn.StatelessRPC() {
go func() { readc <- readShallows(conn, reader, req, &shallowInfo, firstRound) }()
}
// Encode upload-haves
if err := uphav.Encode(writer); err != nil {
return nil, fmt.Errorf("sending upload-haves: %w", err)
}
// Close the writer to signal the end of the request
if conn.StatelessRPC() {
if err := writer.Close(); err != nil {
return nil, fmt.Errorf("closing writer: %w", err)
}
if err := readShallows(conn, reader, req, &shallowInfo, firstRound); err != nil {
return nil, err
}
} else {
// Wait for the read channel to be closed
if err := <-readc; err != nil {
return nil, err
}
}
go func() {
defer close(readc)
if done || len(uphav.Haves) > 0 {
var srvrs packp.ServerResponse
if err := srvrs.Decode(reader); err != nil {
readc <- fmt.Errorf("decoding server-response: %w", err)
return
}
for _, ack := range srvrs.ACKs {
if !gotContinue && ack.Status > 0 {
gotContinue = true
}
if ack.Status == packp.ACKCommon {
common[ack.Hash] = struct{}{}
}
}
}
readc <- nil
}()
// Wait for the read channel to be closed
if err := <-readc; err != nil {
return nil, err
}
firstRound = false
}
if !conn.StatelessRPC() {
if err := writer.Close(); err != nil {
return nil, fmt.Errorf("closing writer: %w", err)
}
}
return shallowInfo, nil
}
func isSubset(needle, haystack []plumbing.Hash) bool {
for _, h := range needle {
if !slices.Contains(haystack, h) {
return false
}
}
return true
}
func readShallows(
conn Connection,
r io.Reader,
req *FetchRequest,
shallowInfo **packp.ShallowUpdate,
firstRound bool,
) error {
// Decode shallow-update
// If depth is not zero, then we expect a shallow update from the
// server.
if (firstRound || conn.StatelessRPC()) && req.Depth > 0 {
var shupd packp.ShallowUpdate
if err := shupd.Decode(r); err != nil {
return fmt.Errorf("decoding shallow-update: %w", err)
}
// Only return the first shallow update
if *shallowInfo == nil {
*shallowInfo = &shupd
}
}
return nil
}
package transport
import (
"bufio"
"bytes"
"context"
"io"
"strings"
"sync/atomic"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/protocol"
"github.com/go-git/go-git/v6/plumbing/protocol/packp"
"github.com/go-git/go-git/v6/plumbing/protocol/packp/capability"
"github.com/go-git/go-git/v6/storage"
"github.com/go-git/go-git/v6/utils/ioutil"
)
// NewPackSession creates a new session that implements a full-duplex Git pack protocol.
func NewPackSession(
st storage.Storer,
ep *Endpoint,
auth AuthMethod,
cmdr Commander,
) (Session, error) {
ps := &PackSession{
ep: ep,
auth: auth,
cmdr: cmdr,
st: st,
}
return ps, nil
}
// PackSession is a session that implements a full-duplex Git pack transport.
type PackSession struct {
cmdr Commander
ep *Endpoint
auth AuthMethod
st storage.Storer
}
var _ Session = &PackSession{}
// Handshake implements Session.
func (p *PackSession) Handshake(ctx context.Context, service Service, params ...string) (conn Connection, err error) {
switch service {
case UploadPackService, ReceivePackService:
// do nothing
default:
return nil, ErrUnsupportedService
}
cmd, err := p.cmdr.Command(ctx, service.String(), p.ep, p.auth, params...)
if err != nil {
return nil, err
}
c := &packConnection{
st: p.st,
cmd: cmd,
svc: service,
}
// Check if the context is already done before starting the command.
if ctx.Err() != nil {
return nil, ctx.Err()
}
stdin, err := cmd.StdinPipe()
if err != nil {
return nil, err
}
c.w = stdin
stdout, err := cmd.StdoutPipe()
if err != nil {
return nil, err
}
cr := ioutil.NewContextReaderWithCloser(ctx, stdout, cmd)
c.r = bufio.NewReader(cr)
stderr, err := cmd.StderrPipe()
if err != nil {
return nil, err
}
// Some transports, like the Git protocol, don't support stderr, so we need
// to check that it's not nil before starting to read from it.
if stderr != nil {
go func() {
var buf bytes.Buffer
_, _ = ioutil.CopyBufferPool(&buf, stderr)
c.stderrBuf.Store(&buf)
}()
}
// Check if stderr is not empty before returning.
defer func() { checkError(c.stderr(), &err) }()
if err := cmd.Start(); err != nil {
_ = cmd.Close()
return nil, err
}
c.version, err = DiscoverVersion(c.r)
if err != nil {
return nil, err
}
switch c.version {
case protocol.V2:
return nil, ErrUnsupportedVersion
case protocol.V1:
// Read the version line
fallthrough
case protocol.V0:
}
ar := packp.NewAdvRefs()
if err := ar.Decode(c.r); err != nil {
return nil, err
}
c.refs = ar
c.caps = ar.Capabilities
return c, nil
}
// packConnection is a convenience type that implements the Connection
// interface on top of a running pack-protocol command.
type packConnection struct {
st storage.Storer
cmd Command
svc Service
w io.WriteCloser // stdin
r *bufio.Reader // stdout
stderrBuf atomic.Pointer[bytes.Buffer]
version protocol.Version
caps *capability.List
refs *packp.AdvRefs
}
var _ Connection = &packConnection{}
// stderr returns the stderr of the command if it's not empty. When non-empty,
// the returned error is always a RemoteError.
func (p *packConnection) stderr() error {
buf := p.stderrBuf.Load()
if buf == nil {
return nil
}
s := strings.TrimSpace(buf.String())
if s == "" {
return nil
}
return NewRemoteError(s)
}
// Close implements Connection.
func (p *packConnection) Close() error {
return p.cmd.Close()
}
// Capabilities implements Connection.
func (p *packConnection) Capabilities() *capability.List {
return p.caps
}
// GetRemoteRefs implements Connection.
func (p *packConnection) GetRemoteRefs(_ context.Context) ([]*plumbing.Reference, error) {
if p.refs == nil {
// TODO: return appropriate error
return nil, ErrEmptyRemoteRepository
}
// Some servers, like JGit, announce capabilities instead of returning a
// packp message with a flush. This verifies that we received an empty
// adv-refs, even if it contains capabilities.
forPush := p.svc == ReceivePackService
if !forPush && p.refs.IsEmpty() {
return nil, ErrEmptyRemoteRepository
}
return p.refs.MakeReferenceSlice()
}
// Version implements Connection.
func (p *packConnection) Version() protocol.Version {
return p.version
}
// StatelessRPC implements Connection.
func (*packConnection) StatelessRPC() bool {
return false
}
// Fetch implements Connection.
func (p *packConnection) Fetch(ctx context.Context, req *FetchRequest) (err error) {
shallows, err := NegotiatePack(ctx, p.st, p, p.r, p.w, req)
if err != nil {
return err
}
return FetchPack(ctx, p.st, p, io.NopCloser(p.r), shallows, req)
}
// Push implements Connection.
func (p *packConnection) Push(ctx context.Context, req *PushRequest) (err error) {
return SendPack(ctx, p.st, p, p.w, io.NopCloser(p.r), req)
}
// checkError checks if the error is not nil and, if so, updates the pointer
// with the error.
func checkError(err error, perr *error) {
if err != nil {
*perr = err
}
}
package transport
import (
"context"
"errors"
"fmt"
"io"
"github.com/go-git/go-git/v6/plumbing/protocol/packp"
"github.com/go-git/go-git/v6/plumbing/protocol/packp/capability"
"github.com/go-git/go-git/v6/plumbing/protocol/packp/sideband"
"github.com/go-git/go-git/v6/storage"
"github.com/go-git/go-git/v6/utils/ioutil"
)
// buildUpdateRequests constructs a new update-requests object for the given
// connection and push request.
func buildUpdateRequests(caps *capability.List, req *PushRequest) *packp.UpdateRequests {
upreq := packp.NewUpdateRequests()
// The atomic, report-status, report-status-v2, delete-refs, quiet, and
// push-cert capabilities are sent and recognized by the receive-pack (push
// to server) process.
//
// The ofs-delta and side-band-64k capabilities are sent and recognized by
// both upload-pack and receive-pack protocols. The agent and session-id
// capabilities may optionally be sent in both protocols.
//
// All other capabilities are only recognized by the upload-pack (fetch
// from server) process.
//
// In addition to the ones listed above, receive-pack special capabilities
// include object-format and push-options.
//
// However, upstream Git does *not* send all of these capabilities from the
// client side. See
// https://github.com/git/git/blob/485f5f863615e670fd97ae40af744e14072cfe18/send-pack.c#L589
// for more details.
//
// See https://git-scm.com/docs/gitprotocol-capabilities for more details.
if caps.Supports(capability.ReportStatus) {
upreq.Capabilities.Set(capability.ReportStatus) //nolint:errcheck
}
if req.Progress != nil {
if caps.Supports(capability.Sideband64k) {
upreq.Capabilities.Set(capability.Sideband64k) //nolint:errcheck
} else if caps.Supports(capability.Sideband) {
upreq.Capabilities.Set(capability.Sideband) //nolint:errcheck
}
if req.Quiet && caps.Supports(capability.Quiet) {
upreq.Capabilities.Set(capability.Quiet) //nolint:errcheck
}
}
if req.Atomic && caps.Supports(capability.Atomic) {
upreq.Capabilities.Set(capability.Atomic) //nolint:errcheck
}
if len(req.Options) > 0 && caps.Supports(capability.PushOptions) {
upreq.Capabilities.Set(capability.PushOptions) //nolint:errcheck
}
if caps.Supports(capability.Agent) {
upreq.Capabilities.Set(capability.Agent, capability.DefaultAgent()) //nolint:errcheck
}
upreq.Commands = req.Commands
return upreq
}
// SendPack is a function that sends a packfile to a remote server.
func SendPack(
ctx context.Context,
_ storage.Storer,
conn Connection,
writer io.WriteCloser,
reader io.ReadCloser,
req *PushRequest,
) error {
writer = ioutil.NewContextWriteCloser(ctx, writer)
reader = ioutil.NewContextReadCloser(ctx, reader)
var needPackfile bool
for _, cmd := range req.Commands {
if cmd.Action() != packp.Delete {
needPackfile = true
break
}
}
if !needPackfile && req.Packfile != nil {
return fmt.Errorf("packfile is not accepted for push request without new objects")
}
if needPackfile && req.Packfile == nil {
return fmt.Errorf("packfile is required for push request with new objects")
}
caps := conn.Capabilities()
upreq := buildUpdateRequests(caps, req)
if err := upreq.Encode(writer); err != nil {
return err
}
if upreq.Capabilities.Supports(capability.PushOptions) {
var opts packp.PushOptions
opts.Options = req.Options
if err := opts.Encode(writer); err != nil {
return fmt.Errorf("encoding push-options: %w", err)
}
}
// Send the packfile.
if req.Packfile != nil {
if _, err := ioutil.CopyBufferPool(writer, req.Packfile); err != nil {
return err
}
if err := req.Packfile.Close(); err != nil {
return fmt.Errorf("closing packfile: %w", err)
}
}
// Close the write pipe to signal the end of the request.
if err := writer.Close(); err != nil {
return err
}
var reportStatus int // 0 no support, 1 v1, 2 v2
if upreq.Capabilities.Supports(capability.ReportStatusV2) {
reportStatus = 2
} else if upreq.Capabilities.Supports(capability.ReportStatus) {
reportStatus = 1
}
if reportStatus == 0 {
// If we don't have report-status, we're done here.
return nil
}
var r io.Reader = reader
if req.Progress != nil {
var d *sideband.Demuxer
if upreq.Capabilities.Supports(capability.Sideband64k) {
d = sideband.NewDemuxer(sideband.Sideband64k, reader)
} else if upreq.Capabilities.Supports(capability.Sideband) {
d = sideband.NewDemuxer(sideband.Sideband, reader)
}
if d != nil {
if !upreq.Capabilities.Supports(capability.Quiet) {
// If quiet mode was negotiated, we don't report progress messages,
// which means the demuxer won't have a progress writer.
d.Progress = req.Progress
}
r = d
}
}
report := packp.NewReportStatus()
if err := report.Decode(r); err != nil {
return fmt.Errorf("decode report-status: %w", err)
}
reportError := report.Error()
// Read any remaining progress messages.
if reportStatus > 0 && len(upreq.Commands) > 0 {
_, err := io.ReadAll(r)
if err != nil && !errors.Is(err, io.EOF) {
_ = reader.Close()
if reportError != nil {
return reportError
}
return fmt.Errorf("reading progress messages: %w", err)
}
}
if err := reader.Close(); err != nil {
if reportError != nil {
return reportError
}
return fmt.Errorf("closing reader: %w", err)
}
return reportError
}
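// Illustrative sketch, not part of the original source: a delete-only push
// through a Connection. Since no command creates new objects, Packfile must
// stay nil. conn, ctx, and oldHash are assumed to exist.
//
//	req := &PushRequest{
//		Commands: []*packp.Command{{
//			Name: plumbing.ReferenceName("refs/heads/stale"),
//			Old:  oldHash,
//			New:  plumbing.ZeroHash, // zero target hash means delete
//		}},
//	}
//	err := conn.Push(ctx, req)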
package transport
import (
"bufio"
"context"
"fmt"
"io"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/format/packfile"
"github.com/go-git/go-git/v6/plumbing/format/pktline"
"github.com/go-git/go-git/v6/plumbing/protocol"
"github.com/go-git/go-git/v6/plumbing/protocol/packp"
"github.com/go-git/go-git/v6/plumbing/protocol/packp/capability"
"github.com/go-git/go-git/v6/plumbing/protocol/packp/sideband"
"github.com/go-git/go-git/v6/plumbing/storer"
"github.com/go-git/go-git/v6/storage"
"github.com/go-git/go-git/v6/utils/ioutil"
)
// ReceivePackOptions is a set of options for the ReceivePack service.
type ReceivePackOptions struct {
GitProtocol string
AdvertiseRefs bool
StatelessRPC bool
}
// ReceivePack is a server command that serves the receive-pack service.
// TODO: support hooks
func ReceivePack(
ctx context.Context,
st storage.Storer,
r io.ReadCloser,
w io.WriteCloser,
opts *ReceivePackOptions,
) error {
if w == nil {
return fmt.Errorf("nil writer")
}
w = ioutil.NewContextWriteCloser(ctx, w)
if opts == nil {
opts = &ReceivePackOptions{}
}
if opts.AdvertiseRefs || !opts.StatelessRPC {
switch version := ProtocolVersion(opts.GitProtocol); version {
case protocol.V1:
if _, err := pktline.Writef(w, "version %d\n", version); err != nil {
return err
}
// TODO: support version 2
case protocol.V0, protocol.V2:
default:
return fmt.Errorf("%w: %q", ErrUnsupportedVersion, version)
}
if err := AdvertiseReferences(ctx, st, w, ReceivePackService, opts.StatelessRPC); err != nil {
return err
}
}
if opts.AdvertiseRefs {
// Done, there's nothing else to do
return nil
}
if r == nil {
return fmt.Errorf("nil reader")
}
r = ioutil.NewContextReadCloser(ctx, r)
rd := bufio.NewReader(r)
l, _, err := pktline.PeekLine(rd)
if err != nil {
return err
}
// At this point, if we get a flush packet, it means the client
// has nothing to send, so we can return early.
if l == pktline.Flush {
return nil
}
updreq := packp.NewUpdateRequests()
if err := updreq.Decode(rd); err != nil {
return err
}
var (
caps = updreq.Capabilities
needPackfile bool
pushOpts packp.PushOptions
)
// TODO: Pass the options to the server-side hooks.
if updreq.Capabilities.Supports(capability.PushOptions) {
if err := pushOpts.Decode(rd); err != nil {
return fmt.Errorf("decoding push-options: %w", err)
}
}
// Should we expect a packfile?
for _, cmd := range updreq.Commands {
if cmd.Action() != packp.Delete {
needPackfile = true
break
}
}
// Receive the packfile
var unpackErr error
if needPackfile {
unpackErr = packfile.UpdateObjectStorage(st, rd)
}
// Done with the request, now close the reader
// to indicate that we are done reading from it.
if err := r.Close(); err != nil {
return fmt.Errorf("closing reader: %w", err)
}
// Report status if the client supports it
if !updreq.Capabilities.Supports(capability.ReportStatus) {
return unpackErr
}
var (
useSideband bool
writer io.Writer = w
)
if !caps.Supports(capability.NoProgress) {
if caps.Supports(capability.Sideband64k) {
writer = sideband.NewMuxer(sideband.Sideband64k, w)
useSideband = true
} else if caps.Supports(capability.Sideband) {
writer = sideband.NewMuxer(sideband.Sideband, w)
useSideband = true
}
}
writeCloser := ioutil.NewWriteCloser(writer, w)
if unpackErr != nil {
res := sendReportStatus(writeCloser, unpackErr, nil)
closeWriter(w)
return res
}
var firstErr error
cmdStatus := make(map[plumbing.ReferenceName]error)
updateReferences(st, updreq, cmdStatus, &firstErr)
if err := sendReportStatus(writeCloser, firstErr, cmdStatus); err != nil {
return err
}
if useSideband {
if err := pktline.WriteFlush(w); err != nil {
return fmt.Errorf("flushing sideband: %w", err)
}
}
if firstErr != nil {
return firstErr
}
return closeWriter(w)
}
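// Illustrative sketch, not part of the original source: serving a single
// push over a bidirectional stream rwc (e.g. one end of a pipe), with st
// assumed to hold the target repository:
//
//	err := ReceivePack(ctx, st, rwc, rwc, &ReceivePackOptions{})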
func closeWriter(w io.WriteCloser) error {
if err := w.Close(); err != nil {
return fmt.Errorf("closing writer: %w", err)
}
return nil
}
func sendReportStatus(w io.WriteCloser, unpackErr error, cmdStatus map[plumbing.ReferenceName]error) error {
rs := packp.NewReportStatus()
rs.UnpackStatus = "ok"
if unpackErr != nil {
rs.UnpackStatus = unpackErr.Error()
}
for ref, err := range cmdStatus {
msg := "ok"
if err != nil {
msg = err.Error()
}
status := &packp.CommandStatus{
ReferenceName: ref,
Status: msg,
}
rs.CommandStatuses = append(rs.CommandStatuses, status)
}
if err := rs.Encode(w); err != nil {
return err
}
return nil
}
func setStatus(cmdStatus map[plumbing.ReferenceName]error, firstErr *error, ref plumbing.ReferenceName, err error) {
cmdStatus[ref] = err
if *firstErr == nil && err != nil {
*firstErr = err
}
}
func referenceExists(s storer.ReferenceStorer, n plumbing.ReferenceName) (bool, error) {
_, err := s.Reference(n)
if err == plumbing.ErrReferenceNotFound {
return false, nil
}
return err == nil, err
}
func updateReferences(st storage.Storer, req *packp.UpdateRequests, cmdStatus map[plumbing.ReferenceName]error, firstErr *error) {
for _, cmd := range req.Commands {
exists, err := referenceExists(st, cmd.Name)
if err != nil {
setStatus(cmdStatus, firstErr, cmd.Name, err)
continue
}
switch cmd.Action() {
case packp.Create:
if exists {
setStatus(cmdStatus, firstErr, cmd.Name, ErrUpdateReference)
continue
}
ref := plumbing.NewHashReference(cmd.Name, cmd.New)
err := st.SetReference(ref)
setStatus(cmdStatus, firstErr, cmd.Name, err)
case packp.Delete:
if !exists {
setStatus(cmdStatus, firstErr, cmd.Name, ErrUpdateReference)
continue
}
err := st.RemoveReference(cmd.Name)
setStatus(cmdStatus, firstErr, cmd.Name, err)
case packp.Update:
if !exists {
setStatus(cmdStatus, firstErr, cmd.Name, ErrUpdateReference)
continue
}
ref := plumbing.NewHashReference(cmd.Name, cmd.New)
err := st.SetReference(ref)
setStatus(cmdStatus, firstErr, cmd.Name, err)
}
}
}
package transport
import (
"fmt"
"sync"
)
// registry holds the protocols supported by default.
var (
registry = map[string]Transport{}
mtx sync.RWMutex
)
// Register adds a new protocol or replaces an existing one.
// Equivalent to client.InstallProtocol in go-git before v6.
func Register(protocol string, c Transport) {
mtx.Lock()
registry[protocol] = c
mtx.Unlock()
}
// Unregister removes a protocol from the list of supported protocols.
func Unregister(scheme string) {
mtx.Lock()
delete(registry, scheme)
mtx.Unlock()
}
// Get returns the appropriate client for the given protocol.
func Get(p string) (Transport, error) {
mtx.RLock()
defer mtx.RUnlock()
f, ok := registry[p]
if !ok {
return nil, fmt.Errorf("unsupported scheme %q", p)
}
if f == nil {
return nil, fmt.Errorf("malformed client for scheme %q, client is defined as nil", p)
}
return f, nil
}
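// Illustrative sketch, not part of the original source: installing a custom
// transport for a made-up "foo" scheme and looking it up again. fooTransport
// is a hypothetical type implementing Transport.
//
//	Register("foo", fooTransport{})
//	t, err := Get("foo")
//	// ... use t ...
//	Unregister("foo")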
package transport
import (
"context"
"errors"
"fmt"
"io"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/object"
"github.com/go-git/go-git/v6/plumbing/protocol/packp"
"github.com/go-git/go-git/v6/plumbing/protocol/packp/capability"
"github.com/go-git/go-git/v6/plumbing/storer"
"github.com/go-git/go-git/v6/storage"
)
// ErrUpdateReference is returned when a reference update fails.
var ErrUpdateReference = errors.New("failed to update ref")
// AdvertiseReferences is a server command that implements the reference
// discovery phase of the Git transfer protocol.
func AdvertiseReferences(
_ context.Context,
st storage.Storer,
w io.Writer,
service Service,
smart bool,
) error {
switch service {
case UploadPackService, ReceivePackService:
default:
return fmt.Errorf("%w: %s", ErrUnsupportedService, service)
}
forPush := service == ReceivePackService
ar := packp.NewAdvRefs()
// Set server default capabilities
ar.Capabilities.Set(capability.Agent, capability.DefaultAgent()) //nolint:errcheck
ar.Capabilities.Set(capability.OFSDelta) //nolint:errcheck
ar.Capabilities.Set(capability.Sideband64k) //nolint:errcheck
if forPush {
// TODO: support thin-pack
ar.Capabilities.Set(capability.NoThin) //nolint:errcheck
// TODO: support atomic
ar.Capabilities.Set(capability.DeleteRefs) //nolint:errcheck
ar.Capabilities.Set(capability.ReportStatus) //nolint:errcheck
ar.Capabilities.Set(capability.PushOptions) //nolint:errcheck
ar.Capabilities.Set(capability.Quiet) //nolint:errcheck
} else {
// TODO: support include-tag
// TODO: support deepen
// TODO: support deepen-since
ar.Capabilities.Set(capability.MultiACK) //nolint:errcheck
ar.Capabilities.Set(capability.MultiACKDetailed) //nolint:errcheck
ar.Capabilities.Set(capability.Sideband) //nolint:errcheck
ar.Capabilities.Set(capability.NoProgress) //nolint:errcheck
ar.Capabilities.Set(capability.SymRef) //nolint:errcheck
ar.Capabilities.Set(capability.Shallow) //nolint:errcheck
}
// Set references
if err := addReferences(st, ar, !forPush); err != nil {
return err
}
if smart {
smartReply := packp.SmartReply{
Service: service.String(),
}
if err := smartReply.Encode(w); err != nil {
return fmt.Errorf("failed to encode smart reply: %w", err)
}
}
return ar.Encode(w)
}
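// Illustrative sketch, not part of the original source: the body of a smart
// HTTP "GET /info/refs?service=git-upload-pack" handler could advertise the
// references with the smart reply enabled:
//
//	err := AdvertiseReferences(ctx, st, w, UploadPackService, true)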
func addReferences(st storage.Storer, ar *packp.AdvRefs, addHead bool) error {
iter, err := st.IterReferences()
if err != nil {
return err
}
// Add references and their peeled values
if err := iter.ForEach(func(r *plumbing.Reference) error {
hash, name := r.Hash(), r.Name()
switch r.Type() {
case plumbing.SymbolicReference:
ref, err := storer.ResolveReference(st, r.Target())
if errors.Is(err, plumbing.ErrReferenceNotFound) {
return nil
}
if err != nil {
return err
}
hash = ref.Hash()
}
if name == plumbing.HEAD {
if !addHead {
return nil
}
// Add default branch HEAD symref
ar.Capabilities.Add(capability.SymRef, fmt.Sprintf("%s:%s", name, r.Target())) //nolint:errcheck
ar.Head = &hash
}
ar.References[name.String()] = hash
if r.Name().IsTag() {
if tag, err := object.GetTag(st, hash); err == nil {
ar.Peeled[name.String()] = tag.Target
}
}
return nil
}); err != nil {
return err
}
return nil
}
package transport
import (
"fmt"
"github.com/go-git/go-billy/v6"
"github.com/go-git/go-git/v6/internal/repository"
"github.com/go-git/go-git/v6/plumbing/storer"
"github.com/go-git/go-git/v6/storage"
)
// UpdateServerInfo updates the server info files in the repository.
//
// It generates a list of available refs for the repository.
// Used by the git http transport (dumb). For more information refer to:
// https://git-scm.com/book/id/v2/Git-Internals-Transfer-Protocols#_the_dumb_protocol
func UpdateServerInfo(s storage.Storer, fs billy.Filesystem) error {
pos, ok := s.(storer.PackedObjectStorer)
if !ok {
return ErrPackedObjectsNotSupported
}
infoRefs, err := fs.Create("info/refs")
if err != nil {
return err
}
defer infoRefs.Close() //nolint:errcheck
refsIter, err := s.IterReferences()
if err != nil {
return err
}
defer refsIter.Close()
if err := repository.WriteInfoRefs(infoRefs, s); err != nil {
return fmt.Errorf("failed to write info/refs: %w", err)
}
infoPacks, err := fs.Create("objects/info/packs")
if err != nil {
return err
}
defer infoPacks.Close() //nolint:errcheck
if err := repository.WriteObjectsInfoPacks(infoPacks, pos); err != nil {
return fmt.Errorf("failed to write objects/info/packs: %w", err)
}
return nil
}
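// Illustrative sketch, not part of the original source: regenerating the
// dumb-protocol metadata for a bare repository at an assumed path:
//
//	fs := osfs.New("/srv/git/project.git")
//	st := filesystem.NewStorageWithOptions(fs, cache.NewObjectLRUDefault(), filesystem.Options{})
//	err := UpdateServerInfo(st, fs)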
package transport
import "strings"
// Service represents a Git transport service.
// All services are prefixed with "git-".
type Service string
// String returns the string representation of the service.
func (s Service) String() string {
return string(s)
}
// Name returns the name of the service without the "git-" prefix.
func (s Service) Name() string {
return strings.TrimPrefix(string(s), "git-")
}
// Git service names.
const (
UploadPackService Service = "git-upload-pack"
UploadArchiveService Service = "git-upload-archive"
ReceivePackService Service = "git-receive-pack"
)
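// Illustrative values, derived from the definitions above:
//
//	UploadPackService.String() // "git-upload-pack"
//	UploadPackService.Name()   // "upload-pack"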
// Package transport includes the implementation for different transport
// protocols.
//
// `Client` can be used to fetch and send packfiles to a git server.
// The `client` package provides higher level functions to instantiate the
// appropriate `Client` based on the repository URL.
//
// go-git supports HTTP and SSH (see `Protocols`), but you can also install
// your own protocols (see the `client` package).
//
// Each protocol has its own implementation of `Client`, but you should
// generally not use them directly, use `client.NewClient` instead.
package transport
import (
"errors"
"fmt"
"net"
"net/url"
"regexp"
"runtime"
"strings"
giturl "github.com/go-git/go-git/v6/internal/url"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/protocol"
"github.com/go-git/go-git/v6/storage"
)
// Transport errors.
var (
ErrRepositoryNotFound = errors.New("repository not found")
ErrEmptyRemoteRepository = errors.New("remote repository is empty")
ErrNoChange = errors.New("no change")
ErrAuthenticationRequired = errors.New("authentication required")
ErrAuthorizationFailed = errors.New("authorization failed")
ErrEmptyUploadPackRequest = errors.New("empty git-upload-pack given")
ErrInvalidAuthMethod = errors.New("invalid auth method")
ErrAlreadyConnected = errors.New("session already established")
ErrInvalidRequest = errors.New("invalid request")
)
// Transport can initiate git-upload-pack and git-receive-pack processes.
// It is implemented both by the client and the server, making this an RPC.
type Transport interface {
// NewSession returns a new session for an endpoint.
NewSession(storage.Storer, *Endpoint, AuthMethod) (Session, error)
// SupportedProtocols returns the Git protocol versions supported by the
// transport client.
SupportedProtocols() []protocol.Version
}
// AuthMethod defines the interface for authentication.
type AuthMethod interface {
fmt.Stringer
Name() string
}
// Endpoint represents a Git URL in any supported protocol.
type Endpoint struct {
url.URL
// InsecureSkipTLS skips TLS certificate verification when the protocol is
// HTTPS.
InsecureSkipTLS bool
// CaBundle specifies an additional CA bundle to use with the system
// certificate pool.
CaBundle []byte
// Proxy provides info required for connecting to a proxy.
Proxy ProxyOptions
}
// ProxyOptions provides configuration for proxy connections.
type ProxyOptions struct {
URL string
Username string
Password string
}
// Validate validates the proxy options.
func (o *ProxyOptions) Validate() error {
if o.URL != "" {
_, err := url.Parse(o.URL)
return err
}
return nil
}
// FullURL returns the full proxy URL including credentials.
func (o *ProxyOptions) FullURL() (*url.URL, error) {
proxyURL, err := url.Parse(o.URL)
if err != nil {
return nil, err
}
if o.Username != "" {
if o.Password != "" {
proxyURL.User = url.UserPassword(o.Username, o.Password)
} else {
proxyURL.User = url.User(o.Username)
}
}
return proxyURL, nil
}
var fileIssueWindows = regexp.MustCompile(`^/[A-Za-z]:(/|\\)`)
// NewEndpoint parses an endpoint string and returns an Endpoint.
func NewEndpoint(endpoint string) (*Endpoint, error) {
if e, ok := parseSCPLike(endpoint); ok {
return e, nil
}
if e, ok := parseFile(endpoint); ok {
return e, nil
}
return parseURL(endpoint)
}
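// Illustrative examples, not part of the original source, showing the three
// parse paths:
//
//	e, _ := NewEndpoint("https://github.com/go-git/go-git.git") // URL: scheme "https"
//	e, _ = NewEndpoint("git@github.com:go-git/go-git.git")      // SCP-like: scheme "ssh"
//	e, _ = NewEndpoint("/srv/git/project.git")                  // plain path: scheme "file"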
func parseURL(endpoint string) (*Endpoint, error) {
if after, ok := strings.CutPrefix(endpoint, "file://"); ok {
endpoint = after
// When triple / is used, the path in Windows may end up having an
// additional / resulting in "/C:/Dir".
if runtime.GOOS == "windows" &&
fileIssueWindows.MatchString(endpoint) {
endpoint = endpoint[1:]
}
return &Endpoint{
URL: url.URL{
Scheme: "file",
Path: endpoint,
},
}, nil
}
u, err := url.Parse(endpoint)
if err != nil {
return nil, err
}
if !u.IsAbs() {
return nil, plumbing.NewPermanentError(fmt.Errorf(
"invalid endpoint: %s", endpoint,
))
}
return &Endpoint{
URL: *u,
}, nil
}
func parseSCPLike(endpoint string) (*Endpoint, bool) {
if giturl.MatchesScheme(endpoint) || !giturl.MatchesScpLike(endpoint) {
return nil, false
}
user, host, port, path := giturl.FindScpLikeComponents(endpoint)
if port != "" {
host = net.JoinHostPort(host, port)
}
return &Endpoint{
URL: url.URL{
Scheme: "ssh",
User: url.User(user),
Host: host,
Path: path,
},
}, true
}
func parseFile(endpoint string) (*Endpoint, bool) {
if giturl.MatchesScheme(endpoint) {
return nil, false
}
path := endpoint
return &Endpoint{
URL: url.URL{
Scheme: "file",
Path: path,
},
}, true
}
package transport
import (
"bufio"
"context"
"fmt"
"io"
"math"
"github.com/go-git/go-git/v6/plumbing"
"github.com/go-git/go-git/v6/plumbing/format/packfile"
"github.com/go-git/go-git/v6/plumbing/format/pktline"
"github.com/go-git/go-git/v6/plumbing/object"
"github.com/go-git/go-git/v6/plumbing/protocol"
"github.com/go-git/go-git/v6/plumbing/protocol/packp"
"github.com/go-git/go-git/v6/plumbing/protocol/packp/capability"
"github.com/go-git/go-git/v6/plumbing/protocol/packp/sideband"
"github.com/go-git/go-git/v6/plumbing/revlist"
"github.com/go-git/go-git/v6/storage"
"github.com/go-git/go-git/v6/utils/ioutil"
)
// UploadPackOptions is a set of options for the UploadPack service.
type UploadPackOptions struct {
GitProtocol string
AdvertiseRefs bool
StatelessRPC bool
}
// UploadPack is a server command that serves the upload-pack service.
func UploadPack(
ctx context.Context,
st storage.Storer,
r io.ReadCloser,
w io.WriteCloser,
opts *UploadPackOptions,
) error {
if w == nil {
return fmt.Errorf("nil writer")
}
w = ioutil.NewContextWriteCloser(ctx, w)
if opts == nil {
opts = &UploadPackOptions{}
}
if opts.AdvertiseRefs || !opts.StatelessRPC {
switch version := ProtocolVersion(opts.GitProtocol); version {
case protocol.V1:
if _, err := pktline.Writef(w, "version %d\n", version); err != nil {
return err
}
// TODO: support version 2
case protocol.V0, protocol.V2:
default:
return fmt.Errorf("%w: %q", ErrUnsupportedVersion, version)
}
if err := AdvertiseReferences(ctx, st, w, UploadPackService, opts.StatelessRPC); err != nil {
return fmt.Errorf("advertising references: %w", err)
}
}
if opts.AdvertiseRefs {
// Done, there's nothing else to do
return nil
}
if r == nil {
return fmt.Errorf("nil reader")
}
r = ioutil.NewContextReadCloser(ctx, r)
rd := bufio.NewReader(r)
l, _, err := pktline.PeekLine(rd)
if err != nil {
return fmt.Errorf("peeking line: %w", err)
}
// In case the client has nothing to send, it sends a flush packet to
// indicate that it is done sending data. In that case, we're done
// here.
if l == pktline.Flush {
return nil
}
var done bool
var haves []plumbing.Hash
var upreq *packp.UploadRequest
var havesWithRef map[plumbing.Hash][]plumbing.Hash
var multiAck, multiAckDetailed bool
var caps *capability.List
var wants []plumbing.Hash
firstRound := true
for !done {
writec := make(chan error)
if firstRound || opts.StatelessRPC {
upreq = packp.NewUploadRequest()
if err := upreq.Decode(rd); err != nil {
return fmt.Errorf("decoding upload-request: %w", err)
}
wants = upreq.Wants
caps = upreq.Capabilities
if err := r.Close(); err != nil {
return fmt.Errorf("closing reader: %w", err)
}
// Find common commits/objects
havesWithRef, err = revlist.ObjectsWithRef(st, wants, nil)
if err != nil {
return fmt.Errorf("getting objects with ref: %w", err)
}
// Encode objects to packfile and write to client
multiAck = caps.Supports(capability.MultiACK)
multiAckDetailed = caps.Supports(capability.MultiACKDetailed)
go func() {
// TODO: support deepen-since, and deepen-not
var shupd packp.ShallowUpdate
if !upreq.Depth.IsZero() {
switch depth := upreq.Depth.(type) {
case packp.DepthCommits:
if err := getShallowCommits(st, wants, int(depth), &shupd); err != nil {
writec <- fmt.Errorf("getting shallow commits: %w", err)
return
}
default:
writec <- fmt.Errorf("unsupported depth type %T", upreq.Depth)
return
}
if err := shupd.Encode(w); err != nil {
writec <- fmt.Errorf("sending shallow-update: %w", err)
return
}
}
writec <- nil
}()
}
if err := <-writec; err != nil {
return err
}
var uphav packp.UploadHaves
if err := uphav.Decode(rd); err != nil {
return fmt.Errorf("decoding upload-haves: %w", err)
}
if err := r.Close(); err != nil {
return fmt.Errorf("closing reader: %w", err)
}
haves = append(haves, uphav.Haves...)
done = uphav.Done
common := map[plumbing.Hash]struct{}{}
var ack packp.ACK
var acks []packp.ACK
for _, hu := range uphav.Haves {
refs, ok := havesWithRef[hu]
if ok {
for _, ref := range refs {
common[ref] = struct{}{}
}
}
var status packp.ACKStatus
if multiAckDetailed {
status = packp.ACKCommon
if !ok {
status = packp.ACKReady
}
} else if multiAck {
status = packp.ACKContinue
}
if ok || multiAck || multiAckDetailed {
ack = packp.ACK{Hash: hu, Status: status}
acks = append(acks, ack)
if !multiAck && !multiAckDetailed {
break
}
}
}
go func() {
defer close(writec)
if len(haves) > 0 {
// Encode ACKs to client when we have haves
srvrsp := packp.ServerResponse{ACKs: acks}
if err := srvrsp.Encode(w); err != nil {
writec <- fmt.Errorf("sending acks server-response: %w", err)
return
}
}
if !done {
if multiAck || multiAckDetailed {
// Encode a NAK for multi-ack
srvrsp := packp.ServerResponse{}
if err := srvrsp.Encode(w); err != nil {
writec <- fmt.Errorf("sending nak server-response: %w", err)
return
}
}
} else if !ack.Hash.IsZero() && (multiAck || multiAckDetailed) {
// We're done, send the final ACK
ack.Status = 0
srvrsp := packp.ServerResponse{ACKs: []packp.ACK{ack}}
if err := srvrsp.Encode(w); err != nil {
writec <- fmt.Errorf("sending final ack server-response: %w", err)
return
}
} else if ack.Hash.IsZero() {
// We don't have multi-ack and there are no haves. Encode a NAK.
srvrsp := packp.ServerResponse{}
if err := srvrsp.Encode(w); err != nil {
writec <- fmt.Errorf("sending final nak server-response: %w", err)
return
}
}
writec <- nil
}()
if err := <-writec; err != nil {
return err
}
firstRound = false
}
// Done with the request, now close the reader
// to indicate that we are done reading from it.
if err := r.Close(); err != nil {
return fmt.Errorf("closing reader: %w", err)
}
objs, err := objectsToUpload(st, wants, haves)
if err != nil {
w.Close() //nolint:errcheck
return fmt.Errorf("getting objects to upload: %w", err)
}
var (
useSideband bool
writer io.Writer = w
)
if caps.Supports(capability.Sideband64k) {
writer = sideband.NewMuxer(sideband.Sideband64k, w)
useSideband = true
} else if caps.Supports(capability.Sideband) {
writer = sideband.NewMuxer(sideband.Sideband, w)
useSideband = true
}
// TODO: Support shallow-file
// TODO: Support thin-pack
e := packfile.NewEncoder(writer, st, false)
_, err = e.Encode(objs, 10)
if err != nil {
return fmt.Errorf("encoding packfile: %w", err)
}
if useSideband {
if err := pktline.WriteFlush(w); err != nil {
return fmt.Errorf("flushing sideband: %w", err)
}
}
if err := w.Close(); err != nil {
return fmt.Errorf("closing writer: %w", err)
}
return nil
}
func objectsToUpload(st storage.Storer, wants, haves []plumbing.Hash) ([]plumbing.Hash, error) {
return revlist.Objects(st, wants, haves)
}
func getShallowCommits(st storage.Storer, heads []plumbing.Hash, depth int, upd *packp.ShallowUpdate) error {
var i, curDepth int
var commit *object.Commit
depths := map[*object.Commit]int{}
stack := []object.Object{}
for commit != nil || i < len(heads) || len(stack) > 0 {
if commit == nil {
if i < len(heads) {
obj, err := st.EncodedObject(plumbing.CommitObject, heads[i])
i++
if err != nil {
continue
}
commit, err = object.DecodeCommit(st, obj)
if err != nil {
commit = nil
continue
}
depths[commit] = 0
curDepth = 0
} else if len(stack) > 0 {
commit = stack[len(stack)-1].(*object.Commit)
stack = stack[:len(stack)-1]
curDepth = depths[commit]
}
}
curDepth++
if depth != math.MaxInt && curDepth >= depth {
upd.Shallows = append(upd.Shallows, commit.Hash)
commit = nil
continue
}
upd.Unshallows = append(upd.Unshallows, commit.Hash)
parents := commit.Parents()
commit = nil
for {
parent, err := parents.Next()
if err == io.EOF {
break
}
if err != nil {
return err
}
if depths[parent] != 0 && curDepth >= depths[parent] {
continue
}
depths[parent] = curDepth
if _, err := parents.Next(); err == nil {
stack = append(stack, parent)
} else {
commit = parent
curDepth = depths[commit]
}
}
}
return nil
}
package transport
import (
"strings"
"github.com/go-git/go-git/v6/plumbing/format/pktline"
"github.com/go-git/go-git/v6/plumbing/protocol"
"github.com/go-git/go-git/v6/utils/ioutil"
)
// DiscoverVersion reads the first pktline from the reader to determine the
// protocol version. This is used by the client to determine the protocol
// version of the server.
func DiscoverVersion(r ioutil.ReadPeeker) (protocol.Version, error) {
ver := protocol.V0
_, pktb, err := pktline.PeekLine(r)
if err != nil {
return ver, err
}
pkt := strings.TrimSpace(string(pktb))
if strings.HasPrefix(pkt, "version ") {
// Consume the version packet
pktline.ReadLine(r) //nolint:errcheck
if v, _ := protocol.Parse(pkt[8:]); v > ver {
ver = protocol.Version(v)
}
}
return ver, nil
}
// ProtocolVersion tries to find the version parameter in the protocol string.
// This expects the protocol string from the GIT_PROTOCOL environment variable.
// This is used by the server to determine the protocol version requested by
// the client.
func ProtocolVersion(p string) protocol.Version {
var ver protocol.Version
for param := range strings.SplitSeq(p, ":") {
if strings.HasPrefix(param, "version=") {
if v, _ := protocol.Parse(param[8:]); v > ver {
ver = protocol.Version(v)
}
}
}
return ver
}
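// Illustrative examples, not part of the original source:
//
//	ProtocolVersion("version=2")     // protocol.V2
//	ProtocolVersion("foo:version=1") // protocol.V1
//	ProtocolVersion("")              // protocol.V0 (zero value)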