// Package revision extracts a git revision from a string.
// More information about revisions: https://www.kernel.org/pub/software/scm/git/docs/gitrevisions.html
package revision

import (
	"bytes"
	"fmt"
	"io"
	"regexp"
	"strconv"
	"time"
)

// ErrInvalidRevision is emitted if a string doesn't match a valid revision.
type ErrInvalidRevision struct {
	s string
}

func (e *ErrInvalidRevision) Error() string {
	return "revision invalid: " + e.s
}

// Revisioner represents a revision component.
// A revision is made of multiple revision components
// obtained after parsing a revision string;
// for instance, the revision "master~" will be converted into
// two revision components, Ref and TildePath.
type Revisioner interface {
}

// Ref represents a reference name: HEAD, master, <hash>
type Ref string

// TildePath represents ~, ~{n}
type TildePath struct {
	Depth int
}

// CaretPath represents ^, ^{n}
type CaretPath struct {
	Depth int
}

// CaretReg represents ^{/foo bar}
type CaretReg struct {
	Regexp *regexp.Regexp
	Negate bool
}

// CaretType represents ^{commit}
type CaretType struct {
	ObjectType string
}

// AtReflog represents @{n}
type AtReflog struct {
	Depth int
}

// AtCheckout represents @{-n}
type AtCheckout struct {
	Depth int
}

// AtUpstream represents @{upstream}, @{u}
type AtUpstream struct {
	BranchName string
}

// AtPush represents @{push}
type AtPush struct {
	BranchName string
}

// AtDate represents @{"2006-01-02T15:04:05Z"}
type AtDate struct {
	Date time.Time
}

// ColonReg represents :/foo bar
type ColonReg struct {
	Regexp *regexp.Regexp
	Negate bool
}

// ColonPath represents :./<path> :<path>
type ColonPath struct {
	Path string
}

// ColonStagePath represents :<n>:/<path>
type ColonStagePath struct {
	Path  string
	Stage int
}

// Parser represents a parser used to tokenize a given string and
// transform it into revisioner chunks.
type Parser struct {
	s                 *scanner
	currentParsedChar struct {
		tok token
		lit string
	}
	unreadLastChar bool
}

// NewParserFromString returns a new instance of parser from a string.
func NewParserFromString(s string) *Parser {
	return NewParser(bytes.NewBufferString(s))
}

// NewParser returns a new instance of parser.
func NewParser(r io.Reader) *Parser {
	return &Parser{s: newScanner(r)}
}

// scan returns the next token from the underlying scanner
// or the last scanned token if an unscan was requested.
func (p *Parser) scan() (token, string, error) {
	if p.unreadLastChar {
		p.unreadLastChar = false
		return p.currentParsedChar.tok, p.currentParsedChar.lit, nil
	}

	tok, lit, err := p.s.scan()

	p.currentParsedChar.tok, p.currentParsedChar.lit = tok, lit

	return tok, lit, err
}

// unscan pushes the previously read token back onto the buffer.
func (p *Parser) unscan() { p.unreadLastChar = true }

// Parse explodes a revision string into revisioner chunks.
func (p *Parser) Parse() ([]Revisioner, error) {
	var rev Revisioner
	var revs []Revisioner
	var tok token
	var err error

	for {
		tok, _, err = p.scan()
		if err != nil {
			return nil, err
		}

		switch tok {
		case at:
			rev, err = p.parseAt()
		case tilde:
			rev, err = p.parseTilde()
		case caret:
			rev, err = p.parseCaret()
		case colon:
			rev, err = p.parseColon()
		case eof:
			err = p.validateFullRevision(&revs)
			if err != nil {
				return []Revisioner{}, err
			}
			return revs, nil
		default:
			p.unscan()
			rev, err = p.parseRef()
		}

		if err != nil {
			return []Revisioner{}, err
		}

		revs = append(revs, rev)
	}
}

// validateFullRevision ensures all revisioner chunks make a valid revision.
func (p *Parser) validateFullRevision(chunks *[]Revisioner) error {
	var hasReference bool

	for i, chunk := range *chunks {
		switch chunk.(type) {
		case Ref:
			if i == 0 {
				hasReference = true
			} else {
				return &ErrInvalidRevision{`reference must be defined once at the beginning`}
			}
		case AtDate:
			if len(*chunks) == 1 || hasReference && len(*chunks) == 2 {
				return nil
			}
			return &ErrInvalidRevision{`"@" statement is not valid, could be: <refname>@{<ISO-8601 date>}, @{<ISO-8601 date>}`}
		case AtReflog:
			if len(*chunks) == 1 || hasReference && len(*chunks) == 2 {
				return nil
			}
			return &ErrInvalidRevision{`"@" statement is not valid, could be: <refname>@{<n>}, @{<n>}`}
		case AtCheckout:
			if len(*chunks) == 1 {
				return nil
			}
			return &ErrInvalidRevision{`"@" statement is not valid, could be: @{-<n>}`}
		case AtUpstream:
			if len(*chunks) == 1 || hasReference && len(*chunks) == 2 {
				return nil
			}
			return &ErrInvalidRevision{`"@" statement is not valid, could be: <refname>@{upstream}, @{upstream}, <refname>@{u}, @{u}`}
		case AtPush:
			if len(*chunks) == 1 || hasReference && len(*chunks) == 2 {
				return nil
			}
			return &ErrInvalidRevision{`"@" statement is not valid, could be: <refname>@{push}, @{push}`}
		case TildePath, CaretPath, CaretReg:
			if !hasReference {
				return &ErrInvalidRevision{`"~" or "^" statement must have a reference defined at the beginning`}
			}
		case ColonReg:
			if len(*chunks) == 1 {
				return nil
			}
			return &ErrInvalidRevision{`":" statement is not valid, could be: :/<regexp>`}
		case ColonPath:
			if i == len(*chunks)-1 && hasReference || len(*chunks) == 1 {
				return nil
			}
			return &ErrInvalidRevision{`":" statement is not valid, could be: <revision>:<path>`}
		case ColonStagePath:
			if len(*chunks) == 1 {
				return nil
			}
			return &ErrInvalidRevision{`":" statement is not valid, could be: :<n>:<path>`}
		}
	}

	return nil
}

// parseAt extracts @ statements.
func (p *Parser) parseAt() (Revisioner, error) {
	var tok, nextTok token
	var lit, nextLit string
	var err error

	tok, _, err = p.scan()
	if err != nil {
		return nil, err
	}

	if tok != obrace {
		p.unscan()
		return Ref("HEAD"), nil
	}

	tok, lit, err = p.scan()
	if err != nil {
		return nil, err
	}

	nextTok, nextLit, err = p.scan()
	if err != nil {
		return nil, err
	}

	switch {
	case tok == word && (lit == "u" || lit == "upstream") && nextTok == cbrace:
		return AtUpstream{}, nil
	case tok == word && lit == "push" && nextTok == cbrace:
		return AtPush{}, nil
	case tok == number && nextTok == cbrace:
		n, _ := strconv.Atoi(lit)
		return AtReflog{n}, nil
	case tok == minus && nextTok == number:
		n, _ := strconv.Atoi(nextLit)
		t, _, err := p.scan()
		if err != nil {
			return nil, err
		}
		if t != cbrace {
			return nil, &ErrInvalidRevision{s: `missing "}" in @{-n} structure`}
		}
		return AtCheckout{n}, nil
	default:
		p.unscan()
		date := lit
		for {
			tok, lit, err = p.scan()
			if err != nil {
				return nil, err
			}
			switch {
			case tok == cbrace:
				t, err := time.Parse("2006-01-02T15:04:05Z", date)
				if err != nil {
					return nil, &ErrInvalidRevision{fmt.Sprintf(`wrong date "%s", must fit ISO-8601 format: 2006-01-02T15:04:05Z`, date)}
				}
				return AtDate{t}, nil
			case tok == eof:
				return nil, &ErrInvalidRevision{s: `missing "}" in @{<data>} structure`}
			default:
				date += lit
			}
		}
	}
}

// parseTilde extracts ~ statements.
func (p *Parser) parseTilde() (Revisioner, error) {
	var tok token
	var lit string
	var err error

	tok, lit, err = p.scan()
	if err != nil {
		return nil, err
	}

	switch {
	case tok == number:
		n, _ := strconv.Atoi(lit)
		return TildePath{n}, nil
	default:
		p.unscan()
		return TildePath{1}, nil
	}
}

// parseCaret extracts ^ statements.
func (p *Parser) parseCaret() (Revisioner, error) {
	var tok token
	var lit string
	var err error

	tok, lit, err = p.scan()
	if err != nil {
		return nil, err
	}

	switch {
	case tok == obrace:
		r, err := p.parseCaretBraces()
		if err != nil {
			return nil, err
		}
		return r, nil
	case tok == number:
		n, _ := strconv.Atoi(lit)
		if n > 2 {
			return nil, &ErrInvalidRevision{fmt.Sprintf(`"%s" found, must be 0, 1 or 2 after "^"`, lit)}
		}
		return CaretPath{n}, nil
	default:
		p.unscan()
		return CaretPath{1}, nil
	}
}

// parseCaretBraces extracts ^{<data>} statements.
func (p *Parser) parseCaretBraces() (Revisioner, error) {
	var tok, nextTok token
	var lit string
	start := true
	var re string
	var negate bool
	var err error

	for {
		tok, lit, err = p.scan()
		if err != nil {
			return nil, err
		}

		nextTok, _, err = p.scan()
		if err != nil {
			return nil, err
		}

		switch {
		case tok == word && nextTok == cbrace && (lit == "commit" || lit == "tree" || lit == "blob" || lit == "tag" || lit == "object"):
			return CaretType{lit}, nil
		case re == "" && tok == cbrace:
			return CaretType{"tag"}, nil
		case re == "" && tok == emark && nextTok == emark:
			re += lit
		case re == "" && tok == emark && nextTok == minus:
			negate = true
		case re == "" && tok == emark:
			return nil, &ErrInvalidRevision{s: `revision suffix brace component sequences starting with "/!" other than those defined are reserved`}
		case re == "" && tok == slash:
			p.unscan()
		case tok != slash && start:
			return nil, &ErrInvalidRevision{fmt.Sprintf(`"%s" is not a valid revision suffix brace component`, lit)}
		case tok == eof:
			return nil, &ErrInvalidRevision{s: `missing "}" in ^{<data>} structure`}
		case tok != cbrace:
			p.unscan()
			re += lit
		case tok == cbrace:
			p.unscan()
			reg, err := regexp.Compile(re)
			if err != nil {
				return CaretReg{}, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component, %s`, err.Error())}
			}
			return CaretReg{reg, negate}, nil
		}
		start = false
	}
}

// parseColon extracts : statements.
func (p *Parser) parseColon() (Revisioner, error) {
	var tok token
	var err error

	tok, _, err = p.scan()
	if err != nil {
		return nil, err
	}

	switch tok {
	case slash:
		return p.parseColonSlash()
	default:
		p.unscan()
		return p.parseColonDefault()
	}
}

// parseColonSlash extracts :/<data> statements.
func (p *Parser) parseColonSlash() (Revisioner, error) {
	var tok, nextTok token
	var lit string
	var re string
	var negate bool
	var err error

	for {
		tok, lit, err = p.scan()
		if err != nil {
			return nil, err
		}

		nextTok, _, err = p.scan()
		if err != nil {
			return nil, err
		}

		switch {
		case tok == emark && nextTok == emark:
			re += lit
		case re == "" && tok == emark && nextTok == minus:
			negate = true
		case re == "" && tok == emark:
			return nil, &ErrInvalidRevision{s: `revision suffix brace component sequences starting with "/!" other than those defined are reserved`}
		case tok == eof:
			p.unscan()
			reg, err := regexp.Compile(re)
			if err != nil {
				return ColonReg{}, &ErrInvalidRevision{fmt.Sprintf(`revision suffix brace component, %s`, err.Error())}
			}
			return ColonReg{reg, negate}, nil
		default:
			p.unscan()
			re += lit
		}
	}
}

// parseColonDefault extracts :<data> statements.
func (p *Parser) parseColonDefault() (Revisioner, error) {
	var tok token
	var lit string
	var path string
	var stage int
	var err error
	var n = -1

	tok, lit, err = p.scan()
	if err != nil {
		return nil, err
	}

	nextTok, _, err := p.scan()
	if err != nil {
		return nil, err
	}

	if tok == number && nextTok == colon {
		n, _ = strconv.Atoi(lit)
	}

	switch n {
	case 0, 1, 2, 3:
		stage = n
	default:
		path += lit
		p.unscan()
	}

	for {
		tok, lit, err = p.scan()
		if err != nil {
			return nil, err
		}

		switch {
		case tok == eof && n == -1:
			return ColonPath{path}, nil
		case tok == eof:
			return ColonStagePath{path, stage}, nil
		default:
			path += lit
		}
	}
}

// parseRef extracts the reference name.
func (p *Parser) parseRef() (Revisioner, error) {
	var tok, prevTok token
	var lit, buf string
	var endOfRef bool
	var err error

	for {
		tok, lit, err = p.scan()
		if err != nil {
			return nil, err
		}

		switch tok {
		case eof, at, colon, tilde, caret:
			endOfRef = true
		}

		err := p.checkRefFormat(tok, lit, prevTok, buf, endOfRef)
		if err != nil {
			return "", err
		}

		if endOfRef {
			p.unscan()
			return Ref(buf), nil
		}

		buf += lit
		prevTok = tok
	}
}

// checkRefFormat ensures the reference name follows the rules defined at:
// https://git-scm.com/docs/git-check-ref-format
func (p *Parser) checkRefFormat(token token, literal string, previousToken token, buffer string, endOfRef bool) error {
	switch token {
	case aslash, space, control, qmark, asterisk, obracket:
		return &ErrInvalidRevision{fmt.Sprintf(`must not contain "%s"`, literal)}
	}

	switch {
	case (token == dot || token == slash) && buffer == "":
		return &ErrInvalidRevision{fmt.Sprintf(`must not start with "%s"`, literal)}
	case previousToken == slash && endOfRef:
		return &ErrInvalidRevision{`must not end with "/"`}
	case previousToken == dot && endOfRef:
		return &ErrInvalidRevision{`must not end with "."`}
	case token == dot && previousToken == slash:
		return &ErrInvalidRevision{`must not contain "/."`}
	case previousToken == dot && token == dot:
		return &ErrInvalidRevision{`must not contain ".."`}
	case previousToken == slash && token == slash:
		return &ErrInvalidRevision{`must not contain consecutive "/"`}
	case (token == slash || endOfRef) && len(buffer) > 4 && buffer[len(buffer)-5:] == ".lock":
		return &ErrInvalidRevision{"cannot end with .lock"}
	}

	return nil
}
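// Usage sketch, not part of the original sources: it would live in a _test.go
// file inside this package. The expected chunks follow from Parse, parseTilde
// and parseCaretBraces as written above.

package revision

import "fmt"

// ExampleParser_sketch parses "master~3^{commit}" into its revision chunks.
func ExampleParser_sketch() {
	p := NewParserFromString("master~3^{commit}")
	chunks, err := p.Parse()
	if err != nil {
		fmt.Println(err)
		return
	}
	// chunks[0] == Ref("master"), chunks[1] == TildePath{Depth: 3},
	// chunks[2] == CaretType{ObjectType: "commit"}
	fmt.Printf("%d chunks\n", len(chunks))
	// Output: 3 chunks
}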
package revision

import (
	"bufio"
	"io"
	"unicode"
)

// runeCategoryValidator takes a rune as input and
// validates that it belongs to a rune category.
type runeCategoryValidator func(r rune) bool

// tokenizeExpression aggregates a series of runes matching the check predicate
// into a single string and returns the given tokenType as the token type.
func tokenizeExpression(ch rune, tokenType token, check runeCategoryValidator, r *bufio.Reader) (token, string, error) {
	var data []rune
	data = append(data, ch)

	for {
		c, _, err := r.ReadRune()
		if c == zeroRune {
			break
		}
		if err != nil {
			return tokenError, "", err
		}

		if check(c) {
			data = append(data, c)
		} else {
			err := r.UnreadRune()
			if err != nil {
				return tokenError, "", err
			}
			return tokenType, string(data), nil
		}
	}

	return tokenType, string(data), nil
}

// maxRevisionLength holds the maximum length that will be parsed for a
// revision. Git itself doesn't enforce a max length, but rather leans on
// the OS to enforce it via its ARG_MAX.
const maxRevisionLength = 128 * 1024 // 128 KiB

var zeroRune = rune(0)

// scanner represents a lexical scanner.
type scanner struct {
	r *bufio.Reader
}

// newScanner returns a new instance of scanner.
func newScanner(r io.Reader) *scanner {
	return &scanner{r: bufio.NewReader(io.LimitReader(r, maxRevisionLength))}
}

// scan extracts tokens and their string counterparts
// from the reader.
func (s *scanner) scan() (token, string, error) {
	ch, _, err := s.r.ReadRune()
	if err != nil && err != io.EOF {
		return tokenError, "", err
	}

	switch ch {
	case zeroRune:
		return eof, "", nil
	case ':':
		return colon, string(ch), nil
	case '~':
		return tilde, string(ch), nil
	case '^':
		return caret, string(ch), nil
	case '.':
		return dot, string(ch), nil
	case '/':
		return slash, string(ch), nil
	case '{':
		return obrace, string(ch), nil
	case '}':
		return cbrace, string(ch), nil
	case '-':
		return minus, string(ch), nil
	case '@':
		return at, string(ch), nil
	case '\\':
		return aslash, string(ch), nil
	case '?':
		return qmark, string(ch), nil
	case '*':
		return asterisk, string(ch), nil
	case '[':
		return obracket, string(ch), nil
	case '!':
		return emark, string(ch), nil
	}

	if unicode.IsSpace(ch) {
		return space, string(ch), nil
	}

	if unicode.IsControl(ch) {
		return control, string(ch), nil
	}

	if unicode.IsLetter(ch) {
		return tokenizeExpression(ch, word, unicode.IsLetter, s.r)
	}

	if unicode.IsNumber(ch) {
		return tokenizeExpression(ch, number, unicode.IsNumber, s.r)
	}

	return tokenError, string(ch), nil
}
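// Sketch, not part of the original sources (it would live in a _test.go file
// in this package, since the scanner is unexported): feeding "HEAD~2" through
// the scanner shows how tokenizeExpression aggregates runs of letters and
// digits while punctuation maps to single tokens: word("HEAD"), tilde("~"),
// number("2"), then eof.

package revision

import (
	"fmt"
	"strings"
)

func scanAllSketch(input string) {
	s := newScanner(strings.NewReader(input))
	for {
		tok, lit, err := s.scan()
		if err != nil || tok == eof {
			return
		}
		fmt.Printf("%d %q\n", tok, lit)
	}
}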
package config

// New creates a new config instance.
func New() *Config {
	return &Config{}
}

// Config contains all the sections, comments and includes from a config file.
type Config struct {
	Comment  *Comment
	Sections Sections
	Includes Includes
}

// Includes is a list of Includes in a config file.
type Includes []*Include

// Include is a reference to an included config file.
type Include struct {
	Path   string
	Config *Config
}

// Comment string without the prefix '#' or ';'.
type Comment string

const (
	// NoSubsection token is passed to Config.Section and Config.SetSection to
	// represent the absence of a section.
	NoSubsection = ""
)

// Section returns an existing section with the given name or creates a new one.
func (c *Config) Section(name string) *Section {
	for i := len(c.Sections) - 1; i >= 0; i-- {
		s := c.Sections[i]
		if s.IsName(name) {
			return s
		}
	}

	s := &Section{Name: name}
	c.Sections = append(c.Sections, s)
	return s
}

// HasSection checks if the Config has a section with the specified name.
func (c *Config) HasSection(name string) bool {
	for _, s := range c.Sections {
		if s.IsName(name) {
			return true
		}
	}
	return false
}

// RemoveSection removes a section from a config file.
func (c *Config) RemoveSection(name string) *Config {
	result := Sections{}
	for _, s := range c.Sections {
		if !s.IsName(name) {
			result = append(result, s)
		}
	}
	c.Sections = result
	return c
}

// RemoveSubsection removes a subsection from a config file.
func (c *Config) RemoveSubsection(section string, subsection string) *Config {
	for _, s := range c.Sections {
		if s.IsName(section) {
			result := Subsections{}
			for _, ss := range s.Subsections {
				if !ss.IsName(subsection) {
					result = append(result, ss)
				}
			}
			s.Subsections = result
		}
	}
	return c
}

// AddOption adds an option to a given section and subsection. Use the
// NoSubsection constant for the subsection argument if no subsection is wanted.
func (c *Config) AddOption(section string, subsection string, key string, value string) *Config {
	if subsection == "" {
		c.Section(section).AddOption(key, value)
	} else {
		c.Section(section).Subsection(subsection).AddOption(key, value)
	}

	return c
}

// SetOption sets an option to a given section and subsection. Use the
// NoSubsection constant for the subsection argument if no subsection is wanted.
func (c *Config) SetOption(section string, subsection string, key string, value string) *Config {
	if subsection == "" {
		c.Section(section).SetOption(key, value)
	} else {
		c.Section(section).Subsection(subsection).SetOption(key, value)
	}

	return c
}
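// Usage sketch, not part of the original sources: Section, AddOption and
// SetOption all create missing sections on demand, so a config can be built
// fluently from scratch. The values below are illustrative only.

package config

func exampleBuildConfig() *Config {
	cfg := New()
	cfg.AddOption("core", NoSubsection, "bare", "false")
	cfg.SetOption("remote", "origin", "url", "https://github.com/go-git/go-git")
	// cfg now has a [core] section and a [remote "origin"] subsection.
	return cfg
}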
package config

import (
	"io"

	"github.com/go-git/gcfg/v2"
)

// A Decoder reads and decodes config files from an input stream.
type Decoder struct {
	io.Reader
}

// NewDecoder returns a new decoder that reads from r.
func NewDecoder(r io.Reader) *Decoder {
	return &Decoder{r}
}

// Decode reads the whole config from its input and stores it in the
// value pointed to by config.
func (d *Decoder) Decode(config *Config) error {
	cb := func(s string, ss string, k string, v string, bv bool) error {
		if ss == "" && k == "" {
			config.Section(s)
			return nil
		}

		if ss != "" && k == "" {
			config.Section(s).Subsection(ss)
			return nil
		}

		config.AddOption(s, ss, k, v)
		return nil
	}
	return gcfg.ReadWithCallback(d, cb)
}
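// Usage sketch, not part of the original sources; the input literal is
// illustrative. gcfg.ReadWithCallback drives the callback once per section,
// subsection and option, which Decode maps onto the Config type above.

package config

import "strings"

func exampleDecode() (*Config, error) {
	raw := "[core]\n\tbare = false\n[remote \"origin\"]\n\turl = https://example.com/repo.git\n"
	cfg := New()
	if err := NewDecoder(strings.NewReader(raw)).Decode(cfg); err != nil {
		return nil, err
	}
	// cfg.Section("core").Option("bare") == "false"
	return cfg, nil
}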
package config

import (
	"fmt"
	"io"
	"strings"
)

// An Encoder writes config files to an output stream.
type Encoder struct {
	w io.Writer
}

var (
	subsectionReplacer = strings.NewReplacer(`"`, `\"`, `\`, `\\`)
	valueReplacer      = strings.NewReplacer(`"`, `\"`, `\`, `\\`, "\n", `\n`, "\t", `\t`, "\b", `\b`)
)

// NewEncoder returns a new encoder that writes to w.
func NewEncoder(w io.Writer) *Encoder {
	return &Encoder{w}
}

// Encode writes the config in git config format to the stream of the encoder.
func (e *Encoder) Encode(cfg *Config) error {
	for _, s := range cfg.Sections {
		if err := e.encodeSection(s); err != nil {
			return err
		}
	}

	return nil
}

func (e *Encoder) encodeSection(s *Section) error {
	if len(s.Options) > 0 {
		if err := e.printf("[%s]\n", s.Name); err != nil {
			return err
		}

		if err := e.encodeOptions(s.Options); err != nil {
			return err
		}
	}

	for _, ss := range s.Subsections {
		if err := e.encodeSubsection(s.Name, ss); err != nil {
			return err
		}
	}

	return nil
}

func (e *Encoder) encodeSubsection(sectionName string, s *Subsection) error {
	if err := e.printf("[%s \"%s\"]\n", sectionName, subsectionReplacer.Replace(s.Name)); err != nil {
		return err
	}

	return e.encodeOptions(s.Options)
}

func (e *Encoder) encodeOptions(opts Options) error {
	for _, o := range opts {
		var value string
		if strings.ContainsAny(o.Value, "#;\"\t\n\\") || strings.HasPrefix(o.Value, " ") || strings.HasSuffix(o.Value, " ") {
			value = `"` + valueReplacer.Replace(o.Value) + `"`
		} else {
			value = o.Value
		}

		if err := e.printf("\t%s = %s\n", o.Key, value); err != nil {
			return err
		}
	}

	return nil
}

func (e *Encoder) printf(msg string, args ...interface{}) error {
	_, err := fmt.Fprintf(e.w, msg, args...)
	return err
}
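// Usage sketch, not part of the original sources: serializing a Config with
// the Encoder. Values containing '#', ';', '"', tabs, newlines, backslashes,
// or leading or trailing spaces are quoted and escaped via valueReplacer, as
// implemented in encodeOptions above.

package config

import "bytes"

func exampleEncode(cfg *Config) (string, error) {
	var buf bytes.Buffer
	if err := NewEncoder(&buf).Encode(cfg); err != nil {
		return "", err
	}
	return buf.String(), nil
}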
package config

import "errors"

// RepositoryFormatVersion represents the repository format version,
// as defined at:
//
// https://git-scm.com/docs/repository-version
type RepositoryFormatVersion string

const (
	// Version_0 is the format defined by the initial version of git,
	// including but not limited to the format of the repository
	// directory, the repository configuration file, and the object
	// and ref storage.
	//
	// Specifying the complete behavior of git is beyond the scope
	// of this document.
	Version_0 = "0"

	// Version_1 is identical to version 0, with the following exceptions:
	//
	//  1. When reading the core.repositoryformatversion variable, a git
	//     implementation which supports version 1 MUST also read any
	//     configuration keys found in the extensions section of the
	//     configuration file.
	//
	//  2. If a version-1 repository specifies any extensions.* keys that
	//     the running git has not implemented, the operation MUST NOT proceed.
	//     Similarly, if the value of any known key is not understood by the
	//     implementation, the operation MUST NOT proceed.
	//
	// Note that if no extensions are specified in the config file, then
	// core.repositoryformatversion SHOULD be set to 0 (setting it to 1 provides
	// no benefit, and makes the repository incompatible with older
	// implementations of git).
	Version_1 = "1"

	// DefaultRepositoryFormatVersion holds the default repository format version.
	DefaultRepositoryFormatVersion = Version_0
)

// ObjectFormat defines the object format.
type ObjectFormat int

const (
	// SHA1 represents the object format used for SHA1.
	SHA1 ObjectFormat = iota

	// SHA256 represents the object format used for SHA256.
	SHA256

	// DefaultObjectFormat holds the default object format.
	DefaultObjectFormat = SHA1
)

// String returns the string representation of the ObjectFormat.
func (f ObjectFormat) String() string {
	switch f {
	case SHA1:
		return "sha1"
	case SHA256:
		return "sha256"
	default:
		return ""
	}
}

// Size returns the hash size of the ObjectFormat.
func (f ObjectFormat) Size() int {
	switch f {
	case SHA1:
		return SHA1Size
	case SHA256:
		return SHA256Size
	default:
		return 0
	}
}

// HexSize returns the hash size in hexadecimal format of the ObjectFormat.
func (f ObjectFormat) HexSize() int {
	switch f {
	case SHA1:
		return SHA1HexSize
	case SHA256:
		return SHA256HexSize
	default:
		return 0
	}
}

// ErrInvalidObjectFormat is returned when an invalid ObjectFormat is used.
var ErrInvalidObjectFormat = errors.New("invalid object format")

const (
	// SHA1Size is the size of a SHA1 hash.
	SHA1Size = 20

	// SHA256Size is the size of a SHA256 hash.
	SHA256Size = 32

	// SHA1HexSize is the size of a SHA1 hash in hexadecimal format.
	SHA1HexSize = SHA1Size * 2

	// SHA256HexSize is the size of a SHA256 hash in hexadecimal format.
	SHA256HexSize = SHA256Size * 2
)
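// Usage sketch, not part of the original sources: SHA1 digests are 20 bytes
// (40 hex characters) and SHA256 digests are 32 bytes (64 hex characters),
// which is exactly what Size and HexSize report.

package config

import "fmt"

func exampleObjectFormat() {
	for _, f := range []ObjectFormat{SHA1, SHA256} {
		fmt.Printf("%s: %d bytes, %d hex chars\n", f, f.Size(), f.HexSize())
	}
	// sha1: 20 bytes, 40 hex chars
	// sha256: 32 bytes, 64 hex chars
}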
package config

import (
	"fmt"
	"strings"
)

// Option defines a key/value entity in a config file.
type Option struct {
	// Key preserving the original casing.
	// Use IsKey to compare keys regardless of casing.
	Key string
	// Original value as string; it may not be normalized.
	Value string
}

type Options []*Option

// IsKey returns true if the given key matches
// this option's key in a case-insensitive comparison.
func (o *Option) IsKey(key string) bool {
	return strings.EqualFold(o.Key, key)
}

func (opts Options) GoString() string {
	var strs []string
	for _, opt := range opts {
		strs = append(strs, fmt.Sprintf("%#v", opt))
	}

	return strings.Join(strs, ", ")
}

// Get gets the value for the given key if set,
// otherwise it returns the empty string.
//
// Note that there is no difference between an unset option and an
// option set to the empty string; use Has to tell them apart.
//
// This matches git behaviour since git v1.8.1-rc1:
// if there are multiple definitions of a key, the
// last one wins.
//
// See: http://article.gmane.org/gmane.linux.kernel/1407184
//
// In order to get all possible values for the same key,
// use GetAll.
func (opts Options) Get(key string) string {
	for i := len(opts) - 1; i >= 0; i-- {
		o := opts[i]
		if o.IsKey(key) {
			return o.Value
		}
	}
	return ""
}

// Has checks if an Option exists with the given key.
func (opts Options) Has(key string) bool {
	for _, o := range opts {
		if o.IsKey(key) {
			return true
		}
	}
	return false
}

// GetAll returns all possible values for the same key.
func (opts Options) GetAll(key string) []string {
	result := []string{}
	for _, o := range opts {
		if o.IsKey(key) {
			result = append(result, o.Value)
		}
	}
	return result
}

func (opts Options) withoutOption(key string) Options {
	result := Options{}
	for _, o := range opts {
		if !o.IsKey(key) {
			result = append(result, o)
		}
	}
	return result
}

func (opts Options) withAddedOption(key string, value string) Options {
	return append(opts, &Option{key, value})
}

func (opts Options) withSettedOption(key string, values ...string) Options {
	var result Options
	var added []string
	for _, o := range opts {
		if !o.IsKey(key) {
			result = append(result, o)
			continue
		}

		if contains(values, o.Value) {
			added = append(added, o.Value)
			result = append(result, o)
			continue
		}
	}

	for _, value := range values {
		if contains(added, value) {
			continue
		}

		result = result.withAddedOption(key, value)
	}

	return result
}

func contains(haystack []string, needle string) bool {
	for _, s := range haystack {
		if s == needle {
			return true
		}
	}
	return false
}
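// Usage sketch, not part of the original sources: the "last one wins" rule
// documented on Get. With two definitions of the same key, Get returns the
// later value while GetAll preserves both, mirroring git's multivalued options.

package config

func exampleLastOneWins() {
	opts := Options{
		{Key: "fetch", Value: "+refs/heads/*:refs/remotes/origin/*"},
		{Key: "fetch", Value: "+refs/tags/*:refs/tags/*"},
	}
	_ = opts.Get("fetch")    // the tags refspec: the last definition wins
	_ = opts.GetAll("fetch") // both refspecs, in definition order
}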
package config

import (
	"fmt"
	"strings"
)

// Section is the representation of a section inside git configuration files.
// Each Section contains Options that are used by both the Git plumbing
// and the porcelains.
// Sections can be further divided into subsections. To begin a subsection
// put its name in double quotes, separated by space from the section name,
// in the section header, like in the example below:
//
//	[section "subsection"]
//
// All the other lines (and the remainder of the line after the section header)
// are recognized as option variables, in the form "name = value" (or just name,
// which is a short-hand to say that the variable is the boolean "true").
// The variable names are case-insensitive, allow only alphanumeric characters
// and -, and must start with an alphabetic character:
//
//	[section "subsection1"]
//	option1 = value1
//	option2
//	[section "subsection2"]
//	option3 = value2
type Section struct {
	Name        string
	Options     Options
	Subsections Subsections
}

type Subsection struct {
	Name    string
	Options Options
}

type Sections []*Section

func (s Sections) GoString() string {
	var strs []string
	for _, ss := range s {
		strs = append(strs, fmt.Sprintf("%#v", ss))
	}

	return strings.Join(strs, ", ")
}

type Subsections []*Subsection

func (s Subsections) GoString() string {
	var strs []string
	for _, ss := range s {
		strs = append(strs, fmt.Sprintf("%#v", ss))
	}

	return strings.Join(strs, ", ")
}

// IsName checks if the name provided is equal to the Section name, ignoring case.
func (s *Section) IsName(name string) bool {
	return strings.EqualFold(s.Name, name)
}

// Subsection returns a Subsection from the specified Section. If the
// Subsection does not exist, a new one is created and added to the Section.
func (s *Section) Subsection(name string) *Subsection {
	for i := len(s.Subsections) - 1; i >= 0; i-- {
		ss := s.Subsections[i]
		if ss.IsName(name) {
			return ss
		}
	}

	ss := &Subsection{Name: name}
	s.Subsections = append(s.Subsections, ss)
	return ss
}

// HasSubsection checks if the Section has a Subsection with the specified name.
func (s *Section) HasSubsection(name string) bool {
	for _, ss := range s.Subsections {
		if ss.IsName(name) {
			return true
		}
	}

	return false
}

// RemoveSubsection removes a subsection from a Section.
func (s *Section) RemoveSubsection(name string) *Section {
	result := Subsections{}
	for _, ss := range s.Subsections {
		if !ss.IsName(name) {
			result = append(result, ss)
		}
	}

	s.Subsections = result
	return s
}

// Option returns the value for the specified key. An empty string is returned
// if the key does not exist.
func (s *Section) Option(key string) string {
	return s.Options.Get(key)
}

// OptionAll returns all possible values for an option with the specified key.
// If the option does not exist, an empty slice will be returned.
func (s *Section) OptionAll(key string) []string {
	return s.Options.GetAll(key)
}

// HasOption checks if the Section has an Option with the given key.
func (s *Section) HasOption(key string) bool {
	return s.Options.Has(key)
}

// AddOption adds a new Option to the Section. The updated Section is returned.
func (s *Section) AddOption(key string, value string) *Section {
	s.Options = s.Options.withAddedOption(key, value)
	return s
}

// SetOption adds a new Option to the Section. If the option already exists, it is replaced.
// The updated Section is returned.
func (s *Section) SetOption(key string, value string) *Section {
	s.Options = s.Options.withSettedOption(key, value)
	return s
}

// RemoveOption removes the option with the specified key. The updated Section is returned.
func (s *Section) RemoveOption(key string) *Section {
	s.Options = s.Options.withoutOption(key)
	return s
}

// IsName checks if the name of the subsection is exactly the specified name.
func (s *Subsection) IsName(name string) bool {
	return s.Name == name
}

// Option returns an option with the specified key. If the option does not exist,
// an empty string will be returned.
func (s *Subsection) Option(key string) string {
	return s.Options.Get(key)
}

// OptionAll returns all possible values for an option with the specified key.
// If the option does not exist, an empty slice will be returned.
func (s *Subsection) OptionAll(key string) []string {
	return s.Options.GetAll(key)
}

// HasOption checks if the Subsection has an Option with the given key.
func (s *Subsection) HasOption(key string) bool {
	return s.Options.Has(key)
}

// AddOption adds a new Option to the Subsection. The updated Subsection is returned.
func (s *Subsection) AddOption(key string, value string) *Subsection {
	s.Options = s.Options.withAddedOption(key, value)
	return s
}

// SetOption adds a new Option to the Subsection. If the option already exists, it is replaced.
// The updated Subsection is returned.
func (s *Subsection) SetOption(key string, value ...string) *Subsection {
	s.Options = s.Options.withSettedOption(key, value...)
	return s
}

// RemoveOption removes the option with the specified key. The updated Subsection is returned.
func (s *Subsection) RemoveOption(key string) *Subsection {
	s.Options = s.Options.withoutOption(key)
	return s
}
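// Usage sketch, not part of the original sources: tying the Section API
// together. Subsection lookups are case-sensitive (Subsection.IsName uses ==)
// while section and option lookups are case-insensitive (strings.EqualFold),
// matching the git config rules quoted above.

package config

func exampleSections(cfg *Config) {
	s := cfg.Section("branch")
	ss := s.Subsection("main")       // created on first use
	ss.SetOption("remote", "origin") // replaces any previous value
	_ = s.HasSubsection("MAIN")      // false: subsection names keep case
	_ = cfg.HasSection("BRANCH")     // true: section names ignore case
}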
package packfile

import (
	"io"
	"time"

	"github.com/go-git/go-git/v6/plumbing/storer"
	"github.com/go-git/go-git/v6/utils/ioutil"
	"github.com/go-git/go-git/v6/utils/sync"
	"github.com/go-git/go-git/v6/utils/trace"
)

var signature = []byte{'P', 'A', 'C', 'K'}

const (
	// VersionSupported is the packfile version supported by this package.
	VersionSupported uint32 = 2

	firstLengthBits = uint8(4) // the first byte of an object header has 4 bits to store the length
	lengthBits      = uint8(7) // subsequent bytes have 7 bits to store the length

	maskFirstLength = 15         // 0000 1111
	maskContinue    = 0x80       // 1000 0000
	maskLength      = uint8(127) // 0111 1111
	maskType        = uint8(112) // 0111 0000
)

// UpdateObjectStorage updates the storer with the objects in the given
// packfile.
func UpdateObjectStorage(s storer.Storer, packfile io.Reader) error {
	start := time.Now()
	defer func() {
		trace.Performance.Printf("performance: %.9f s: update_obj_storage", time.Since(start).Seconds())
	}()

	if pw, ok := s.(storer.PackfileWriter); ok {
		return WritePackfileToObjectStorage(pw, packfile)
	}

	p := NewParser(packfile, WithStorage(s))
	_, err := p.Parse()
	return err
}

// WritePackfileToObjectStorage writes all the packfile objects into the given
// object storage.
func WritePackfileToObjectStorage(
	sw storer.PackfileWriter,
	packfile io.Reader,
) (err error) {
	w, err := sw.PackfileWriter()
	if err != nil {
		return err
	}

	defer ioutil.CheckClose(w, &err)

	var n int64
	buf := sync.GetByteSlice()
	n, err = io.CopyBuffer(w, packfile, *buf)
	sync.PutByteSlice(buf)

	if err == nil && n == 0 {
		return ErrEmptyPackfile
	}

	return err
}
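// Sketch, not part of the original sources: decoding a packfile object header
// byte with the masks above. The first byte packs a continuation bit, a 3-bit
// type and the low 4 bits of the length; subsequent bytes carry 7 length bits
// each.

package packfile

func decodeFirstHeaderByte(b byte) (cont bool, typ byte, lenBits byte) {
	cont = b&maskContinue != 0    // 1000 0000: more length bytes follow
	typ = (b & maskType) >> 4     // 0111 0000: object type
	lenBits = b & maskFirstLength // 0000 1111: least-significant length bits
	return
}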
package packfile

const blksz = 16
const maxChainLength = 64

// deltaIndex is a modified version of JGit's DeltaIndex adapted to our current
// design.
type deltaIndex struct {
	table   []int
	entries []int
	mask    int
}

func (idx *deltaIndex) init(buf []byte) {
	scanner := newDeltaIndexScanner(buf, len(buf))
	idx.mask = scanner.mask
	idx.table = scanner.table
	idx.entries = make([]int, countEntries(scanner)+1)
	idx.copyEntries(scanner)
}

// findMatch returns the offset in src where the block starting at tgtOffset
// matches, together with the length of the match. A length of 0 means there
// was no match. A length of -1 means src is shorter than blksz. Any other
// positive length is the length of the match in bytes.
func (idx *deltaIndex) findMatch(src, tgt []byte, tgtOffset int) (srcOffset, l int) {
	if len(tgt) < tgtOffset+s {
		return 0, len(tgt) - tgtOffset
	}

	if len(src) < blksz {
		return 0, -1
	}

	h := hashBlock(tgt, tgtOffset)
	tIdx := h & idx.mask
	eIdx := idx.table[tIdx]
	if eIdx == 0 {
		return
	}

	srcOffset = idx.entries[eIdx]
	l = matchLength(src, tgt, tgtOffset, srcOffset)

	return
}

func matchLength(src, tgt []byte, otgt, osrc int) (l int) {
	lensrc := len(src)
	lentgt := len(tgt)
	for (osrc < lensrc && otgt < lentgt) && src[osrc] == tgt[otgt] {
		l++
		osrc++
		otgt++
	}
	return
}

func countEntries(scan *deltaIndexScanner) (cnt int) {
	// Figure out exactly how many entries we need. As we do the
	// enumeration truncate any delta chains longer than what we
	// are willing to scan during encode. This keeps the encode
	// logic linear in the size of the input rather than quadratic.
	for i := 0; i < len(scan.table); i++ {
		h := scan.table[i]
		if h == 0 {
			continue
		}

		size := 0
		for {
			size++
			if size == maxChainLength {
				scan.next[h] = 0
				break
			}
			h = scan.next[h]

			if h == 0 {
				break
			}
		}
		cnt += size
	}
	return
}

func (idx *deltaIndex) copyEntries(scanner *deltaIndexScanner) {
	// Rebuild the entries list from the scanner, positioning all
	// blocks in the same hash chain next to each other. We can
	// then later discard the next list, along with the scanner.
	//
	next := 1
	for i := 0; i < len(idx.table); i++ {
		h := idx.table[i]
		if h == 0 {
			continue
		}

		idx.table[i] = next
		for {
			idx.entries[next] = scanner.entries[h]
			next++
			h = scanner.next[h]

			if h == 0 {
				break
			}
		}
	}
}

type deltaIndexScanner struct {
	table   []int
	entries []int
	next    []int
	mask    int
	count   int
}

func newDeltaIndexScanner(buf []byte, size int) *deltaIndexScanner {
	size -= size % blksz
	worstCaseBlockCnt := size / blksz
	if worstCaseBlockCnt < 1 {
		return new(deltaIndexScanner)
	}

	tableSize := tableSize(worstCaseBlockCnt)
	scanner := &deltaIndexScanner{
		table:   make([]int, tableSize),
		mask:    tableSize - 1,
		entries: make([]int, worstCaseBlockCnt+1),
		next:    make([]int, worstCaseBlockCnt+1),
	}

	scanner.scan(buf, size)
	return scanner
}

// A slightly modified version of JGit's DeltaIndexScanner. We store the offset
// in the entries instead of the entry and the key, so we avoid operations to
// retrieve the offset later, as we don't use the key.
// See: https://github.com/eclipse/jgit/blob/005e5feb4ecd08c4e4d141a38b9e7942accb3212/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/DeltaIndexScanner.java
func (s *deltaIndexScanner) scan(buf []byte, end int) {
	lastHash := 0
	ptr := end - blksz

	for {
		key := hashBlock(buf, ptr)
		tIdx := key & s.mask
		head := s.table[tIdx]
		if head != 0 && lastHash == key {
			s.entries[head] = ptr
		} else {
			s.count++
			eIdx := s.count
			s.entries[eIdx] = ptr
			s.next[eIdx] = head
			s.table[tIdx] = eIdx
		}

		lastHash = key
		ptr -= blksz

		if ptr < 0 {
			break
		}
	}
}

func tableSize(worstCaseBlockCnt int) int {
	shift := 32 - leadingZeros(uint32(worstCaseBlockCnt))
	sz := 1 << uint(shift-1)
	if sz < worstCaseBlockCnt {
		sz <<= 1
	}
	return sz
}

// use https://golang.org/pkg/math/bits/#LeadingZeros32 in the future
func leadingZeros(x uint32) (n int) {
	if x >= 1<<16 {
		x >>= 16
		n = 16
	}
	if x >= 1<<8 {
		x >>= 8
		n += 8
	}
	n += int(len8tab[x])
	return 32 - n
}

var len8tab = [256]uint8{
	0x00, 0x01, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
	0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
	0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
	0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08,
}

func hashBlock(raw []byte, ptr int) int {
	// The first 4 steps collapse out into a 4 byte big-endian decode,
	// with a larger right shift as we combined shift lefts together.
	//
	hash := ((uint32(raw[ptr]) & 0xff) << 24) |
		((uint32(raw[ptr+1]) & 0xff) << 16) |
		((uint32(raw[ptr+2]) & 0xff) << 8) |
		(uint32(raw[ptr+3]) & 0xff)
	hash ^= T[hash>>31]

	hash = ((hash << 8) | (uint32(raw[ptr+4]) & 0xff)) ^ T[hash>>23]
	hash = ((hash << 8) | (uint32(raw[ptr+5]) & 0xff)) ^ T[hash>>23]
	hash = ((hash << 8) | (uint32(raw[ptr+6]) & 0xff)) ^ T[hash>>23]
	hash = ((hash << 8) | (uint32(raw[ptr+7]) & 0xff)) ^ T[hash>>23]
	hash = ((hash << 8) | (uint32(raw[ptr+8]) & 0xff)) ^ T[hash>>23]
	hash = ((hash << 8) | (uint32(raw[ptr+9]) & 0xff)) ^ T[hash>>23]
	hash = ((hash << 8) | (uint32(raw[ptr+10]) & 0xff)) ^ T[hash>>23]
	hash = ((hash << 8) | (uint32(raw[ptr+11]) & 0xff)) ^ T[hash>>23]
	hash = ((hash << 8) | (uint32(raw[ptr+12]) & 0xff)) ^ T[hash>>23]
	hash = ((hash << 8) | (uint32(raw[ptr+13]) & 0xff)) ^ T[hash>>23]
	hash = ((hash << 8) | (uint32(raw[ptr+14]) & 0xff)) ^ T[hash>>23]
	hash = ((hash << 8) | (uint32(raw[ptr+15]) & 0xff)) ^ T[hash>>23]

	return int(hash)
}

var T = []uint32{
	0x00000000, 0xd4c6b32d, 0x7d4bd577, 0xa98d665a, 0x2e5119c3, 0xfa97aaee, 0x531accb4, 0x87dc7f99,
	0x5ca23386, 0x886480ab, 0x21e9e6f1, 0xf52f55dc, 0x72f32a45, 0xa6359968, 0x0fb8ff32, 0xdb7e4c1f,
	0x6d82d421, 0xb944670c, 0x10c90156, 0xc40fb27b, 0x43d3cde2, 0x97157ecf, 0x3e981895, 0xea5eabb8,
	0x3120e7a7, 0xe5e6548a, 0x4c6b32d0, 0x98ad81fd, 0x1f71fe64, 0xcbb74d49, 0x623a2b13, 0xb6fc983e,
	0x0fc31b6f, 0xdb05a842, 0x7288ce18, 0xa64e7d35, 0x219202ac, 0xf554b181, 0x5cd9d7db, 0x881f64f6,
	0x536128e9, 0x87a79bc4, 0x2e2afd9e, 0xfaec4eb3, 0x7d30312a, 0xa9f68207, 0x007be45d, 0xd4bd5770,
	0x6241cf4e, 0xb6877c63, 0x1f0a1a39, 0xcbcca914, 0x4c10d68d, 0x98d665a0, 0x315b03fa, 0xe59db0d7,
	0x3ee3fcc8, 0xea254fe5, 0x43a829bf, 0x976e9a92, 0x10b2e50b, 0xc4745626, 0x6df9307c, 0xb93f8351,
	0x1f8636de, 0xcb4085f3, 0x62cde3a9, 0xb60b5084, 0x31d72f1d, 0xe5119c30, 0x4c9cfa6a, 0x985a4947,
	0x43240558, 0x97e2b675, 0x3e6fd02f, 0xeaa96302, 0x6d751c9b, 0xb9b3afb6, 0x103ec9ec, 0xc4f87ac1,
	0x7204e2ff, 0xa6c251d2, 0x0f4f3788, 0xdb8984a5, 0x5c55fb3c, 0x88934811, 0x211e2e4b, 0xf5d89d66,
	0x2ea6d179, 0xfa606254, 0x53ed040e, 0x872bb723, 0x00f7c8ba, 0xd4317b97, 0x7dbc1dcd, 0xa97aaee0,
	0x10452db1, 0xc4839e9c, 0x6d0ef8c6, 0xb9c84beb, 0x3e143472, 0xead2875f, 0x435fe105, 0x97995228,
	0x4ce71e37, 0x9821ad1a, 0x31accb40, 0xe56a786d, 0x62b607f4, 0xb670b4d9, 0x1ffdd283, 0xcb3b61ae,
	0x7dc7f990, 0xa9014abd, 0x008c2ce7, 0xd44a9fca, 0x5396e053, 0x8750537e, 0x2edd3524, 0xfa1b8609,
	0x2165ca16, 0xf5a3793b, 0x5c2e1f61, 0x88e8ac4c, 0x0f34d3d5, 0xdbf260f8, 0x727f06a2, 0xa6b9b58f,
	0x3f0c6dbc, 0xebcade91, 0x4247b8cb, 0x96810be6, 0x115d747f, 0xc59bc752, 0x6c16a108, 0xb8d01225,
	0x63ae5e3a, 0xb768ed17, 0x1ee58b4d, 0xca233860, 0x4dff47f9, 0x9939f4d4, 0x30b4928e, 0xe47221a3,
	0x528eb99d, 0x86480ab0, 0x2fc56cea, 0xfb03dfc7, 0x7cdfa05e, 0xa8191373, 0x01947529, 0xd552c604,
	0x0e2c8a1b, 0xdaea3936, 0x73675f6c, 0xa7a1ec41, 0x207d93d8, 0xf4bb20f5, 0x5d3646af, 0x89f0f582,
	0x30cf76d3, 0xe409c5fe, 0x4d84a3a4, 0x99421089, 0x1e9e6f10, 0xca58dc3d, 0x63d5ba67, 0xb713094a,
	0x6c6d4555, 0xb8abf678, 0x11269022, 0xc5e0230f, 0x423c5c96, 0x96faefbb, 0x3f7789e1, 0xebb13acc,
	0x5d4da2f2, 0x898b11df, 0x20067785, 0xf4c0c4a8, 0x731cbb31, 0xa7da081c, 0x0e576e46, 0xda91dd6b,
	0x01ef9174, 0xd5292259, 0x7ca44403, 0xa862f72e, 0x2fbe88b7, 0xfb783b9a, 0x52f55dc0, 0x8633eeed,
	0x208a5b62, 0xf44ce84f, 0x5dc18e15, 0x89073d38, 0x0edb42a1, 0xda1df18c, 0x739097d6, 0xa75624fb,
	0x7c2868e4, 0xa8eedbc9, 0x0163bd93, 0xd5a50ebe, 0x52797127, 0x86bfc20a, 0x2f32a450, 0xfbf4177d,
	0x4d088f43, 0x99ce3c6e, 0x30435a34, 0xe485e919, 0x63599680, 0xb79f25ad, 0x1e1243f7, 0xcad4f0da,
	0x11aabcc5, 0xc56c0fe8, 0x6ce169b2, 0xb827da9f, 0x3ffba506, 0xeb3d162b, 0x42b07071, 0x9676c35c,
	0x2f49400d, 0xfb8ff320, 0x5202957a, 0x86c42657, 0x011859ce, 0xd5deeae3, 0x7c538cb9, 0xa8953f94,
	0x73eb738b, 0xa72dc0a6, 0x0ea0a6fc, 0xda6615d1, 0x5dba6a48, 0x897cd965, 0x20f1bf3f, 0xf4370c12,
	0x42cb942c, 0x960d2701, 0x3f80415b, 0xeb46f276, 0x6c9a8def, 0xb85c3ec2, 0x11d15898, 0xc517ebb5,
	0x1e69a7aa, 0xcaaf1487, 0x632272dd, 0xb7e4c1f0, 0x3038be69, 0xe4fe0d44, 0x4d736b1e, 0x99b5d833,
}
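// Sketch, not part of the original sources (in-package, since deltaIndex is
// unexported): building a deltaIndex over a source buffer and probing it with
// findMatch. With blksz = 16, only matches of at least one full block are
// found; shorter sources yield the -1 sentinel described on findMatch.

package packfile

func exampleFindMatch() {
	src := []byte("0123456789abcdefHELLO WORLD !!!!")
	tgt := append([]byte("prefix--"), src...)

	idx := new(deltaIndex)
	idx.init(src)

	// Probe at the position where src's first block reappears inside tgt.
	srcOffset, l := idx.findMatch(src, tgt, 8)
	_ = srcOffset // 0: the match starts at the beginning of src
	_ = l         // 32: the whole copied region matches
}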
package packfile

import (
	"sort"
	"sync"

	"github.com/go-git/go-git/v6/plumbing"
	"github.com/go-git/go-git/v6/plumbing/storer"
)

const (
	// maxDepth is how many steps of deltas based on deltas we can do.
	// 50 is the default value used in JGit.
	maxDepth = int64(50)
)

// applyDelta is the set of object types to which we should apply deltas.
var applyDelta = map[plumbing.ObjectType]bool{
	plumbing.BlobObject: true,
	plumbing.TreeObject: true,
}

type deltaSelector struct {
	storer storer.EncodedObjectStorer
}

func newDeltaSelector(s storer.EncodedObjectStorer) *deltaSelector {
	return &deltaSelector{s}
}

// ObjectsToPack creates a list of ObjectToPack from the hashes
// provided, creating deltas when suitable, using a specific
// internal logic. `packWindow` specifies the size of the sliding
// window used to compare objects for delta compression; 0 turns off
// delta compression entirely.
func (dw *deltaSelector) ObjectsToPack(
	hashes []plumbing.Hash,
	packWindow uint,
) ([]*ObjectToPack, error) {
	otp, err := dw.objectsToPack(hashes, packWindow)
	if err != nil {
		return nil, err
	}

	if packWindow == 0 {
		return otp, nil
	}

	dw.sort(otp)

	var objectGroups [][]*ObjectToPack
	var prev *ObjectToPack
	i := -1
	for _, obj := range otp {
		if prev == nil || prev.Type() != obj.Type() {
			objectGroups = append(objectGroups, []*ObjectToPack{obj})
			i++
			prev = obj
		} else {
			objectGroups[i] = append(objectGroups[i], obj)
		}
	}

	var wg sync.WaitGroup
	var once sync.Once
	for _, objs := range objectGroups {
		objs := objs
		wg.Add(1)
		go func() {
			if walkErr := dw.walk(objs, packWindow); walkErr != nil {
				once.Do(func() {
					err = walkErr
				})
			}
			wg.Done()
		}()
	}
	wg.Wait()

	if err != nil {
		return nil, err
	}

	return otp, nil
}

func (dw *deltaSelector) objectsToPack(
	hashes []plumbing.Hash,
	packWindow uint,
) ([]*ObjectToPack, error) {
	var objectsToPack []*ObjectToPack
	for _, h := range hashes {
		var o plumbing.EncodedObject
		var err error
		if packWindow == 0 {
			o, err = dw.encodedObject(h)
		} else {
			o, err = dw.encodedDeltaObject(h)
		}
		if err != nil {
			return nil, err
		}

		otp := newObjectToPack(o)
		if _, ok := o.(plumbing.DeltaObject); ok {
			otp.CleanOriginal()
		}

		objectsToPack = append(objectsToPack, otp)
	}

	if packWindow == 0 {
		return objectsToPack, nil
	}

	if err := dw.fixAndBreakChains(objectsToPack); err != nil {
		return nil, err
	}

	return objectsToPack, nil
}

func (dw *deltaSelector) encodedDeltaObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
	edos, ok := dw.storer.(storer.DeltaObjectStorer)
	if !ok {
		return dw.encodedObject(h)
	}

	return edos.DeltaObject(plumbing.AnyObject, h)
}

func (dw *deltaSelector) encodedObject(h plumbing.Hash) (plumbing.EncodedObject, error) {
	return dw.storer.EncodedObject(plumbing.AnyObject, h)
}

func (dw *deltaSelector) fixAndBreakChains(objectsToPack []*ObjectToPack) error {
	m := make(map[plumbing.Hash]*ObjectToPack, len(objectsToPack))
	for _, otp := range objectsToPack {
		m[otp.Hash()] = otp
	}

	for _, otp := range objectsToPack {
		if err := dw.fixAndBreakChainsOne(m, otp); err != nil {
			return err
		}
	}

	return nil
}

func (dw *deltaSelector) fixAndBreakChainsOne(objectsToPack map[plumbing.Hash]*ObjectToPack, otp *ObjectToPack) error {
	if !otp.Object.Type().IsDelta() {
		return nil
	}

	// Initial ObjectToPack instances might have a delta assigned to Object
	// but no actual base initially. Once Base is assigned to a delta, it means
	// we already fixed it.
	if otp.Base != nil {
		return nil
	}

	do, ok := otp.Object.(plumbing.DeltaObject)
	if !ok {
		// if this is not a DeltaObject, then we cannot retrieve its base,
		// so we have to break the delta chain here.
		return dw.undeltify(otp)
	}

	base, ok := objectsToPack[do.BaseHash()]
	if !ok {
		// The base of the delta is not in our list of objects to pack, so
		// we break the chain.
		return dw.undeltify(otp)
	}

	if err := dw.fixAndBreakChainsOne(objectsToPack, base); err != nil {
		return err
	}

	otp.SetDelta(base, otp.Object)
	return nil
}

func (dw *deltaSelector) restoreOriginal(otp *ObjectToPack) error {
	if otp.Original != nil {
		return nil
	}

	if !otp.Object.Type().IsDelta() {
		return nil
	}

	obj, err := dw.encodedObject(otp.Hash())
	if err != nil {
		return err
	}

	otp.SetOriginal(obj)

	return nil
}

// undeltify undeltifies an *ObjectToPack by retrieving the original object from
// the storer and resetting it.
func (dw *deltaSelector) undeltify(otp *ObjectToPack) error {
	if err := dw.restoreOriginal(otp); err != nil {
		return err
	}

	otp.Object = otp.Original
	otp.Depth = 0
	return nil
}

func (dw *deltaSelector) sort(objectsToPack []*ObjectToPack) {
	sort.Sort(byTypeAndSize(objectsToPack))
}

func (dw *deltaSelector) walk(
	objectsToPack []*ObjectToPack,
	packWindow uint,
) error {
	indexMap := make(map[plumbing.Hash]*deltaIndex)
	for i := 0; i < len(objectsToPack); i++ {
		// Clean up the index map and reconstructed delta objects for anything
		// outside our pack window, to save memory.
		if i > int(packWindow) {
			obj := objectsToPack[i-int(packWindow)]

			delete(indexMap, obj.Hash())

			if obj.IsDelta() {
				obj.SaveOriginalMetadata()
				obj.CleanOriginal()
			}
		}

		target := objectsToPack[i]

		// If we already have a delta, we don't try to find a new one for this
		// object. This happens when a delta is set to be reused from an existing
		// packfile.
		if target.IsDelta() {
			continue
		}

		// We only want to create deltas from specific types.
		if !applyDelta[target.Type()] {
			continue
		}

		for j := i - 1; j >= 0 && i-j < int(packWindow); j-- {
			base := objectsToPack[j]
			// Objects must use only the same type as their delta base.
			// Since objectsToPack is sorted by type and size, once we find
			// a different type, we know we won't find more of them.
			if base.Type() != target.Type() {
				break
			}

			if err := dw.tryToDeltify(indexMap, base, target); err != nil {
				return err
			}
		}
	}

	return nil
}

func (dw *deltaSelector) tryToDeltify(indexMap map[plumbing.Hash]*deltaIndex, base, target *ObjectToPack) error {
	// Original object might not be present if we're reusing a delta, so we
	// ensure it is restored.
	if err := dw.restoreOriginal(target); err != nil {
		return err
	}

	if err := dw.restoreOriginal(base); err != nil {
		return err
	}

	// If the sizes are radically different, this is a bad pairing.
	if target.Size() < base.Size()>>4 {
		return nil
	}

	msz := dw.deltaSizeLimit(
		target.Object.Size(),
		base.Depth,
		target.Depth,
		target.IsDelta(),
	)

	// Nearly impossible to fit useful delta.
	if msz <= 8 {
		return nil
	}

	// If we have to insert a lot to make this work, find another.
	if base.Size()-target.Size() > msz {
		return nil
	}

	if _, ok := indexMap[base.Hash()]; !ok {
		indexMap[base.Hash()] = new(deltaIndex)
	}

	// Now we can generate the delta using originals
	delta, err := getDelta(indexMap[base.Hash()], base.Original, target.Original)
	if err != nil {
		return err
	}

	// if delta better than target
	if delta.Size() < msz {
		target.SetDelta(base, delta)
	}

	return nil
}

func (dw *deltaSelector) deltaSizeLimit(targetSize int64, baseDepth int, targetDepth int, targetDelta bool) int64 {
	if !targetDelta {
		// Any delta should be no more than 50% of the original size
		// (for text files deflate of whole form should shrink 50%).
		n := targetSize >> 1

		// Evenly distribute delta size limits over allowed depth.
		// If src is non-delta (depth = 0), delta <= 50% of original.
		// If src is almost at limit (9/10), delta <= 10% of original.
		return n * (maxDepth - int64(baseDepth)) / maxDepth
	}

	// With a delta base chosen any new delta must be "better".
	// Retain the distribution described above.
	d := int64(targetDepth)
	n := targetSize

	// If target depth is bigger than maxDepth, this delta is not suitable to be used.
	if d >= maxDepth {
		return 0
	}

	// If src is whole (depth=0) and base is near limit (depth=9/10)
	// any delta using src can be 10x larger and still be better.
	//
	// If src is near limit (depth=9/10) and base is whole (depth=0)
	// a new delta dependent on src must be 1/10th the size.
	return n * (maxDepth - int64(baseDepth)) / (maxDepth - d)
}

type byTypeAndSize []*ObjectToPack

func (a byTypeAndSize) Len() int { return len(a) }

func (a byTypeAndSize) Swap(i, j int) { a[i], a[j] = a[j], a[i] }

func (a byTypeAndSize) Less(i, j int) bool {
	if a[i].Type() < a[j].Type() {
		return false
	}

	if a[i].Type() > a[j].Type() {
		return true
	}

	return a[i].Size() > a[j].Size()
}
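// Worked example, not part of the original sources: deltaSizeLimit's budget
// arithmetic for a non-delta target. For a 1000-byte target, a whole (depth 0)
// base allows a delta of up to 500 bytes; a base already at depth 45 of the 50
// allowed only leaves a 50-byte budget, so deep chains are quickly priced out.

package packfile

func exampleDeltaBudget(dw *deltaSelector) {
	_ = dw.deltaSizeLimit(1000, 0, 0, false)  // 500: (1000/2) * (50-0)/50
	_ = dw.deltaSizeLimit(1000, 45, 0, false) // 50:  (1000/2) * (50-45)/50
}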
package packfile

import (
	"bytes"

	"github.com/go-git/go-git/v6/plumbing"
	"github.com/go-git/go-git/v6/utils/ioutil"
	"github.com/go-git/go-git/v6/utils/sync"
)

// See https://github.com/jelmer/dulwich/blob/master/dulwich/pack.py and
// https://github.com/tarruda/node-git-core/blob/master/src/js/delta.js
// for more info

const (
	// Standard chunk size used to generate fingerprints
	s = 16

	// https://github.com/git/git/blob/f7466e94375b3be27f229c78873f0acf8301c0a5/diff-delta.c#L428
	// Max size of a copy operation (64KB).
	maxCopySize = 64 * 1024

	// Min size of a copy operation.
	minCopySize = 4
)

// GetDelta returns an EncodedObject of type OFSDeltaObject. Both the base and
// the target object are loaded into memory in order to create the delta
// object. To regenerate the target, you will need both the resulting delta
// object and the base one. An error is returned if the base or target object
// cannot be read.
func GetDelta(base, target plumbing.EncodedObject) (plumbing.EncodedObject, error) {
	return getDelta(new(deltaIndex), base, target)
}

func getDelta(index *deltaIndex, base, target plumbing.EncodedObject) (o plumbing.EncodedObject, err error) {
	br, err := base.Reader()
	if err != nil {
		return nil, err
	}

	defer ioutil.CheckClose(br, &err)

	tr, err := target.Reader()
	if err != nil {
		return nil, err
	}

	defer ioutil.CheckClose(tr, &err)

	bb := sync.GetBytesBuffer()
	defer sync.PutBytesBuffer(bb)

	_, err = bb.ReadFrom(br)
	if err != nil {
		return nil, err
	}

	tb := sync.GetBytesBuffer()
	defer sync.PutBytesBuffer(tb)

	_, err = tb.ReadFrom(tr)
	if err != nil {
		return nil, err
	}

	db := diffDelta(index, bb.Bytes(), tb.Bytes())
	delta := &plumbing.MemoryObject{}
	_, err = delta.Write(db)
	if err != nil {
		return nil, err
	}

	delta.SetSize(int64(len(db)))
	delta.SetType(plumbing.OFSDeltaObject)

	return delta, nil
}

// DiffDelta returns the delta that transforms src into tgt.
func DiffDelta(src, tgt []byte) []byte {
	return diffDelta(new(deltaIndex), src, tgt)
}

func diffDelta(index *deltaIndex, src []byte, tgt []byte) []byte {
	buf := sync.GetBytesBuffer()
	defer sync.PutBytesBuffer(buf)
	buf.Write(deltaEncodeSize(len(src)))
	buf.Write(deltaEncodeSize(len(tgt)))

	if len(index.entries) == 0 {
		index.init(src)
	}

	ibuf := sync.GetBytesBuffer()
	defer sync.PutBytesBuffer(ibuf)
	for i := 0; i < len(tgt); i++ {
		offset, l := index.findMatch(src, tgt, i)

		if l == 0 {
			// couldn't find a match, just write the current byte and continue
			ibuf.WriteByte(tgt[i])
		} else if l < 0 {
			// src is less than blksz, copy the rest of the target to avoid
			// calls to findMatch
			for ; i < len(tgt); i++ {
				ibuf.WriteByte(tgt[i])
			}
		} else if l < s {
			// remaining target is less than blksz, copy what's left of it
			// and avoid calls to findMatch
			for j := i; j < i+l; j++ {
				ibuf.WriteByte(tgt[j])
			}
			i += l - 1
		} else {
			encodeInsertOperation(ibuf, buf)

			rl := l
			aOffset := offset
			for rl > 0 {
				if rl < maxCopySize {
					buf.Write(encodeCopyOperation(aOffset, rl))
					break
				}

				buf.Write(encodeCopyOperation(aOffset, maxCopySize))
				rl -= maxCopySize
				aOffset += maxCopySize
			}

			i += l - 1
		}
	}

	encodeInsertOperation(ibuf, buf)

	// buf.Bytes() is only valid until the next modifying operation on the buffer. Copy it.
	return append([]byte{}, buf.Bytes()...)
}

func encodeInsertOperation(ibuf, buf *bytes.Buffer) {
	if ibuf.Len() == 0 {
		return
	}

	b := ibuf.Bytes()
	s := ibuf.Len()
	o := 0
	for {
		if s <= 127 {
			break
		}
		buf.WriteByte(byte(127))
		buf.Write(b[o : o+127])
		s -= 127
		o += 127
	}
	buf.WriteByte(byte(s))
	buf.Write(b[o : o+s])

	ibuf.Reset()
}

func deltaEncodeSize(size int) []byte {
	var ret []byte
	c := size & 0x7f
	size >>= 7
	for {
		if size == 0 {
			break
		}

		ret = append(ret, byte(c|0x80))
		c = size & 0x7f
		size >>= 7
	}
	ret = append(ret, byte(c))

	return ret
}

func encodeCopyOperation(offset, length int) []byte {
	code := 0x80
	var opcodes []byte

	var i uint
	for i = 0; i < 4; i++ {
		f := 0xff << (i * 8)
		if offset&f != 0 {
			opcodes = append(opcodes, byte(offset&f>>(i*8)))
			code |= 0x01 << i
		}
	}

	for i = 0; i < 3; i++ {
		f := 0xff << (i * 8)
		if length&f != 0 {
			opcodes = append(opcodes, byte(length&f>>(i*8)))
			code |= 0x10 << i
		}
	}

	return append([]byte{byte(code)}, opcodes...)
}
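// Usage sketch, not part of the original sources: round-tripping DiffDelta.
// It assumes the package's PatchDelta helper (defined in patch_delta.go, not
// shown here) keeps its PatchDelta(src, delta []byte) ([]byte, error)
// signature.

package packfile

import "bytes"

func exampleDiffDelta() bool {
	src := bytes.Repeat([]byte("0123456789abcdef"), 8)
	tgt := append(append([]byte("new header "), src...), " new trailer"...)

	delta := DiffDelta(src, tgt)
	out, err := PatchDelta(src, delta)
	return err == nil && bytes.Equal(out, tgt) // true: the delta reproduces tgt
}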
package packfile

import (
	"compress/zlib"
	"crypto"
	"fmt"
	"io"

	"github.com/go-git/go-git/v6/plumbing"
	"github.com/go-git/go-git/v6/plumbing/hash"
	"github.com/go-git/go-git/v6/plumbing/storer"
	"github.com/go-git/go-git/v6/utils/binary"
	"github.com/go-git/go-git/v6/utils/ioutil"
)

// Encoder gets the data from the storage and writes it into the writer in PACK
// format.
type Encoder struct {
	selector     *deltaSelector
	w            *offsetWriter
	zw           *zlib.Writer
	hasher       plumbing.Hasher
	useRefDeltas bool
}

// NewEncoder creates a new packfile encoder using a specific Writer and
// EncodedObjectStorer. By default deltas used to generate the packfile will be
// OFSDeltaObject. To use Reference deltas, set useRefDeltas to true.
func NewEncoder(w io.Writer, s storer.EncodedObjectStorer, useRefDeltas bool) *Encoder {
	h := plumbing.Hasher{
		// TODO: Support passing an ObjectFormat (sha256)
		Hash: hash.New(crypto.SHA1),
	}
	mw := io.MultiWriter(w, h)
	ow := newOffsetWriter(mw)
	zw := zlib.NewWriter(mw)
	return &Encoder{
		selector:     newDeltaSelector(s),
		w:            ow,
		zw:           zw,
		hasher:       h,
		useRefDeltas: useRefDeltas,
	}
}

// Encode creates a packfile containing all the objects referenced in
// hashes and writes it to the writer in the Encoder. `packWindow`
// specifies the size of the sliding window used to compare objects
// for delta compression; 0 turns off delta compression entirely.
func (e *Encoder) Encode(
	hashes []plumbing.Hash,
	packWindow uint,
) (plumbing.Hash, error) {
	objects, err := e.selector.ObjectsToPack(hashes, packWindow)
	if err != nil {
		return plumbing.ZeroHash, err
	}

	return e.encode(objects)
}

func (e *Encoder) encode(objects []*ObjectToPack) (plumbing.Hash, error) {
	if err := e.head(len(objects)); err != nil {
		return plumbing.ZeroHash, err
	}

	for _, o := range objects {
		if err := e.entry(o); err != nil {
			return plumbing.ZeroHash, err
		}
	}

	return e.footer()
}

func (e *Encoder) head(numEntries int) error {
	return binary.Write(
		e.w,
		signature,
		int32(VersionSupported),
		int32(numEntries),
	)
}

func (e *Encoder) entry(o *ObjectToPack) (err error) {
	if o.WantWrite() {
		// A cycle exists in this delta chain. This should only occur if a
		// selected object representation disappeared during writing
		// (for example due to a concurrent repack) and a different base
		// was chosen, forcing a cycle. Select something other than a
		// delta, and write this object.
		e.selector.restoreOriginal(o)
		o.BackToOriginal()
	}

	if o.IsWritten() {
		return nil
	}

	o.MarkWantWrite()

	if err := e.writeBaseIfDelta(o); err != nil {
		return err
	}

	// We need to check if we already wrote that object due to a cyclic delta chain.
	if o.IsWritten() {
		return nil
	}

	o.Offset = e.w.Offset()

	if o.IsDelta() {
		if err := e.writeDeltaHeader(o); err != nil {
			return err
		}
	} else {
		if err := e.entryHead(o.Type(), o.Size()); err != nil {
			return err
		}
	}

	e.zw.Reset(e.w)
	defer ioutil.CheckClose(e.zw, &err)
	or, err := o.Object.Reader()
	if err != nil {
		return err
	}
	defer ioutil.CheckClose(or, &err)

	_, err = io.Copy(e.zw, or)
	return err
}

func (e *Encoder) writeBaseIfDelta(o *ObjectToPack) error {
	if o.IsDelta() && !o.Base.IsWritten() {
		// We must write base first
		return e.entry(o.Base)
	}

	return nil
}

func (e *Encoder) writeDeltaHeader(o *ObjectToPack) error {
	// Write offset deltas by default
	t := plumbing.OFSDeltaObject
	if e.useRefDeltas {
		t = plumbing.REFDeltaObject
	}

	if err := e.entryHead(t, o.Object.Size()); err != nil {
		return err
	}

	if e.useRefDeltas {
		return e.writeRefDeltaHeader(o.Base.Hash())
	} else {
		return e.writeOfsDeltaHeader(o)
	}
}

func (e *Encoder) writeRefDeltaHeader(base plumbing.Hash) error {
	_, err := base.WriteTo(e.w)
	return err
}

func (e *Encoder) writeOfsDeltaHeader(o *ObjectToPack) error {
	// for OFS_DELTA, offset of the base is interpreted as negative offset
	// relative to the type-byte of the header of the ofs-delta entry.
	relativeOffset := o.Offset - o.Base.Offset
	if relativeOffset <= 0 {
		return fmt.Errorf("bad offset for OFS_DELTA entry: %d", relativeOffset)
	}

	return binary.WriteVariableWidthInt(e.w, relativeOffset)
}

func (e *Encoder) entryHead(typeNum plumbing.ObjectType, size int64) error {
	t := int64(typeNum)
	header := []byte{}
	c := (t << firstLengthBits) | (size & maskFirstLength)
	size >>= firstLengthBits
	for {
		if size == 0 {
			break
		}

		header = append(header, byte(c|maskContinue))
		c = size & int64(maskLength)
		size >>= lengthBits
	}

	header = append(header, byte(c))
	_, err := e.w.Write(header)

	return err
}

func (e *Encoder) footer() (plumbing.Hash, error) {
	h := e.hasher.Sum()
	_, err := h.WriteTo(e.w)
	return h, err
}

type offsetWriter struct {
	w      io.Writer
	offset int64
}

func newOffsetWriter(w io.Writer) *offsetWriter {
	return &offsetWriter{w: w}
}

func (ow *offsetWriter) Write(p []byte) (n int, err error) {
	n, err = ow.w.Write(p)
	ow.offset += int64(n)
	return n, err
}

func (ow *offsetWriter) Offset() int64 {
	return ow.offset
}
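// Usage sketch, not part of the original sources, written as it would appear
// in an external test file. It assumes go-git's in-memory storer
// (storage/memory) purely for illustration; any storer.EncodedObjectStorer
// already holding the hashed objects works. A window of 10 matches git's
// default delta window; 0 would disable delta compression entirely.

package packfile_test

import (
	"bytes"

	"github.com/go-git/go-git/v6/plumbing"
	"github.com/go-git/go-git/v6/plumbing/format/packfile"
	"github.com/go-git/go-git/v6/storage/memory"
)

func encodeSketch(hashes []plumbing.Hash) (plumbing.Hash, []byte, error) {
	storage := memory.NewStorage() // must already contain the hashed objects
	var buf bytes.Buffer
	enc := packfile.NewEncoder(&buf, storage, false) // false: use OFS deltas
	sum, err := enc.Encode(hashes, 10)
	return sum, buf.Bytes(), err
}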
package packfile import "fmt" // Error specifies errors returned during packfile parsing. type Error struct { reason, details string } // NewError returns a new error. func NewError(reason string) *Error { return &Error{reason: reason} } // Error returns a text representation of the error. func (e *Error) Error() string { if e.details == "" { return e.reason } return fmt.Sprintf("%s: %s", e.reason, e.details) } // AddDetails adds details to an error, with additional text. func (e *Error) AddDetails(format string, args ...interface{}) *Error { return &Error{ reason: e.reason, details: fmt.Sprintf(format, args...), } }
package packfile import ( "errors" "io" "os" billy "github.com/go-git/go-billy/v5" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/cache" "github.com/go-git/go-git/v6/plumbing/format/idxfile" "github.com/go-git/go-git/v6/utils/sync" ) // FSObject is an object from the packfile on the filesystem. type FSObject struct { hash plumbing.Hash offset int64 size int64 typ plumbing.ObjectType index idxfile.Index fs billy.Filesystem pack billy.File packPath string cache cache.Object } // NewFSObject creates a new filesystem object. func NewFSObject( hash plumbing.Hash, finalType plumbing.ObjectType, offset int64, contentSize int64, index idxfile.Index, fs billy.Filesystem, pack billy.File, packPath string, cache cache.Object, ) *FSObject { return &FSObject{ hash: hash, offset: offset, size: contentSize, typ: finalType, index: index, fs: fs, pack: pack, packPath: packPath, cache: cache, } } // Reader implements the plumbing.EncodedObject interface. func (o *FSObject) Reader() (io.ReadCloser, error) { obj, ok := o.cache.Get(o.hash) if ok && obj != o { reader, err := obj.Reader() if err != nil { return nil, err } return reader, nil } var closer io.Closer _, err := o.pack.Seek(o.offset, io.SeekStart) // fsobject aims to reuse an existing file descriptor to the packfile. // In some cases that descriptor would already be closed, in such cases, // open the packfile again and close it when the reader is closed. if err != nil && errors.Is(err, os.ErrClosed) { o.pack, err = o.fs.Open(o.packPath) if err != nil { return nil, err } closer = o.pack _, err = o.pack.Seek(o.offset, io.SeekStart) } if err != nil { return nil, err } dict := sync.GetByteSlice() zr := sync.NewZlibReader(dict) err = zr.Reset(o.pack) if err != nil { return nil, err } return &zlibReadCloser{zr, dict, closer}, nil } type zlibReadCloser struct { r sync.ZLibReader dict *[]byte f io.Closer } // Read reads up to len(p) bytes into p from the data. func (r *zlibReadCloser) Read(p []byte) (int, error) { return r.r.Reader.Read(p) } func (r *zlibReadCloser) Close() error { sync.PutByteSlice(r.dict) sync.PutZlibReader(r.r) if r.f != nil { r.f.Close() } return nil } // SetSize implements the plumbing.EncodedObject interface. This method // is a noop. func (o *FSObject) SetSize(int64) {} // SetType implements the plumbing.EncodedObject interface. This method is // a noop. func (o *FSObject) SetType(plumbing.ObjectType) {} // Hash implements the plumbing.EncodedObject interface. func (o *FSObject) Hash() plumbing.Hash { return o.hash } // Size implements the plumbing.EncodedObject interface. func (o *FSObject) Size() int64 { return o.size } // Type implements the plumbing.EncodedObject interface. func (o *FSObject) Type() plumbing.ObjectType { return o.typ } // Writer implements the plumbing.EncodedObject interface. This method always // returns a nil writer. func (o *FSObject) Writer() (io.WriteCloser, error) { return nil, nil }
package packfile import ( "github.com/go-git/go-git/v6/plumbing" ) // ObjectToPack is a representation of an object that is going to be into a // pack file. type ObjectToPack struct { // The main object to pack, it could be any object, including deltas. Object plumbing.EncodedObject // Base is the object that a delta is based on, which could also be another delta. // Nil when the main object is not a delta. Base *ObjectToPack // Original is the object that we can generate applying the delta to // Base, or the same object as Object in the case of a non-delta // object. Original plumbing.EncodedObject // Depth is the amount of deltas needed to resolve to obtain Original // (delta based on delta based on ...) Depth int // offset in pack when object has been already written, or 0 if it // has not been written yet Offset int64 // Information from the original object resolvedOriginal bool originalType plumbing.ObjectType originalSize int64 originalHash plumbing.Hash } // newObjectToPack creates a correct ObjectToPack based on a non-delta object func newObjectToPack(o plumbing.EncodedObject) *ObjectToPack { return &ObjectToPack{ Object: o, Original: o, } } // newDeltaObjectToPack creates a correct ObjectToPack for a delta object, based on // his base (could be another delta), the delta target (in this case called original), // and the delta Object itself func newDeltaObjectToPack(base *ObjectToPack, original, delta plumbing.EncodedObject) *ObjectToPack { return &ObjectToPack{ Object: delta, Base: base, Original: original, Depth: base.Depth + 1, } } // BackToOriginal converts that ObjectToPack to a non-deltified object if it was one func (o *ObjectToPack) BackToOriginal() { if o.IsDelta() && o.Original != nil { o.Object = o.Original o.Base = nil o.Depth = 0 } } // IsWritten returns if that ObjectToPack was // already written into the packfile or not func (o *ObjectToPack) IsWritten() bool { return o.Offset > 1 } // MarkWantWrite marks this ObjectToPack as WantWrite // to avoid delta chain loops func (o *ObjectToPack) MarkWantWrite() { o.Offset = 1 } // WantWrite checks if this ObjectToPack was marked as WantWrite before func (o *ObjectToPack) WantWrite() bool { return o.Offset == 1 } // SetOriginal sets both Original and saves size, type and hash. 
If the object // is nil, Original is set but the previously resolved values are kept func (o *ObjectToPack) SetOriginal(obj plumbing.EncodedObject) { o.Original = obj o.SaveOriginalMetadata() } // SaveOriginalMetadata saves size, type and hash of Original object func (o *ObjectToPack) SaveOriginalMetadata() { if o.Original != nil { o.originalSize = o.Original.Size() o.originalType = o.Original.Type() o.originalHash = o.Original.Hash() o.resolvedOriginal = true } } // CleanOriginal sets Original to nil func (o *ObjectToPack) CleanOriginal() { o.Original = nil } func (o *ObjectToPack) Type() plumbing.ObjectType { if o.Original != nil { return o.Original.Type() } if o.resolvedOriginal { return o.originalType } if o.Base != nil { return o.Base.Type() } if o.Object != nil { return o.Object.Type() } panic("cannot get type") } func (o *ObjectToPack) Hash() plumbing.Hash { if o.Original != nil { return o.Original.Hash() } if o.resolvedOriginal { return o.originalHash } do, ok := o.Object.(plumbing.DeltaObject) if ok { return do.ActualHash() } panic("cannot get hash") } func (o *ObjectToPack) Size() int64 { if o.Original != nil { return o.Original.Size() } if o.resolvedOriginal { return o.originalSize } do, ok := o.Object.(plumbing.DeltaObject) if ok { return do.ActualSize() } panic("cannot get ObjectToPack size") } func (o *ObjectToPack) IsDelta() bool { return o.Base != nil } func (o *ObjectToPack) SetDelta(base *ObjectToPack, delta plumbing.EncodedObject) { o.Object = delta o.Base = base o.Depth = base.Depth + 1 }
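// Editor's note (not part of the original source): Offset doubles as a tiny
// state machine for cycle detection while writing delta chains, as used by
// Encoder.entry above:
//
//	o := newObjectToPack(obj) // Offset == 0: not written, no write pending
//	o.MarkWantWrite()         // Offset == 1: write in progress (sentinel)
//	_ = o.WantWrite()         // true while the sentinel is set
//	o.Offset = 42             // real pack offsets are always > 1
//	_ = o.IsWritten()         // true once a real offset is recorded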
package packfile import ( "crypto" "fmt" "io" "os" "sync" billy "github.com/go-git/go-billy/v5" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/cache" format "github.com/go-git/go-git/v6/plumbing/format/config" "github.com/go-git/go-git/v6/plumbing/format/idxfile" "github.com/go-git/go-git/v6/plumbing/storer" "github.com/go-git/go-git/v6/utils/ioutil" ) var ( // ErrInvalidObject is returned by Decode when an invalid object is // found in the packfile. ErrInvalidObject = NewError("invalid git object") // ErrZLib is returned by Decode when there was an error unzipping // the packfile contents. ErrZLib = NewError("zlib reading error") ) // Packfile allows retrieving information from inside a packfile. type Packfile struct { idxfile.Index fs billy.Filesystem file billy.File scanner *Scanner cache cache.Object id plumbing.Hash m sync.Mutex objectIdSize int once sync.Once onceErr error } // NewPackfile returns a packfile representation for the given packfile file // and packfile idx. // If the filesystem is provided, the packfile will return FSObjects, otherwise // it will return MemoryObjects. func NewPackfile( file billy.File, opts ...PackfileOption, ) *Packfile { p := &Packfile{ file: file, objectIdSize: crypto.SHA1.Size(), } for _, opt := range opts { opt(p) } return p } // Get retrieves the encoded object in the packfile with the given hash. func (p *Packfile) Get(h plumbing.Hash) (plumbing.EncodedObject, error) { if err := p.init(); err != nil { return nil, err } p.m.Lock() defer p.m.Unlock() return p.get(h) } // GetByOffset retrieves the encoded object from the packfile at the given // offset. func (p *Packfile) GetByOffset(offset int64) (plumbing.EncodedObject, error) { if err := p.init(); err != nil { return nil, err } p.m.Lock() defer p.m.Unlock() return p.getByOffset(offset) } // GetSizeByOffset retrieves the size of the encoded object from the // packfile with the given offset. func (p *Packfile) GetSizeByOffset(offset int64) (size int64, err error) { if err := p.init(); err != nil { return 0, err } d, err := p.GetByOffset(offset) if err != nil { return 0, err } return d.Size(), nil } // GetAll returns an iterator with all encoded objects in the packfile. // The iterator returned is not thread-safe, it should be used in the same // thread as the Packfile instance. func (p *Packfile) GetAll() (storer.EncodedObjectIter, error) { return p.GetByType(plumbing.AnyObject) } // GetByType returns all the objects of the given type. func (p *Packfile) GetByType(typ plumbing.ObjectType) (storer.EncodedObjectIter, error) { if err := p.init(); err != nil { return nil, err } switch typ { case plumbing.AnyObject, plumbing.BlobObject, plumbing.TreeObject, plumbing.CommitObject, plumbing.TagObject: entries, err := p.EntriesByOffset() if err != nil { return nil, err } return &objectIter{ p: p, iter: entries, typ: typ, }, nil default: return nil, plumbing.ErrInvalidType } } // Returns the Packfile's inner scanner. // // Deprecated: this will be removed in future versions of the packfile package // to avoid exposing the package internals and to improve its thread-safety. // TODO: Remove Scanner method func (p *Packfile) Scanner() (*Scanner, error) { if err := p.init(); err != nil { return nil, err } return p.scanner, nil } // ID returns the ID of the packfile, which is the checksum at the end of it. 
func (p *Packfile) ID() (plumbing.Hash, error) { if err := p.init(); err != nil { return plumbing.ZeroHash, err } return p.id, nil } // get is not thread-safe, and should only be called within packfile.go. func (p *Packfile) get(h plumbing.Hash) (plumbing.EncodedObject, error) { if obj, ok := p.cache.Get(h); ok { return obj, nil } offset, err := p.Index.FindOffset(h) if err != nil { return nil, err } oh, err := p.headerFromOffset(offset) if err != nil { return nil, err } return p.objectFromHeader(oh) } // getByOffset is not thread-safe, and should only be called within packfile.go. func (p *Packfile) getByOffset(offset int64) (plumbing.EncodedObject, error) { h, err := p.FindHash(offset) if err != nil { return nil, err } if obj, ok := p.cache.Get(h); ok { return obj, nil } oh, err := p.headerFromOffset(offset) if err != nil { return nil, err } return p.objectFromHeader(oh) } func (p *Packfile) init() error { p.once.Do(func() { if p.file == nil { p.onceErr = fmt.Errorf("file is not set") return } if p.Index == nil { p.onceErr = fmt.Errorf("index is not set") return } var opts []ScannerOption if p.objectIdSize == format.SHA256Size { opts = append(opts, WithSHA256()) } p.scanner = NewScanner(p.file, opts...) // Validate packfile signature. if !p.scanner.Scan() { p.onceErr = p.scanner.Error() return } _, err := p.scanner.Seek(-int64(p.objectIdSize), io.SeekEnd) if err != nil { p.onceErr = err return } p.id.ResetBySize(p.objectIdSize) _, err = p.id.ReadFrom(p.scanner) if err != nil { p.onceErr = err } if p.cache == nil { p.cache = cache.NewObjectLRUDefault() } }) return p.onceErr } func (p *Packfile) headerFromOffset(offset int64) (*ObjectHeader, error) { err := p.scanner.SeekFromStart(offset) if err != nil { return nil, err } if !p.scanner.Scan() { return nil, plumbing.ErrObjectNotFound } oh := p.scanner.Data().Value().(ObjectHeader) return &oh, nil } // Close closes the packfile and its resources. func (p *Packfile) Close() error { p.m.Lock() defer p.m.Unlock() closer, ok := p.file.(io.Closer) if !ok { return nil } return closer.Close() } func (p *Packfile) objectFromHeader(oh *ObjectHeader) (plumbing.EncodedObject, error) { if oh == nil { return nil, plumbing.ErrObjectNotFound } // If we have a filesystem and the object is not a delta type, return an FSObject. // This avoids having to inflate the object more than once.
if !oh.Type.IsDelta() && p.fs != nil { fs := NewFSObject( oh.ID(), oh.Type, oh.ContentOffset, oh.Size, p.Index, p.fs, p.file, p.file.Name(), p.cache, ) p.cache.Put(fs) return fs, nil } return p.getMemoryObject(oh) } func (p *Packfile) getMemoryObject(oh *ObjectHeader) (plumbing.EncodedObject, error) { var obj = new(plumbing.MemoryObject) obj.SetSize(oh.Size) obj.SetType(oh.Type) w, err := obj.Writer() if err != nil { return nil, err } defer ioutil.CheckClose(w, &err) switch oh.Type { case plumbing.CommitObject, plumbing.TreeObject, plumbing.BlobObject, plumbing.TagObject: err = p.scanner.inflateContent(oh.ContentOffset, w) case plumbing.REFDeltaObject, plumbing.OFSDeltaObject: var parent plumbing.EncodedObject switch oh.Type { case plumbing.REFDeltaObject: var ok bool parent, ok = p.cache.Get(oh.Reference) if !ok { parent, err = p.get(oh.Reference) } case plumbing.OFSDeltaObject: parent, err = p.getByOffset(oh.OffsetReference) } if err != nil { return nil, fmt.Errorf("cannot find base object: %w", err) } err = p.scanner.inflateContent(oh.ContentOffset, &oh.content) if err != nil { return nil, fmt.Errorf("cannot inflate content: %w", err) } obj.SetType(parent.Type()) err = ApplyDelta(obj, parent, oh.content.Bytes()) //nolint:ineffassign default: err = ErrInvalidObject.AddDetails("type %q", oh.Type) } if err != nil { return nil, err } p.cache.Put(obj) return obj, nil } // isInvalid checks whether an error is an os.PathError with an os.ErrInvalid // error inside. It also checks for the windows error, which is different from // os.ErrInvalid. func isInvalid(err error) bool { pe, ok := err.(*os.PathError) if !ok { return false } errstr := pe.Err.Error() return errstr == errInvalidUnix || errstr == errInvalidWindows } // errInvalidWindows is the Windows equivalent to os.ErrInvalid const errInvalidWindows = "The parameter is incorrect." var errInvalidUnix = os.ErrInvalid.Error()
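// Editor's sketch (not part of the original source): opening an on-disk pack
// together with its idx and reading it through Packfile. The file names are
// placeholders, and the idxfile decoding calls (idxfile.NewMemoryIndex,
// idxfile.NewDecoder) are assumptions based on the idxfile package API.
package main

import (
	"fmt"

	"github.com/go-git/go-billy/v5/osfs"
	"github.com/go-git/go-git/v6/plumbing/format/idxfile"
	"github.com/go-git/go-git/v6/plumbing/format/packfile"
)

func main() {
	fs := osfs.New(".git/objects/pack")
	pack, err := fs.Open("example.pack") // placeholder name
	if err != nil {
		panic(err)
	}
	idxf, err := fs.Open("example.idx") // placeholder name
	if err != nil {
		panic(err)
	}

	idx := idxfile.NewMemoryIndex()
	if err := idxfile.NewDecoder(idxf).Decode(idx); err != nil {
		panic(err)
	}
	idxf.Close()

	// WithFs makes non-delta reads come back as FSObjects instead of
	// MemoryObjects, avoiding repeated inflation of the same object.
	p := packfile.NewPackfile(pack, packfile.WithIdx(idx), packfile.WithFs(fs))
	defer p.Close()

	id, err := p.ID()
	if err != nil {
		panic(err)
	}
	fmt.Println("pack checksum:", id)
}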
package packfile import ( "io" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/format/idxfile" ) type objectIter struct { p *Packfile typ plumbing.ObjectType iter idxfile.EntryIter } func (i *objectIter) Next() (plumbing.EncodedObject, error) { if err := i.p.init(); err != nil { return nil, err } i.p.m.Lock() defer i.p.m.Unlock() return i.next() } func (i *objectIter) next() (plumbing.EncodedObject, error) { for { e, err := i.iter.Next() if err != nil { return nil, err } oh, err := i.p.headerFromOffset(int64(e.Offset)) if err != nil { return nil, err } if i.typ == plumbing.AnyObject { return i.p.objectFromHeader(oh) } // Current object header type is a delta, get the actual object to // assess the actual type. if oh.Type.IsDelta() { o, err := i.p.objectFromHeader(oh) if o.Type() == i.typ { return o, err } continue } if oh.Type == i.typ { return i.p.objectFromHeader(oh) } continue } } func (i *objectIter) ForEach(f func(plumbing.EncodedObject) error) error { if err := i.p.init(); err != nil { return err } i.p.m.Lock() defer i.p.m.Unlock() for { o, err := i.next() if err != nil { if err == io.EOF { return nil } return err } if err := f(o); err != nil { return err } } } func (i *objectIter) Close() { i.p.m.Lock() defer i.p.m.Unlock() i.iter.Close() }
package packfile import ( billy "github.com/go-git/go-billy/v5" "github.com/go-git/go-git/v6/plumbing/cache" "github.com/go-git/go-git/v6/plumbing/format/idxfile" ) type PackfileOption func(*Packfile) // WithCache sets the cache to be used throughout Packfile operations. // Use this to share existing caches with the Packfile. If not used, a // new cache instance will be created. func WithCache(cache cache.Object) PackfileOption { return func(p *Packfile) { p.cache = cache } } // WithIdx sets the idxfile for the packfile. func WithIdx(idx idxfile.Index) PackfileOption { return func(p *Packfile) { p.Index = idx } } // WithFs sets the filesystem to be used. func WithFs(fs billy.Filesystem) PackfileOption { return func(p *Packfile) { p.fs = fs } } // WithObjectIDSize sets the size of the object IDs inside the packfile. // Valid options are hash.SHA1Size and hash.SHA256Size. // // When no object ID size is set, hash.SHA1Size will be used. func WithObjectIDSize(sz int) PackfileOption { return func(p *Packfile) { p.objectIdSize = sz } }
package packfile import ( "bytes" "errors" "fmt" "io" stdsync "sync" "github.com/go-git/go-git/v6/plumbing" format "github.com/go-git/go-git/v6/plumbing/format/config" "github.com/go-git/go-git/v6/plumbing/storer" "github.com/go-git/go-git/v6/utils/ioutil" ) var ( // ErrReferenceDeltaNotFound is returned when the reference delta is not // found. ErrReferenceDeltaNotFound = errors.New("reference delta not found") // ErrNotSeekableSource is returned when the source for the parser is not // seekable and a storage was not provided, so it can't be parsed. ErrNotSeekableSource = errors.New("parser source is not seekable and storage was not provided") // ErrDeltaNotCached is returned when the delta could not be found in cache. ErrDeltaNotCached = errors.New("delta could not be found in cache") ) // Parser decodes a packfile and calls any observer associated to it. Is used // to generate indexes. type Parser struct { storage storer.EncodedObjectStorer cache *parserCache scanner *Scanner observers []Observer hasher plumbing.Hasher checksum plumbing.Hash m stdsync.Mutex } // NewParser creates a new Parser. // When a storage is set, the objects are written to storage as they // are parsed. func NewParser(data io.Reader, opts ...ParserOption) *Parser { p := &Parser{ hasher: plumbing.NewHasher(format.SHA1, plumbing.AnyObject, 0), } for _, opt := range opts { opt(p) } p.scanner = NewScanner(data) if p.storage != nil { p.scanner.storage = p.storage } p.cache = newParserCache() return p } func (p *Parser) storeOrCache(oh *ObjectHeader) error { // Only need to store deltas, as the scanner already stored non-delta // objects. if p.storage != nil && oh.diskType.IsDelta() { w, err := p.storage.RawObjectWriter(oh.Type, oh.Size) if err != nil { return err } defer w.Close() _, err = io.Copy(w, bytes.NewReader(oh.content.Bytes())) if err != nil { return err } } if p.cache != nil { p.cache.Add(oh) } if err := p.onInflatedObjectHeader(oh.Type, oh.Size, oh.Offset); err != nil { return err } if err := p.onInflatedObjectContent(oh.Hash, oh.Offset, oh.Crc32, nil); err != nil { return err } return nil } func (p *Parser) resetCache(qty int) { if p.cache != nil { p.cache.Reset(qty) } } // Parse start decoding phase of the packfile. 
func (p *Parser) Parse() (plumbing.Hash, error) { p.m.Lock() defer p.m.Unlock() var pendingDeltas []*ObjectHeader var pendingDeltaREFs []*ObjectHeader for p.scanner.Scan() { data := p.scanner.Data() switch data.Section { case HeaderSection: header := data.Value().(Header) p.resetCache(int(header.ObjectsQty)) p.onHeader(header.ObjectsQty) case ObjectSection: oh := data.Value().(ObjectHeader) if oh.Type.IsDelta() { if oh.Type == plumbing.OFSDeltaObject { pendingDeltas = append(pendingDeltas, &oh) } else if oh.Type == plumbing.REFDeltaObject { pendingDeltaREFs = append(pendingDeltaREFs, &oh) } continue } else { p.storeOrCache(&oh) } case FooterSection: p.checksum = data.Value().(plumbing.Hash) } } if p.scanner.objects == 0 { return plumbing.ZeroHash, ErrEmptyPackfile } for _, oh := range pendingDeltaREFs { err := p.processDelta(oh) if err != nil { return plumbing.ZeroHash, err } } for _, oh := range pendingDeltas { err := p.processDelta(oh) if err != nil { return plumbing.ZeroHash, err } } return p.checksum, p.onFooter(p.checksum) } func (p *Parser) processDelta(oh *ObjectHeader) error { switch oh.Type { case plumbing.OFSDeltaObject: pa, ok := p.cache.oiByOffset[oh.OffsetReference] if !ok { return plumbing.ErrObjectNotFound } oh.parent = pa case plumbing.REFDeltaObject: pa, ok := p.cache.oiByHash[oh.Reference] if !ok { // can't find referenced object in this pack file // this must be a "thin" pack. oh.parent = &ObjectHeader{ // Placeholder parent Hash: oh.Reference, externalRef: true, // mark as an external reference that must be resolved Type: plumbing.AnyObject, diskType: plumbing.AnyObject, } } else { oh.parent = pa } p.cache.oiByHash[oh.Reference] = oh.parent default: return fmt.Errorf("unsupported delta type: %v", oh.Type) } parentContents, err := p.parentReader(oh.parent) if err != nil { return err } var deltaData bytes.Buffer if oh.content.Len() > 0 { _, err = oh.content.WriteTo(&deltaData) if err != nil { return err } } else { deltaData = *bytes.NewBuffer(make([]byte, 0, oh.Size)) err = p.scanner.inflateContent(oh.ContentOffset, &deltaData) if err != nil { return err } } w, err := p.cacheWriter(oh) if err != nil { return err } defer w.Close() err = applyPatchBaseHeader(oh, parentContents, &deltaData, w, nil) if err != nil { return err } return p.storeOrCache(oh) } func (p *Parser) parentReader(parent *ObjectHeader) (io.ReaderAt, error) { // If parent is a Delta object, the inflated object must come // from either cache or storage, else we would need to inflate // it to then inflate the current object, which could go on // indefinitely. if p.storage != nil && parent.Hash != plumbing.ZeroHash { obj, err := p.storage.EncodedObject(parent.Type, parent.Hash) if err == nil { // Ensure that external references have the correct type and size. parent.Type = obj.Type() parent.Size = obj.Size() r, err := obj.Reader() if err == nil { parentData := bytes.NewBuffer(make([]byte, 0, parent.Size)) _, err = io.Copy(parentData, r) r.Close() if err == nil { return bytes.NewReader(parentData.Bytes()), nil } } } } if p.cache != nil && parent.content.Len() > 0 { return bytes.NewReader(parent.content.Bytes()), nil } // If the parent is not an external ref and we don't have the // content offset, we won't be able to inflate via seeking through // the packfile. if !parent.externalRef && parent.ContentOffset == 0 { return nil, plumbing.ErrObjectNotFound } // Not a seeker data source, so avoid seeking the content. 
if p.scanner.seeker == nil { return nil, plumbing.ErrObjectNotFound } parentData := bytes.NewBuffer(make([]byte, 0, parent.Size)) err := p.scanner.inflateContent(parent.ContentOffset, parentData) if err != nil { return nil, ErrReferenceDeltaNotFound } return bytes.NewReader(parentData.Bytes()), nil } func (p *Parser) cacheWriter(oh *ObjectHeader) (io.WriteCloser, error) { return ioutil.NewWriteCloser(&oh.content, nil), nil } func applyPatchBaseHeader(ota *ObjectHeader, base io.ReaderAt, delta io.Reader, target io.Writer, wh objectHeaderWriter) error { if target == nil { return fmt.Errorf("cannot apply patch against nil target") } typ := ota.Type if ota.Hash == plumbing.ZeroHash { typ = ota.parent.Type } sz, h, err := patchDeltaWriter(target, base, delta, typ, wh) if err != nil { return err } if ota.Hash == plumbing.ZeroHash { ota.Type = typ ota.Size = int64(sz) ota.Hash = h } return nil } func (p *Parser) forEachObserver(f func(o Observer) error) error { for _, o := range p.observers { if err := f(o); err != nil { return err } } return nil } func (p *Parser) onHeader(count uint32) error { return p.forEachObserver(func(o Observer) error { return o.OnHeader(count) }) } func (p *Parser) onInflatedObjectHeader( t plumbing.ObjectType, objSize int64, pos int64, ) error { return p.forEachObserver(func(o Observer) error { return o.OnInflatedObjectHeader(t, objSize, pos) }) } func (p *Parser) onInflatedObjectContent( h plumbing.Hash, pos int64, crc uint32, content []byte, ) error { return p.forEachObserver(func(o Observer) error { return o.OnInflatedObjectContent(h, pos, crc, content) }) } func (p *Parser) onFooter(h plumbing.Hash) error { return p.forEachObserver(func(o Observer) error { return o.OnFooter(h) }) }
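// Editor's sketch (not part of the original source): indexing a pack stream
// with Parser. Storage is optional; when provided (assumed here to be go-git's
// in-memory storage package), full objects are written to it as they are
// parsed, and Parse returns the pack checksum.
package main

import (
	"fmt"
	"os"

	"github.com/go-git/go-git/v6/plumbing/format/packfile"
	"github.com/go-git/go-git/v6/storage/memory"
)

func main() {
	f, err := os.Open("objects.pack") // placeholder path
	if err != nil {
		panic(err)
	}
	defer f.Close()

	st := memory.NewStorage()
	parser := packfile.NewParser(f, packfile.WithStorage(st))
	checksum, err := parser.Parse()
	if err != nil {
		panic(err)
	}
	fmt.Println("parsed pack, checksum:", checksum)
}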
package packfile import ( "slices" "github.com/go-git/go-git/v6/plumbing" "golang.org/x/exp/maps" ) func newParserCache() *parserCache { c := &parserCache{} return c } // parserCache defines the cache used within the parser. // This is not thread safe by itself, and relies on the parser to // enforce thread-safety. type parserCache struct { oi []*ObjectHeader oiByHash map[plumbing.Hash]*ObjectHeader oiByOffset map[int64]*ObjectHeader } func (c *parserCache) Add(oh *ObjectHeader) { c.oiByHash[oh.Hash] = oh c.oiByOffset[oh.Offset] = oh c.oi = append(c.oi, oh) } func (c *parserCache) Reset(n int) { if c.oi == nil { c.oi = make([]*ObjectHeader, 0, n) c.oiByHash = make(map[plumbing.Hash]*ObjectHeader, n) c.oiByOffset = make(map[int64]*ObjectHeader, n) } else { c.oi = c.oi[:0] c.oi = slices.Grow(c.oi, n) maps.Clear(c.oiByHash) maps.Clear(c.oiByOffset) } }
package packfile import ( "github.com/go-git/go-git/v6/plumbing/storer" ) type ParserOption func(*Parser) // WithStorage sets the storage to be used while parsing a pack file. func WithStorage(storage storer.EncodedObjectStorer) ParserOption { return func(p *Parser) { p.storage = storage } } // WithScannerObservers sets the observers to be notified during the // scanning or parsing of a pack file. The scanner is responsible for // notifying observers around general pack file information, such as // header and footer. The scanner also notifies object headers for // non-delta objects. // // Delta objects are notified as part of the parser logic. func WithScannerObservers(ob ...Observer) ParserOption { return func(p *Parser) { p.observers = ob } }
package packfile import ( "bufio" "bytes" "errors" "fmt" "io" "math" "github.com/go-git/go-git/v6/plumbing" format "github.com/go-git/go-git/v6/plumbing/format/config" "github.com/go-git/go-git/v6/utils/ioutil" "github.com/go-git/go-git/v6/utils/sync" ) // See https://github.com/git/git/blob/49fa3dc76179e04b0833542fa52d0f287a4955ac/delta.h // https://github.com/git/git/blob/c2c5f6b1e479f2c38e0e01345350620944e3527f/patch-delta.c, // and https://github.com/tarruda/node-git-core/blob/master/src/js/delta.js // for details about the delta format. var ( ErrInvalidDelta = errors.New("invalid delta") ErrDeltaCmd = errors.New("wrong delta command") ) const ( payload = 0x7f // 0111 1111 continuation = 0x80 // 1000 0000 // maxPatchPreemptionSize defines what is the max size of bytes to be // premptively made available for a patch operation. maxPatchPreemptionSize uint = 65536 // minDeltaSize defines the smallest size for a delta. minDeltaSize = 4 ) type offset struct { mask byte shift uint } var offsets = []offset{ {mask: 0x01, shift: 0}, {mask: 0x02, shift: 8}, {mask: 0x04, shift: 16}, {mask: 0x08, shift: 24}, } var sizes = []offset{ {mask: 0x10, shift: 0}, {mask: 0x20, shift: 8}, {mask: 0x40, shift: 16}, } // ApplyDelta writes to target the result of applying the modification deltas in delta to base. func ApplyDelta(target, base plumbing.EncodedObject, delta []byte) (err error) { r, err := base.Reader() if err != nil { return err } defer ioutil.CheckClose(r, &err) w, err := target.Writer() if err != nil { return err } defer ioutil.CheckClose(w, &err) buf := sync.GetBytesBuffer() defer sync.PutBytesBuffer(buf) _, err = buf.ReadFrom(r) if err != nil { return err } src := buf.Bytes() dst := sync.GetBytesBuffer() defer sync.PutBytesBuffer(dst) err = patchDelta(dst, src, delta) if err != nil { return err } target.SetSize(int64(dst.Len())) b := sync.GetByteSlice() _, err = io.CopyBuffer(w, dst, *b) sync.PutByteSlice(b) return err } // PatchDelta returns the result of applying the modification deltas in delta to src. // An error will be returned if delta is corrupted (ErrInvalidDelta) or an action command // is not copy from source or copy from delta (ErrDeltaCmd). 
func PatchDelta(src, delta []byte) ([]byte, error) { if len(src) == 0 || len(delta) < minDeltaSize { return nil, ErrInvalidDelta } b := &bytes.Buffer{} if err := patchDelta(b, src, delta); err != nil { return nil, err } return b.Bytes(), nil } func ReaderFromDelta(base plumbing.EncodedObject, deltaRC io.Reader) (io.ReadCloser, error) { deltaBuf := bufio.NewReaderSize(deltaRC, 1024) srcSz, err := decodeLEB128ByteReader(deltaBuf) if err != nil { if err == io.EOF { return nil, ErrInvalidDelta } return nil, err } if srcSz != uint(base.Size()) { return nil, ErrInvalidDelta } targetSz, err := decodeLEB128ByteReader(deltaBuf) if err != nil { if err == io.EOF { return nil, ErrInvalidDelta } return nil, err } remainingTargetSz := targetSz dstRd, dstWr := io.Pipe() go func() { baseRd, err := base.Reader() if err != nil { _ = dstWr.CloseWithError(ErrInvalidDelta) return } defer baseRd.Close() baseBuf := bufio.NewReader(baseRd) basePos := uint(0) for { cmd, err := deltaBuf.ReadByte() if err == io.EOF { _ = dstWr.CloseWithError(ErrInvalidDelta) return } if err != nil { _ = dstWr.CloseWithError(err) return } switch { case isCopyFromSrc(cmd): offset, err := decodeOffsetByteReader(cmd, deltaBuf) if err != nil { _ = dstWr.CloseWithError(err) return } sz, err := decodeSizeByteReader(cmd, deltaBuf) if err != nil { _ = dstWr.CloseWithError(err) return } if invalidSize(sz, targetSz) || invalidOffsetSize(offset, sz, srcSz) { _ = dstWr.Close() return } discard := offset - basePos if basePos > offset { _ = baseRd.Close() baseRd, err = base.Reader() if err != nil { _ = dstWr.CloseWithError(ErrInvalidDelta) return } baseBuf.Reset(baseRd) discard = offset } for discard > math.MaxInt32 { n, err := baseBuf.Discard(math.MaxInt32) if err != nil { _ = dstWr.CloseWithError(err) return } basePos += uint(n) discard -= uint(n) } for discard > 0 { n, err := baseBuf.Discard(int(discard)) if err != nil { _ = dstWr.CloseWithError(err) return } basePos += uint(n) discard -= uint(n) } if _, err := io.Copy(dstWr, io.LimitReader(baseBuf, int64(sz))); err != nil { _ = dstWr.CloseWithError(err) return } remainingTargetSz -= sz basePos += sz case isCopyFromDelta(cmd): sz := uint(cmd) // cmd is the size itself if invalidSize(sz, targetSz) { _ = dstWr.CloseWithError(ErrInvalidDelta) return } if _, err := io.Copy(dstWr, io.LimitReader(deltaBuf, int64(sz))); err != nil { _ = dstWr.CloseWithError(err) return } remainingTargetSz -= sz default: _ = dstWr.CloseWithError(ErrDeltaCmd) return } if remainingTargetSz <= 0 { _ = dstWr.Close() return } } }() return dstRd, nil } func patchDelta(dst *bytes.Buffer, src, delta []byte) error { if len(delta) < minCopySize { return ErrInvalidDelta } srcSz, delta := decodeLEB128(delta) if srcSz != uint(len(src)) { return ErrInvalidDelta } targetSz, delta := decodeLEB128(delta) remainingTargetSz := targetSz var cmd byte growSz := min(targetSz, maxPatchPreemptionSize) dst.Grow(int(growSz)) for { if len(delta) == 0 { return ErrInvalidDelta } cmd = delta[0] delta = delta[1:] switch { case isCopyFromSrc(cmd): var offset, sz uint var err error offset, delta, err = decodeOffset(cmd, delta) if err != nil { return err } sz, delta, err = decodeSize(cmd, delta) if err != nil { return err } if invalidSize(sz, targetSz) || invalidOffsetSize(offset, sz, srcSz) { break } dst.Write(src[offset : offset+sz]) remainingTargetSz -= sz case isCopyFromDelta(cmd): sz := uint(cmd) // cmd is the size itself if invalidSize(sz, targetSz) { return ErrInvalidDelta } if uint(len(delta)) < sz { return ErrInvalidDelta } 
dst.Write(delta[0:sz]) remainingTargetSz -= sz delta = delta[sz:] default: return ErrDeltaCmd } if remainingTargetSz <= 0 { break } } return nil } func patchDeltaWriter(dst io.Writer, base io.ReaderAt, delta io.Reader, typ plumbing.ObjectType, writeHeader objectHeaderWriter, ) (uint, plumbing.Hash, error) { deltaBuf := bufio.NewReaderSize(delta, 1024) srcSz, err := decodeLEB128ByteReader(deltaBuf) if err != nil { if err == io.EOF { return 0, plumbing.ZeroHash, ErrInvalidDelta } return 0, plumbing.ZeroHash, err } if r, ok := base.(*bytes.Reader); ok && srcSz != uint(r.Size()) { return 0, plumbing.ZeroHash, ErrInvalidDelta } targetSz, err := decodeLEB128ByteReader(deltaBuf) if err != nil { if err == io.EOF { return 0, plumbing.ZeroHash, ErrInvalidDelta } return 0, plumbing.ZeroHash, err } // If the header still needs to be written, the caller will provide // a LazyObjectWriterHeader. This seems to be the case when // dealing with thin-packs. if writeHeader != nil { err = writeHeader(typ, int64(targetSz)) if err != nil { return 0, plumbing.ZeroHash, fmt.Errorf("could not lazy write header: %w", err) } } remainingTargetSz := targetSz hasher := plumbing.NewHasher(format.SHA1, typ, int64(targetSz)) mw := io.MultiWriter(dst, hasher) bufp := sync.GetByteSlice() defer sync.PutByteSlice(bufp) sr := io.NewSectionReader(base, int64(0), int64(srcSz)) // Keep both the io.LimitedReader types, so we can reset N. baselr := io.LimitReader(sr, 0).(*io.LimitedReader) deltalr := io.LimitReader(deltaBuf, 0).(*io.LimitedReader) for { buf := *bufp cmd, err := deltaBuf.ReadByte() if err == io.EOF { return 0, plumbing.ZeroHash, ErrInvalidDelta } if err != nil { return 0, plumbing.ZeroHash, err } if isCopyFromSrc(cmd) { offset, err := decodeOffsetByteReader(cmd, deltaBuf) if err != nil { return 0, plumbing.ZeroHash, err } sz, err := decodeSizeByteReader(cmd, deltaBuf) if err != nil { return 0, plumbing.ZeroHash, err } if invalidSize(sz, targetSz) || invalidOffsetSize(offset, sz, srcSz) { return 0, plumbing.ZeroHash, ErrInvalidDelta } if _, err := sr.Seek(int64(offset), io.SeekStart); err != nil { return 0, plumbing.ZeroHash, err } baselr.N = int64(sz) if _, err := io.CopyBuffer(mw, baselr, buf); err != nil { return 0, plumbing.ZeroHash, err } remainingTargetSz -= sz } else if isCopyFromDelta(cmd) { sz := uint(cmd) // cmd is the size itself if invalidSize(sz, targetSz) { return 0, plumbing.ZeroHash, ErrInvalidDelta } deltalr.N = int64(sz) if _, err := io.CopyBuffer(mw, deltalr, buf); err != nil { return 0, plumbing.ZeroHash, err } remainingTargetSz -= sz } else { return 0, plumbing.ZeroHash, ErrDeltaCmd } if remainingTargetSz <= 0 { break } } return targetSz, hasher.Sum(), nil } // decodeLEB128 decodes a number encoded as an unsigned LEB128 at the start of // some binary data and returns the decoded number and the rest of the // stream. // // This must be called twice on the delta data buffer, first to get the // expected source buffer size, and again to get the target buffer size.
func decodeLEB128(input []byte) (uint, []byte) { if len(input) == 0 { return 0, input } var num, sz uint var b byte for { b = input[sz] num |= (uint(b) & payload) << (sz * 7) // concatenates 7-bit chunks sz++ if uint(b)&continuation == 0 || sz == uint(len(input)) { break } } return num, input[sz:] } func decodeLEB128ByteReader(input io.ByteReader) (uint, error) { var num, sz uint for { b, err := input.ReadByte() if err != nil { return 0, err } num |= (uint(b) & payload) << (sz * 7) // concatenates 7-bit chunks sz++ if uint(b)&continuation == 0 { break } } return num, nil } func isCopyFromSrc(cmd byte) bool { return (cmd & continuation) != 0 } func isCopyFromDelta(cmd byte) bool { return (cmd&continuation) == 0 && cmd != 0 } func decodeOffsetByteReader(cmd byte, delta io.ByteReader) (uint, error) { var offset uint for _, o := range offsets { if (cmd & o.mask) != 0 { next, err := delta.ReadByte() if err != nil { return 0, err } offset |= uint(next) << o.shift } } return offset, nil } func decodeOffset(cmd byte, delta []byte) (uint, []byte, error) { var offset uint for _, o := range offsets { if (cmd & o.mask) != 0 { if len(delta) == 0 { return 0, nil, ErrInvalidDelta } offset |= uint(delta[0]) << o.shift delta = delta[1:] } } return offset, delta, nil } func decodeSizeByteReader(cmd byte, delta io.ByteReader) (uint, error) { var sz uint for _, s := range sizes { if (cmd & s.mask) != 0 { next, err := delta.ReadByte() if err != nil { return 0, err } sz |= uint(next) << s.shift } } if sz == 0 { sz = maxCopySize } return sz, nil } func decodeSize(cmd byte, delta []byte) (uint, []byte, error) { var sz uint for _, s := range sizes { if (cmd & s.mask) != 0 { if len(delta) == 0 { return 0, nil, ErrInvalidDelta } sz |= uint(delta[0]) << s.shift delta = delta[1:] } } if sz == 0 { sz = maxCopySize } return sz, delta, nil } func invalidSize(sz, targetSz uint) bool { return sz > targetSz } func invalidOffsetSize(offset, sz, srcSz uint) bool { return sumOverflows(offset, sz) || offset+sz > srcSz } func sumOverflows(a, b uint) bool { return a+b < a }
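// Editor's sketch (not part of the original source): a hand-assembled delta
// exercising PatchDelta and the decoders above. The byte stream is:
//
//	0x0b      source size 11 (single-byte LEB128)
//	0x05      target size 5
//	0x90 0x05 copy-from-src command: bit 7 (0x80) set selects copy-from-src;
//	          bit 4 (0x10) set means one size byte follows (5); no offset
//	          bits set, so the copy starts at offset 0
//
// Applying it to an 11-byte base therefore copies its first five bytes:
//
//	src := []byte("hello world")
//	delta := []byte{0x0b, 0x05, 0x90, 0x05}
//	out, err := PatchDelta(src, delta) // out == []byte("hello"), err == nil
//
// A multi-byte LEB128 value such as {0x91, 0x2e} decodes to
// (0x91 & 0x7f) | (0x2e << 7) = 17 + 5888 = 5905.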
package packfile import ( "bytes" "crypto" "encoding/hex" "fmt" "hash" "hash/crc32" "io" "sync" "github.com/go-git/go-git/v6/plumbing" format "github.com/go-git/go-git/v6/plumbing/format/config" gogithash "github.com/go-git/go-git/v6/plumbing/hash" "github.com/go-git/go-git/v6/plumbing/storer" "github.com/go-git/go-git/v6/utils/binary" gogitsync "github.com/go-git/go-git/v6/utils/sync" ) var ( // ErrEmptyPackfile is returned by ReadHeader when no data is found in the packfile. ErrEmptyPackfile = NewError("empty packfile") // ErrBadSignature is returned by ReadHeader when the signature in the packfile is incorrect. ErrBadSignature = NewError("malformed pack file signature") // ErrMalformedPackfile is returned when the packfile format is incorrect. ErrMalformedPackfile = NewError("malformed pack file") // ErrUnsupportedVersion is returned by ReadHeader when the packfile version is // different than VersionSupported. ErrUnsupportedVersion = NewError("unsupported packfile version") // ErrSeekNotSupported returned if seek is not support. ErrSeekNotSupported = NewError("not seek support") ) // Scanner provides sequential access to the data stored in a Git packfile. // // A Git packfile is a compressed binary format that stores multiple Git objects, // such as commits, trees, delta objects and blobs. These packfiles are used to // reduce the size of data when transferring or storing Git repositories. // // A Git packfile is structured as follows: // // +----------------------------------------------------+ // | PACK File Header | // +----------------------------------------------------+ // | "PACK" | Version Number | Number of Objects | // | (4 bytes) | (4 bytes) | (4 bytes) | // +----------------------------------------------------+ // | Object Entry #1 | // +----------------------------------------------------+ // | Object Header | Compressed Object Data / Delta | // | (type + size) | (var-length, zlib compressed) | // +----------------------------------------------------+ // | ... | // +----------------------------------------------------+ // | PACK File Footer | // +----------------------------------------------------+ // | SHA-1 Checksum (20 bytes) | // +----------------------------------------------------+ // // For upstream docs, refer to https://git-scm.com/docs/gitformat-pack. type Scanner struct { // version holds the packfile version. version Version // objects holds the quantiy of objects within the packfile. objects uint32 // objIndex is the current index when going through the packfile objects. objIndex int // hasher is used to hash non-delta objects. hasher plumbing.Hasher // hasher256 is optional and used to hash the non-delta objects using SHA256. hasher256 *plumbing.Hasher // crc is used to generate the CRC-32 checksum of each object's content. crc hash.Hash32 // packhash hashes the pack contents so that at the end it is able to // validate the packfile's footer checksum against the calculated hash. packhash gogithash.Hash // objectIdSize holds the object ID size. objectIDSize int // next holds what state function should be executed on the next // call to Scan(). nextFn stateFn // packData holds the data for the last successful call to Scan(). packData PackData // err holds the first error that occurred. err error m sync.Mutex // storage is optional, and when set is used to store full objects found. // Note that delta objects are not stored. 
storage storer.EncodedObjectStorer *scannerReader zr gogitsync.ZLibReader buf bytes.Buffer } // NewScanner creates a new instance of Scanner. func NewScanner(rs io.Reader, opts ...ScannerOption) *Scanner { dict := make([]byte, 16*1024) crc := crc32.NewIEEE() packhash := gogithash.New(crypto.SHA1) r := &Scanner{ scannerReader: newScannerReader(rs, io.MultiWriter(crc, packhash)), zr: gogitsync.NewZlibReader(&dict), objIndex: -1, hasher: plumbing.NewHasher(format.SHA1, plumbing.AnyObject, 0), crc: crc, packhash: packhash, nextFn: packHeaderSignature, // Set the default size, which can be overridden by opts. objectIDSize: packhash.Size(), } for _, opt := range opts { opt(r) } return r } // Scan scans a Packfile sequentially. Each call will navigate from a section // to the next, until the entire file is read. // // The section data can be accessed via calls to Data(). Example: // // for scanner.Scan() { // v := scanner.Data().Value() // // switch scanner.Data().Section { // case HeaderSection: // header := v.(Header) // fmt.Println("[Header] Objects Qty:", header.ObjectsQty) // case ObjectSection: // oh := v.(ObjectHeader) // fmt.Println("[Object] Object Type:", oh.Type) // case FooterSection: // checksum := v.(plumbing.Hash) // fmt.Println("[Footer] Checksum:", checksum) // } // } func (r *Scanner) Scan() bool { r.m.Lock() defer r.m.Unlock() if r.err != nil || r.nextFn == nil { return false } if err := scan(r); err != nil { r.err = err return false } return true } // Reset resets the current scanner, enabling it to be used to scan the // same Packfile again. func (r *Scanner) Reset() { r.scannerReader.Flush() r.scannerReader.Seek(0, io.SeekStart) r.packhash.Reset() r.objIndex = -1 r.version = 0 r.objects = 0 r.packData = PackData{} r.err = nil r.nextFn = packHeaderSignature } // Data returns the pack data based on the last call to Scan(). func (r *Scanner) Data() PackData { return r.packData } // Error returns the first error that occurred during the last call to Scan(). // Once an error occurs, calls to Scan() become a no-op. func (r *Scanner) Error() error { return r.err } func (r *Scanner) SeekFromStart(offset int64) error { r.Reset() if !r.Scan() { return fmt.Errorf("failed to reset and read header") } _, err := r.scannerReader.Seek(offset, io.SeekStart) return err } func (s *Scanner) WriteObject(oh *ObjectHeader, writer io.Writer) error { if oh.content.Len() > 0 { _, err := io.Copy(writer, bytes.NewReader(oh.content.Bytes())) return err } // If oh is an external ref and we don't have the // content offset, we won't be able to inflate via seeking through // the packfile. if oh.externalRef && oh.ContentOffset == 0 { return plumbing.ErrObjectNotFound } // Not a seeker data source. if s.seeker == nil { return plumbing.ErrObjectNotFound } err := s.inflateContent(oh.ContentOffset, writer) if err != nil { return ErrReferenceDeltaNotFound } return nil } func (s *Scanner) inflateContent(contentOffset int64, writer io.Writer) error { _, err := s.scannerReader.Seek(contentOffset, io.SeekStart) if err != nil { return err } err = s.zr.Reset(s.scannerReader) if err != nil { return fmt.Errorf("zlib reset error: %s", err) } _, err = io.Copy(writer, s.zr.Reader) if err != nil { return err } return nil } // scan goes through the next stateFn. // // State functions are chained by returning a non-nil value for stateFn. // In such cases, the returned stateFn will be called immediately after // the current func.
func scan(r *Scanner) error { var err error for state := r.nextFn; state != nil; { state, err = state(r) if err != nil { return err } } return nil } // stateFn defines each individual state within the state machine that // represents a packfile. type stateFn func(*Scanner) (stateFn, error) // packHeaderSignature validates the packfile's header signature and // returns [ErrBadSignature] if the value provided is invalid. // // This is always the first state of a packfile and starts the chain // that handles the entire packfile header. func packHeaderSignature(r *Scanner) (stateFn, error) { start := make([]byte, 4) _, err := r.Read(start) if err != nil { return nil, fmt.Errorf("%w: %w", ErrBadSignature, err) } if bytes.Equal(start, signature) { return packVersion, nil } return nil, ErrBadSignature } // packVersion parses the packfile version. It returns [ErrMalformedPackfile] // when the version cannot be parsed. If a valid version is parsed, but it is // not currently supported, it returns [ErrUnsupportedVersion] instead. func packVersion(r *Scanner) (stateFn, error) { version, err := binary.ReadUint32(r.scannerReader) if err != nil { return nil, fmt.Errorf("%w: cannot read version", ErrMalformedPackfile) } v := Version(version) if !v.Supported() { return nil, ErrUnsupportedVersion } r.version = v return packObjectsQty, nil } // packObjectsQty parses the quantity of objects that the packfile contains. // If the value cannot be parsed, [ErrMalformedPackfile] is returned. // // This state ends the packfile header chain. func packObjectsQty(r *Scanner) (stateFn, error) { qty, err := binary.ReadUint32(r.scannerReader) if err != nil { return nil, fmt.Errorf("%w: cannot read number of objects", ErrMalformedPackfile) } if qty == 0 { return packFooter, nil } r.objects = qty r.packData = PackData{ Section: HeaderSection, header: Header{Version: r.version, ObjectsQty: r.objects}, } r.nextFn = objectEntry return nil, nil } // objectEntry handles the object entries within a packfile. This is generally // split between object headers and their contents. // // The object header contains the object type and size. If the type cannot be parsed, // [ErrMalformedPackfile] is returned. // // When SHA256 is enabled, the scanner will also calculate the SHA256 for each object. 
func objectEntry(r *Scanner) (stateFn, error) { if r.objIndex+1 >= int(r.objects) { return packFooter, nil } r.objIndex++ offset := r.scannerReader.offset r.scannerReader.Flush() r.crc.Reset() b := []byte{0} _, err := r.Read(b) if err != nil { return nil, err } typ := parseType(b[0]) if !typ.Valid() { return nil, fmt.Errorf("%w: invalid object type: %v", ErrMalformedPackfile, b[0]) } size, err := readVariableLengthSize(b[0], r) if err != nil { return nil, err } oh := ObjectHeader{ Offset: offset, Type: typ, diskType: typ, Size: int64(size), } switch oh.Type { case plumbing.OFSDeltaObject, plumbing.REFDeltaObject: // For delta objects, we need to read the base reference first if oh.Type == plumbing.OFSDeltaObject { no, err := binary.ReadVariableWidthInt(r.scannerReader) if err != nil { return nil, err } oh.OffsetReference = oh.Offset - no } else { oh.Reference.ResetBySize(r.objectIDSize) _, err := oh.Reference.ReadFrom(r.scannerReader) if err != nil { return nil, err } } } oh.ContentOffset = r.scannerReader.offset err = r.zr.Reset(r.scannerReader) if err != nil { return nil, fmt.Errorf("zlib reset error: %s", err) } if !oh.Type.IsDelta() { r.hasher.Reset(oh.Type, oh.Size) var mw io.Writer = r.hasher if r.storage != nil { w, err := r.storage.RawObjectWriter(oh.Type, oh.Size) if err != nil { return nil, err } defer w.Close() mw = io.MultiWriter(r.hasher, w) } if r.hasher256 != nil { r.hasher256.Reset(oh.Type, oh.Size) mw = io.MultiWriter(mw, r.hasher256) } // For non delta objects, simply calculate the hash of each object. _, err = io.CopyBuffer(mw, r.zr.Reader, r.buf.Bytes()) if err != nil { return nil, err } oh.Hash = r.hasher.Sum() if r.hasher256 != nil { h := r.hasher256.Sum() oh.Hash256 = &h } } else { // If data source is not io.Seeker, keep the content // in the cache, so that it can be accessed by the Parser. if r.scannerReader.seeker == nil { _, err = oh.content.ReadFrom(r.zr.Reader) if err != nil { return nil, err } } else { // We don't know the compressed length, so we can't seek to // the next object, we must discard the data instead. _, err = io.Copy(io.Discard, r.zr.Reader) if err != nil { return nil, err } } } r.scannerReader.Flush() oh.Crc32 = r.crc.Sum32() r.packData.Section = ObjectSection r.packData.objectHeader = oh return nil, nil } // packFooter parses the packfile checksum. // If the checksum cannot be parsed, or it does not match the checksum // calculated during the scanning process, an [ErrMalformedPackfile] is // returned. func packFooter(r *Scanner) (stateFn, error) { r.scannerReader.Flush() actual := r.packhash.Sum(nil) var checksum plumbing.Hash _, err := checksum.ReadFrom(r.scannerReader) if err != nil { return nil, fmt.Errorf("cannot read PACK checksum: %w", ErrMalformedPackfile) } if checksum.Compare(actual) != 0 { return nil, fmt.Errorf("checksum mismatch expected %q but found %q: %w", hex.EncodeToString(actual), checksum, ErrMalformedPackfile) } r.packData.Section = FooterSection r.packData.checksum = checksum r.nextFn = nil return nil, nil } func readVariableLengthSize(first byte, reader io.ByteReader) (uint64, error) { // Extract the first part of the size (low 4 bits of the first byte). size := uint64(first & 0x0F) // | Ctttssss | Cxxxxxxx | Cxxxxxxx | ... // // ^^^ ^^^^ ^^^^^^^ ^^^^^^^ // Type Size Part 1 Size Part 2 Size Part 3 // // (C is the continuation bit, ttt the type bits.) // // Check if more bytes are needed to fully determine the size. if first&maskContinue != 0 { shift := uint(4) for { b, err := reader.ReadByte() if err != nil { return 0, err } // Add the next 7 bits to the size.
size |= uint64(b&0x7F) << shift // Check if the continuation bit is set. if b&maskContinue == 0 { break } // Prepare for the next byte. shift += 7 } } return size, nil } func parseType(b byte) plumbing.ObjectType { return plumbing.ObjectType((b & maskType) >> firstLengthBits) }
package packfile import ( "github.com/go-git/go-git/v6/plumbing" format "github.com/go-git/go-git/v6/plumbing/format/config" ) type ScannerOption func(*Scanner) // WithSHA256 enables the SHA256 hashing while scanning a pack file. func WithSHA256() ScannerOption { return func(s *Scanner) { h := plumbing.NewHasher(format.SHA256, plumbing.AnyObject, 0) s.objectIDSize = format.SHA256Size s.hasher256 = &h } }
package packfile import ( "bufio" "io" ) // scannerReader has the following characteristics: // - Provides an io.SeekReader impl for bufio.Reader, when the underlying // reader supports it. // - Keeps track of the current read position, for when the underlying reader // isn't an io.SeekReader, but we still want to know the current offset. // - Writes to the hash writer what it reads, with the aid of a smaller buffer. // The buffer helps avoid a performance penalty for performing small writes // to the crc32 hash writer. // // Note that this is passed on to zlib, and it mmust support io.BytesReader, else // it won't be able to just read the content of the current object, but rather it // will read the entire packfile. // // scannerReader is not thread-safe. type scannerReader struct { reader io.Reader crc io.Writer rbuf *bufio.Reader wbuf *bufio.Writer offset int64 seeker io.Seeker } func newScannerReader(r io.Reader, h io.Writer) *scannerReader { sr := &scannerReader{ rbuf: bufio.NewReader(nil), wbuf: bufio.NewWriterSize(nil, 64), crc: h, } sr.Reset(r) return sr } func (r *scannerReader) Reset(reader io.Reader) { r.reader = reader r.rbuf.Reset(r.reader) r.wbuf.Reset(r.crc) r.offset = 0 seeker, ok := r.reader.(io.ReadSeeker) r.seeker = seeker if ok { r.offset, _ = seeker.Seek(0, io.SeekCurrent) } } func (r *scannerReader) Read(p []byte) (n int, err error) { n, err = r.rbuf.Read(p) r.offset += int64(n) if _, err := r.wbuf.Write(p[:n]); err != nil { return n, err } return } func (r *scannerReader) ReadByte() (b byte, err error) { b, err = r.rbuf.ReadByte() if err == nil { r.offset++ return b, r.wbuf.WriteByte(b) } return } func (r *scannerReader) Flush() error { return r.wbuf.Flush() } // Seek seeks to a location. If the underlying reader is not an io.ReadSeeker, // then only whence=io.SeekCurrent is supported, any other operation fails. func (r *scannerReader) Seek(offset int64, whence int) (int64, error) { var err error if r.seeker == nil { if whence != io.SeekCurrent || offset != 0 { return -1, ErrSeekNotSupported } } if whence == io.SeekCurrent && offset == 0 { return r.offset, nil } r.offset, err = r.seeker.Seek(offset, whence) r.rbuf.Reset(r.reader) return r.offset, err }
package packfile import ( "bytes" "github.com/go-git/go-git/v6/plumbing" ) type Version uint32 const ( V2 Version = 2 ) func (v Version) Supported() bool { switch v { case V2: return true default: return false } } // ObjectHeader contains the information related to the object, this information // is collected from the previous bytes to the content of the object. type ObjectHeader struct { Type plumbing.ObjectType Offset int64 ContentOffset int64 Size int64 Reference plumbing.Hash OffsetReference int64 Crc32 uint32 Hash plumbing.Hash Hash256 *plumbing.Hash content bytes.Buffer parent *ObjectHeader diskType plumbing.ObjectType externalRef bool } // ID returns the preferred object ID. func (oh *ObjectHeader) ID() plumbing.Hash { if oh.Hash256 != nil { return *oh.Hash256 } return oh.Hash } type SectionType int const ( HeaderSection SectionType = iota ObjectSection FooterSection ) type Header struct { Version Version ObjectsQty uint32 } type PackData struct { Section SectionType header Header objectHeader ObjectHeader checksum plumbing.Hash } func (p PackData) Value() interface{} { switch p.Section { case HeaderSection: return p.header case ObjectSection: return p.objectHeader case FooterSection: return p.checksum default: return nil } }
package object import ( "io" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/storer" "github.com/go-git/go-git/v6/utils/ioutil" ) // Blob is used to store arbitrary data - it is generally a file. type Blob struct { // Hash of the blob. Hash plumbing.Hash // Size of the (uncompressed) blob. Size int64 obj plumbing.EncodedObject } // GetBlob gets a blob from an object storer and decodes it. func GetBlob(s storer.EncodedObjectStorer, h plumbing.Hash) (*Blob, error) { o, err := s.EncodedObject(plumbing.BlobObject, h) if err != nil { return nil, err } return DecodeBlob(o) } // DecodeObject decodes an encoded object into a *Blob. func DecodeBlob(o plumbing.EncodedObject) (*Blob, error) { b := &Blob{} if err := b.Decode(o); err != nil { return nil, err } return b, nil } // ID returns the object ID of the blob. The returned value will always match // the current value of Blob.Hash. // // ID is present to fulfill the Object interface. func (b *Blob) ID() plumbing.Hash { return b.Hash } // Type returns the type of object. It always returns plumbing.BlobObject. // // Type is present to fulfill the Object interface. func (b *Blob) Type() plumbing.ObjectType { return plumbing.BlobObject } // Decode transforms a plumbing.EncodedObject into a Blob struct. func (b *Blob) Decode(o plumbing.EncodedObject) error { if o.Type() != plumbing.BlobObject { return ErrUnsupportedObject } b.Hash = o.Hash() b.Size = o.Size() b.obj = o return nil } // Encode transforms a Blob into a plumbing.EncodedObject. func (b *Blob) Encode(o plumbing.EncodedObject) (err error) { o.SetType(plumbing.BlobObject) w, err := o.Writer() if err != nil { return err } defer ioutil.CheckClose(w, &err) r, err := b.Reader() if err != nil { return err } defer ioutil.CheckClose(r, &err) _, err = io.Copy(w, r) return err } // Reader returns a reader allow the access to the content of the blob func (b *Blob) Reader() (io.ReadCloser, error) { return b.obj.Reader() } // BlobIter provides an iterator for a set of blobs. type BlobIter struct { storer.EncodedObjectIter s storer.EncodedObjectStorer } // NewBlobIter takes a storer.EncodedObjectStorer and a // storer.EncodedObjectIter and returns a *BlobIter that iterates over all // blobs contained in the storer.EncodedObjectIter. // // Any non-blob object returned by the storer.EncodedObjectIter is skipped. func NewBlobIter(s storer.EncodedObjectStorer, iter storer.EncodedObjectIter) *BlobIter { return &BlobIter{iter, s} } // Next moves the iterator to the next blob and returns a pointer to it. If // there are no more blobs, it returns io.EOF. func (iter *BlobIter) Next() (*Blob, error) { for { obj, err := iter.EncodedObjectIter.Next() if err != nil { return nil, err } if obj.Type() != plumbing.BlobObject { continue } return DecodeBlob(obj) } } // ForEach call the cb function for each blob contained on this iter until // an error happens or the end of the iter is reached. If ErrStop is sent // the iteration is stop but no error is returned. The iterator is closed. func (iter *BlobIter) ForEach(cb func(*Blob) error) error { return iter.EncodedObjectIter.ForEach(func(obj plumbing.EncodedObject) error { if obj.Type() != plumbing.BlobObject { return nil } b, err := DecodeBlob(obj) if err != nil { return err } return cb(b) }) }
package object import ( "bytes" "context" "fmt" "strings" "github.com/go-git/go-git/v6/utils/merkletrie" ) // Change values represent a detected change between two git trees. For // modifications, From is the original status of the node and To is its // final status. For insertions, From is the zero value and for // deletions To is the zero value. type Change struct { From ChangeEntry To ChangeEntry } var empty ChangeEntry // Action returns the kind of action represented by the change, an // insertion, a deletion or a modification. func (c *Change) Action() (merkletrie.Action, error) { if c.From == empty && c.To == empty { return merkletrie.Action(0), fmt.Errorf("malformed change: empty from and to") } if c.From == empty { return merkletrie.Insert, nil } if c.To == empty { return merkletrie.Delete, nil } return merkletrie.Modify, nil } // Files returns the files before and after a change. // For insertions from will be nil. For deletions to will be nil. func (c *Change) Files() (from, to *File, err error) { action, err := c.Action() if err != nil { return } if action == merkletrie.Insert || action == merkletrie.Modify { to, err = c.To.Tree.TreeEntryFile(&c.To.TreeEntry) if !c.To.TreeEntry.Mode.IsFile() { return nil, nil, nil } if err != nil { return } } if action == merkletrie.Delete || action == merkletrie.Modify { from, err = c.From.Tree.TreeEntryFile(&c.From.TreeEntry) if !c.From.TreeEntry.Mode.IsFile() { return nil, nil, nil } if err != nil { return } } return } func (c *Change) String() string { action, err := c.Action() if err != nil { return "malformed change" } return fmt.Sprintf("<Action: %s, Path: %s>", action, c.name()) } // Patch returns a Patch with all the file changes in chunks. This // representation can be used to create several diff outputs. func (c *Change) Patch() (*Patch, error) { return c.PatchContext(context.Background()) } // PatchContext returns a Patch with all the file changes in chunks. This // representation can be used to create several diff outputs. // If the context expires, a non-nil error will be returned. // The provided context must be non-nil. func (c *Change) PatchContext(ctx context.Context) (*Patch, error) { return getPatchContext(ctx, "", c) } func (c *Change) name() string { if c.From != empty { return c.From.Name } return c.To.Name } // ChangeEntry values represent a node that has suffered a change. type ChangeEntry struct { // Full path of the node using "/" as separator. Name string // Parent tree of the node that has changed. Tree *Tree // The entry of the node. TreeEntry TreeEntry } // Changes represents a collection of changes between two git trees. // Implements sort.Interface lexicographically over the path of the // changed files. type Changes []*Change func (c Changes) Len() int { return len(c) } func (c Changes) Swap(i, j int) { c[i], c[j] = c[j], c[i] } func (c Changes) Less(i, j int) bool { return strings.Compare(c[i].name(), c[j].name()) < 0 } func (c Changes) String() string { var buffer bytes.Buffer buffer.WriteString("[") comma := "" for _, v := range c { buffer.WriteString(comma) buffer.WriteString(v.String()) comma = ", " } buffer.WriteString("]") return buffer.String() } // Patch returns a Patch with all the changes in chunks. This // representation can be used to create several diff outputs. func (c Changes) Patch() (*Patch, error) { return c.PatchContext(context.Background()) } // PatchContext returns a Patch with all the changes in chunks. This // representation can be used to create several diff outputs.
// If the context expires, a non-nil error will be returned. // The provided context must be non-nil. func (c Changes) PatchContext(ctx context.Context) (*Patch, error) { return getPatchContext(ctx, "", c...) }
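A hedged sketch of consuming a Changes set through the API above (summarizeChanges is an illustrative name; it relies only on functions defined in this file):

package object

import "fmt"

// summarizeChanges prints one line per change with its action kind,
// then the unified diff for the whole set.
func summarizeChanges(changes Changes) error {
	for _, c := range changes {
		action, err := c.Action()
		if err != nil {
			return err
		}
		fmt.Printf("%-6s %s\n", action, c.name())
	}
	patch, err := changes.Patch()
	if err != nil {
		return err
	}
	fmt.Println(patch)
	return nil
}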
package object import ( "errors" "fmt" "github.com/go-git/go-git/v6/utils/merkletrie" "github.com/go-git/go-git/v6/utils/merkletrie/noder" ) // The following functions transform change types from the merkletrie // package to change types from this package. func newChange(c merkletrie.Change) (*Change, error) { ret := &Change{} var err error if ret.From, err = newChangeEntry(c.From); err != nil { return nil, fmt.Errorf("from field: %s", err) } if ret.To, err = newChangeEntry(c.To); err != nil { return nil, fmt.Errorf("to field: %s", err) } return ret, nil } func newChangeEntry(p noder.Path) (ChangeEntry, error) { if p == nil { return empty, nil } asTreeNoder, ok := p.Last().(*treeNoder) if !ok { return ChangeEntry{}, errors.New("cannot transform non-TreeNoders") } return ChangeEntry{ Name: p.String(), Tree: asTreeNoder.parent, TreeEntry: TreeEntry{ Name: asTreeNoder.name, Mode: asTreeNoder.mode, Hash: asTreeNoder.hash, }, }, nil } func newChanges(src merkletrie.Changes) (Changes, error) { ret := make(Changes, len(src)) var err error for i, e := range src { ret[i], err = newChange(e) if err != nil { return nil, fmt.Errorf("change #%d: %s", i, err) } } return ret, nil }
package object import ( "bytes" "context" "errors" "fmt" "io" "strings" "github.com/ProtonMail/go-crypto/openpgp" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/storer" "github.com/go-git/go-git/v6/utils/ioutil" "github.com/go-git/go-git/v6/utils/sync" ) const ( beginpgp string = "-----BEGIN PGP SIGNATURE-----" endpgp string = "-----END PGP SIGNATURE-----" headerpgp string = "gpgsig" headerencoding string = "encoding" // https://github.com/git/git/blob/bcb6cae2966cc407ca1afc77413b3ef11103c175/Documentation/gitformat-signature.txt#L153 // When a merge commit is created from a signed tag, the tag is embedded in // the commit with the "mergetag" header. headermergetag string = "mergetag" defaultUtf8CommitMessageEncoding MessageEncoding = "UTF-8" ) // Hash represents the hash of an object type Hash plumbing.Hash // MessageEncoding represents the encoding of a commit type MessageEncoding string // Commit points to a single tree, marking it as what the project looked like // at a certain point in time. It contains meta-information about that point // in time, such as a timestamp, the author of the changes since the last // commit, a pointer to the previous commit(s), etc. // http://shafiulazam.com/gitbook/1_the_git_object_model.html type Commit struct { // Hash of the commit object. Hash plumbing.Hash // Author is the original author of the commit. Author Signature // Committer is the one performing the commit, might be different from // Author. Committer Signature // MergeTag is the embedded tag object when a merge commit is created by // merging a signed tag. MergeTag string // PGPSignature is the PGP signature of the commit. PGPSignature string // Message is the commit message, contains arbitrary text. Message string // TreeHash is the hash of the root tree of the commit. TreeHash plumbing.Hash // ParentHashes are the hashes of the parent commits of the commit. ParentHashes []plumbing.Hash // Encoding is the encoding of the commit. Encoding MessageEncoding s storer.EncodedObjectStorer } // GetCommit gets a commit from an object storer and decodes it. func GetCommit(s storer.EncodedObjectStorer, h plumbing.Hash) (*Commit, error) { o, err := s.EncodedObject(plumbing.CommitObject, h) if err != nil { return nil, err } return DecodeCommit(s, o) } // DecodeCommit decodes an encoded object into a *Commit and associates it to // the given object storer. func DecodeCommit(s storer.EncodedObjectStorer, o plumbing.EncodedObject) (*Commit, error) { c := &Commit{s: s} if err := c.Decode(o); err != nil { return nil, err } return c, nil } // Tree returns the Tree from the commit. func (c *Commit) Tree() (*Tree, error) { return GetTree(c.s, c.TreeHash) } // PatchContext returns the Patch between the current commit and the provided one. // An error will be returned if the context expires. The provided context must be non-nil. // // NOTE: Since version 5.1.0 the renames are correctly handled, the settings // used are the recommended options DefaultDiffTreeOptions. func (c *Commit) PatchContext(ctx context.Context, to *Commit) (*Patch, error) { fromTree, err := c.Tree() if err != nil { return nil, err } var toTree *Tree if to != nil { toTree, err = to.Tree() if err != nil { return nil, err } } return fromTree.PatchContext(ctx, toTree) } // Patch returns the Patch between the current commit and the provided one. // // NOTE: Since version 5.1.0 the renames are correctly handled, the settings // used are the recommended options DefaultDiffTreeOptions.
func (c *Commit) Patch(to *Commit) (*Patch, error) { return c.PatchContext(context.Background(), to) } // Parents return a CommitIter to the parent Commits. func (c *Commit) Parents() CommitIter { return NewCommitIter(c.s, storer.NewEncodedObjectLookupIter(c.s, plumbing.CommitObject, c.ParentHashes), ) } // NumParents returns the number of parents in a commit. func (c *Commit) NumParents() int { return len(c.ParentHashes) } var ErrParentNotFound = errors.New("commit parent not found") // Parent returns the ith parent of a commit. func (c *Commit) Parent(i int) (*Commit, error) { if len(c.ParentHashes) == 0 || i > len(c.ParentHashes)-1 { return nil, ErrParentNotFound } return GetCommit(c.s, c.ParentHashes[i]) } // File returns the file with the specified "path" in the commit and a // nil error if the file exists. If the file does not exist, it returns // a nil file and the ErrFileNotFound error. func (c *Commit) File(path string) (*File, error) { tree, err := c.Tree() if err != nil { return nil, err } return tree.File(path) } // Files returns a FileIter allowing to iterate over the Tree func (c *Commit) Files() (*FileIter, error) { tree, err := c.Tree() if err != nil { return nil, err } return tree.Files(), nil } // ID returns the object ID of the commit. The returned value will always match // the current value of Commit.Hash. // // ID is present to fulfill the Object interface. func (c *Commit) ID() plumbing.Hash { return c.Hash } // Type returns the type of object. It always returns plumbing.CommitObject. // // Type is present to fulfill the Object interface. func (c *Commit) Type() plumbing.ObjectType { return plumbing.CommitObject } // Decode transforms a plumbing.EncodedObject into a Commit struct. func (c *Commit) Decode(o plumbing.EncodedObject) (err error) { if o.Type() != plumbing.CommitObject { return ErrUnsupportedObject } c.Hash = o.Hash() c.Encoding = defaultUtf8CommitMessageEncoding reader, err := o.Reader() if err != nil { return err } defer ioutil.CheckClose(reader, &err) r := sync.GetBufioReader(reader) defer sync.PutBufioReader(r) var message bool var mergetag bool var pgpsig bool var msgbuf bytes.Buffer for { line, err := r.ReadBytes('\n') if err != nil && err != io.EOF { return err } if mergetag { if len(line) > 0 && line[0] == ' ' { line = bytes.TrimLeft(line, " ") c.MergeTag += string(line) continue } else { mergetag = false } } if pgpsig { if len(line) > 0 && line[0] == ' ' { line = bytes.TrimLeft(line, " ") c.PGPSignature += string(line) continue } else { pgpsig = false } } if !message { line = bytes.TrimSpace(line) if len(line) == 0 { message = true continue } split := bytes.SplitN(line, []byte{' '}, 2) var data []byte if len(split) == 2 { data = split[1] } switch string(split[0]) { case "tree": c.TreeHash = plumbing.NewHash(string(data)) case "parent": c.ParentHashes = append(c.ParentHashes, plumbing.NewHash(string(data))) case "author": c.Author.Decode(data) case "committer": c.Committer.Decode(data) case headermergetag: c.MergeTag += string(data) + "\n" mergetag = true case headerencoding: c.Encoding = MessageEncoding(data) case headerpgp: c.PGPSignature += string(data) + "\n" pgpsig = true } } else { msgbuf.Write(line) } if err == io.EOF { break } } c.Message = msgbuf.String() return nil } // Encode transforms a Commit into a plumbing.EncodedObject. 
func (c *Commit) Encode(o plumbing.EncodedObject) error { return c.encode(o, true) } // EncodeWithoutSignature exports a Commit into a plumbing.EncodedObject without the signature (corresponding to the payload of the PGP signature). func (c *Commit) EncodeWithoutSignature(o plumbing.EncodedObject) error { return c.encode(o, false) } func (c *Commit) encode(o plumbing.EncodedObject, includeSig bool) (err error) { o.SetType(plumbing.CommitObject) w, err := o.Writer() if err != nil { return err } defer ioutil.CheckClose(w, &err) if _, err = fmt.Fprintf(w, "tree %s\n", c.TreeHash.String()); err != nil { return err } for _, parent := range c.ParentHashes { if _, err = fmt.Fprintf(w, "parent %s\n", parent.String()); err != nil { return err } } if _, err = fmt.Fprint(w, "author "); err != nil { return err } if err = c.Author.Encode(w); err != nil { return err } if _, err = fmt.Fprint(w, "\ncommitter "); err != nil { return err } if err = c.Committer.Encode(w); err != nil { return err } if c.MergeTag != "" { if _, err = fmt.Fprint(w, "\n"+headermergetag+" "); err != nil { return err } // Split tag information lines and re-write with a left padding and // newline. Use join for this so it's clear that a newline should not be // added after this section. The newline will be added either as part of // the PGP signature or the commit message. mergetag := strings.TrimSuffix(c.MergeTag, "\n") lines := strings.Split(mergetag, "\n") if _, err = fmt.Fprint(w, strings.Join(lines, "\n ")); err != nil { return err } } if string(c.Encoding) != "" && c.Encoding != defaultUtf8CommitMessageEncoding { if _, err = fmt.Fprintf(w, "\n%s %s", headerencoding, c.Encoding); err != nil { return err } } if c.PGPSignature != "" && includeSig { if _, err = fmt.Fprint(w, "\n"+headerpgp+" "); err != nil { return err } // Split all the signature lines and re-write with a left padding and // newline. Use join for this so it's clear that a newline should not be // added after this section, as it will be added when the message is // printed. signature := strings.TrimSuffix(c.PGPSignature, "\n") lines := strings.Split(signature, "\n") if _, err = fmt.Fprint(w, strings.Join(lines, "\n ")); err != nil { return err } } if _, err = fmt.Fprintf(w, "\n\n%s", c.Message); err != nil { return err } return err } // Stats returns the stats of a commit. func (c *Commit) Stats() (FileStats, error) { return c.StatsContext(context.Background()) } // StatsContext returns the stats of a commit. An error will be returned if the context // expires. The provided context must be non-nil. func (c *Commit) StatsContext(ctx context.Context) (FileStats, error) { fromTree, err := c.Tree() if err != nil { return nil, err } toTree := &Tree{} if c.NumParents() != 0 { firstParent, err := c.Parents().Next() if err != nil { return nil, err } toTree, err = firstParent.Tree() if err != nil { return nil, err } } patch, err := toTree.PatchContext(ctx, fromTree) if err != nil { return nil, err } return getFileStatsFromFilePatches(patch.FilePatches()), nil } func (c *Commit) String() string { return fmt.Sprintf( "%s %s\nAuthor: %s\nDate: %s\n\n%s\n", plumbing.CommitObject, c.Hash, c.Author.String(), c.Author.When.Format(DateFormat), indent(c.Message), ) } // Verify performs PGP verification of the commit with a provided armored // keyring and returns openpgp.Entity associated with verifying key on success.
func (c *Commit) Verify(armoredKeyRing string) (*openpgp.Entity, error) { keyRingReader := strings.NewReader(armoredKeyRing) keyring, err := openpgp.ReadArmoredKeyRing(keyRingReader) if err != nil { return nil, err } // Extract signature. signature := strings.NewReader(c.PGPSignature) encoded := &plumbing.MemoryObject{} // Encode commit components, excluding signature and get a reader object. if err := c.EncodeWithoutSignature(encoded); err != nil { return nil, err } er, err := encoded.Reader() if err != nil { return nil, err } return openpgp.CheckArmoredDetachedSignature(keyring, er, signature, nil) } // Less defines a compare function to determine which commit is 'earlier' by: // - First use Committer.When // - If Committer.When are equal then use Author.When // - If Author.When is also equal then compare the string value of the hash func (c *Commit) Less(rhs *Commit) bool { return c.Committer.When.Before(rhs.Committer.When) || (c.Committer.When.Equal(rhs.Committer.When) && (c.Author.When.Before(rhs.Author.When) || (c.Author.When.Equal(rhs.Author.When) && c.Hash.Compare(rhs.Hash.Bytes()) < 0))) } func indent(t string) string { var output []string for _, line := range strings.Split(t, "\n") { if len(line) != 0 { line = " " + line } output = append(output, line) } return strings.Join(output, "\n") } // CommitIter is a generic closable interface for iterating over commits. type CommitIter interface { Next() (*Commit, error) ForEach(func(*Commit) error) error Close() } // storerCommitIter provides an iterator over commits in an EncodedObjectStorer. type storerCommitIter struct { storer.EncodedObjectIter s storer.EncodedObjectStorer } // NewCommitIter takes a storer.EncodedObjectStorer and a // storer.EncodedObjectIter and returns a CommitIter that iterates over all // commits contained in the storer.EncodedObjectIter. // // Any non-commit object returned by the storer.EncodedObjectIter is skipped. func NewCommitIter(s storer.EncodedObjectStorer, iter storer.EncodedObjectIter) CommitIter { return &storerCommitIter{iter, s} } // Next moves the iterator to the next commit and returns a pointer to it. If // there are no more commits, it returns io.EOF. func (iter *storerCommitIter) Next() (*Commit, error) { obj, err := iter.EncodedObjectIter.Next() if err != nil { return nil, err } return DecodeCommit(iter.s, obj) } // ForEach calls the cb function for each commit contained in this iter until // an error happens or the end of the iter is reached. If ErrStop is sent // the iteration is stopped but no error is returned. The iterator is closed. func (iter *storerCommitIter) ForEach(cb func(*Commit) error) error { return iter.EncodedObjectIter.ForEach(func(obj plumbing.EncodedObject) error { c, err := DecodeCommit(iter.s, obj) if err != nil { return err } return cb(c) }) } func (iter *storerCommitIter) Close() { iter.EncodedObjectIter.Close() }
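To tie the Commit API together, a brief sketch (describeCommit is a hypothetical helper) that prints a commit and its diff stats against the first parent:

package object

import "fmt"

// describeCommit prints the commit header and message, then the
// diff stats against its first parent (skipped for root commits).
func describeCommit(c *Commit) error {
	fmt.Println(c) // uses Commit.String()
	if c.NumParents() == 0 {
		return nil
	}
	parent, err := c.Parent(0)
	if err != nil {
		return err
	}
	patch, err := parent.Patch(c)
	if err != nil {
		return err
	}
	fmt.Print(patch.Stats()) // uses FileStats.String()
	return nil
}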
package object import ( "container/list" "errors" "io" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/storer" "github.com/go-git/go-git/v6/storage" ) type commitPreIterator struct { seenExternal map[plumbing.Hash]bool seen map[plumbing.Hash]bool stack []CommitIter start *Commit } func forEachCommit(next func() (*Commit, error), cb func(*Commit) error) error { for { c, err := next() if errors.Is(err, io.EOF) { break } if err != nil { return err } err = cb(c) if err == storer.ErrStop { break } if err != nil { return err } } return nil } // NewCommitPreorderIter returns a CommitIter that walks the commit history, // starting at the given commit and visiting its parents in pre-order. // The given callback will be called for each visited commit. Each commit will // be visited only once. If the callback returns an error, walking will stop // and will return the error. Other errors might be returned if the history // cannot be traversed (e.g. missing objects). Ignore allows to skip some // commits from being iterated. func NewCommitPreorderIter( c *Commit, seenExternal map[plumbing.Hash]bool, ignore []plumbing.Hash, ) CommitIter { seen := make(map[plumbing.Hash]bool) for _, h := range ignore { seen[h] = true } return &commitPreIterator{ seenExternal: seenExternal, seen: seen, stack: make([]CommitIter, 0), start: c, } } func (w *commitPreIterator) Next() (*Commit, error) { var c *Commit for { if w.start != nil { c = w.start w.start = nil } else { current := len(w.stack) - 1 if current < 0 { return nil, io.EOF } var err error c, err = w.stack[current].Next() if err == io.EOF { w.stack = w.stack[:current] continue } if err != nil { return nil, err } } if w.seen[c.Hash] || w.seenExternal[c.Hash] { continue } w.seen[c.Hash] = true if c.NumParents() > 0 { w.stack = append(w.stack, filteredParentIter(c, w.seen)) } return c, nil } } func filteredParentIter(c *Commit, seen map[plumbing.Hash]bool) CommitIter { var hashes []plumbing.Hash for _, h := range c.ParentHashes { if !seen[h] { hashes = append(hashes, h) } } return NewCommitIter(c.s, storer.NewEncodedObjectLookupIter(c.s, plumbing.CommitObject, hashes), ) } func (w *commitPreIterator) ForEach(cb func(*Commit) error) error { return forEachCommit(w.Next, cb) } func (w *commitPreIterator) Close() {} type commitPostIterator struct { stack []*Commit seen map[plumbing.Hash]bool } // NewCommitPostorderIter returns a CommitIter that walks the commit // history like WalkCommitHistory but in post-order. This means that after // walking a merge commit, the merged commit will be walked before the base // it was merged on. This can be useful if you wish to see the history in // chronological order. Ignore allows to skip some commits from being iterated. 
func NewCommitPostorderIter(c *Commit, ignore []plumbing.Hash) CommitIter { seen := make(map[plumbing.Hash]bool) for _, h := range ignore { seen[h] = true } return &commitPostIterator{ stack: []*Commit{c}, seen: seen, } } func (w *commitPostIterator) Next() (*Commit, error) { for { if len(w.stack) == 0 { return nil, io.EOF } c := w.stack[len(w.stack)-1] w.stack = w.stack[:len(w.stack)-1] if w.seen[c.Hash] { continue } w.seen[c.Hash] = true return c, c.Parents().ForEach(func(p *Commit) error { w.stack = append(w.stack, p) return nil }) } } func (w *commitPostIterator) ForEach(cb func(*Commit) error) error { return forEachCommit(w.Next, cb) } func (w *commitPostIterator) Close() {} type commitPostIteratorFirstParent struct { stack []*Commit seen map[plumbing.Hash]bool } // NewCommitPostorderIterFirstParent returns a CommitIter that walks the commit // history like WalkCommitHistory but in post-order. // // This option acts like the git log --first-parent flag, skipping intermediate // commits that were brought in via a merge commit. // Ignore allows to skip some commits from being iterated. func NewCommitPostorderIterFirstParent(c *Commit, ignore []plumbing.Hash) CommitIter { seen := make(map[plumbing.Hash]bool) for _, h := range ignore { seen[h] = true } return &commitPostIteratorFirstParent{ stack: []*Commit{c}, seen: seen, } } func (w *commitPostIteratorFirstParent) Next() (*Commit, error) { for { if len(w.stack) == 0 { return nil, io.EOF } c := w.stack[len(w.stack)-1] w.stack = w.stack[:len(w.stack)-1] if w.seen[c.Hash] { continue } w.seen[c.Hash] = true return c, c.Parents().ForEach(func(p *Commit) error { if len(c.ParentHashes) > 0 && p.Hash == c.ParentHashes[0] { w.stack = append(w.stack, p) } return nil }) } } func (w *commitPostIteratorFirstParent) ForEach(cb func(*Commit) error) error { return forEachCommit(w.Next, cb) } func (w *commitPostIteratorFirstParent) Close() {} // commitAllIterator stands for commit iterator for all refs. type commitAllIterator struct { // currCommit points to the current commit. currCommit *list.Element } // NewCommitAllIter returns a new commit iterator for all refs. // repoStorer is a repo Storer used to get commits and references. // commitIterFunc is a commit iterator function, used to iterate through ref commits in chosen order func NewCommitAllIter(repoStorer storage.Storer, commitIterFunc func(*Commit) CommitIter) (CommitIter, error) { commitsPath := list.New() commitsLookup := make(map[plumbing.Hash]*list.Element) head, err := storer.ResolveReference(repoStorer, plumbing.HEAD) if err == nil { err = addReference(repoStorer, commitIterFunc, head, commitsPath, commitsLookup) } if err != nil && err != plumbing.ErrReferenceNotFound { return nil, err } // add all references along with the HEAD refIter, err := repoStorer.IterReferences() if err != nil { return nil, err } defer refIter.Close() for { ref, err := refIter.Next() if err == io.EOF { break } if err == plumbing.ErrReferenceNotFound { continue } if err != nil { return nil, err } if err = addReference(repoStorer, commitIterFunc, ref, commitsPath, commitsLookup); err != nil { return nil, err } } return &commitAllIterator{commitsPath.Front()}, nil } func addReference( repoStorer storage.Storer, commitIterFunc func(*Commit) CommitIter, ref *plumbing.Reference, commitsPath *list.List, commitsLookup map[plumbing.Hash]*list.Element) error { _, exists := commitsLookup[ref.Hash()] if exists { // we already have it - skip the reference. 
return nil } refCommit, _ := GetCommit(repoStorer, ref.Hash()) if refCommit == nil { // if it's not a commit - skip it. return nil } var ( refCommits []*Commit parent *list.Element ) // collect all ref commits to add commitIter := commitIterFunc(refCommit) for c, e := commitIter.Next(); e == nil; { parent, exists = commitsLookup[c.Hash] if exists { break } refCommits = append(refCommits, c) c, e = commitIter.Next() } commitIter.Close() if parent == nil { // common parent - not found // add all commits to the path from this ref (maybe it's a HEAD and we don't have anything, yet) for _, c := range refCommits { parent = commitsPath.PushBack(c) commitsLookup[c.Hash] = parent } } else { // add ref's commits to the path in reverse order (from the latest) for i := len(refCommits) - 1; i >= 0; i-- { c := refCommits[i] // insert before found common parent parent = commitsPath.InsertBefore(c, parent) commitsLookup[c.Hash] = parent } } return nil } func (it *commitAllIterator) Next() (*Commit, error) { if it.currCommit == nil { return nil, io.EOF } c := it.currCommit.Value.(*Commit) it.currCommit = it.currCommit.Next() return c, nil } func (it *commitAllIterator) ForEach(cb func(*Commit) error) error { return forEachCommit(it.Next, cb) } func (it *commitAllIterator) Close() { it.currCommit = nil }
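A usage sketch for NewCommitAllIter (logAllRefs is an illustrative name), pairing it with NewCommitPreorderIter as the per-reference ordering:

package object

import (
	"fmt"

	"github.com/go-git/go-git/v6/storage"
)

// logAllRefs walks commits reachable from HEAD and every other
// reference, visiting each ref's history in pre-order.
func logAllRefs(s storage.Storer) error {
	iter, err := NewCommitAllIter(s, func(c *Commit) CommitIter {
		return NewCommitPreorderIter(c, nil, nil)
	})
	if err != nil {
		return err
	}
	defer iter.Close()
	return iter.ForEach(func(c *Commit) error {
		fmt.Println(c.Hash)
		return nil
	})
}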
package object import ( "io" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/storer" ) type bfsCommitIterator struct { seenExternal map[plumbing.Hash]bool seen map[plumbing.Hash]bool queue []*Commit } // NewCommitIterBSF returns a CommitIter that walks the commit history, // starting at the given commit and visiting its parents in breadth-first order. // Each commit will be visited only once. Errors might be returned if the history // cannot be traversed (e.g. missing objects). Ignore allows skipping some // commits from being iterated. func NewCommitIterBSF( c *Commit, seenExternal map[plumbing.Hash]bool, ignore []plumbing.Hash, ) CommitIter { seen := make(map[plumbing.Hash]bool) for _, h := range ignore { seen[h] = true } return &bfsCommitIterator{ seenExternal: seenExternal, seen: seen, queue: []*Commit{c}, } } func (w *bfsCommitIterator) appendHash(store storer.EncodedObjectStorer, h plumbing.Hash) error { if w.seen[h] || w.seenExternal[h] { return nil } c, err := GetCommit(store, h) if err != nil { return err } w.queue = append(w.queue, c) return nil } func (w *bfsCommitIterator) Next() (*Commit, error) { var c *Commit for { if len(w.queue) == 0 { return nil, io.EOF } c = w.queue[0] w.queue = w.queue[1:] if w.seen[c.Hash] || w.seenExternal[c.Hash] { continue } w.seen[c.Hash] = true for _, h := range c.ParentHashes { err := w.appendHash(c.s, h) if err != nil { return nil, err } } return c, nil } } func (w *bfsCommitIterator) ForEach(cb func(*Commit) error) error { for { c, err := w.Next() if err == io.EOF { break } if err != nil { return err } err = cb(c) if err == storer.ErrStop { break } if err != nil { return err } } return nil } func (w *bfsCommitIterator) Close() {}
package object import ( "io" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/storer" ) // NewFilterCommitIter returns a CommitIter that walks the commit history, // starting at the passed commit and visiting its parents in breadth-first order. // The commits returned by the CommitIter will validate the passed CommitFilter. // The history won't be traversed beyond a commit if isLimit is true for it. // Each commit will be visited only once. // If the commit history cannot be traversed, or the Close() method is called, // the CommitIter won't return more commits. // If no isValid is passed, all ancestors of the from commit will be valid. // If no isLimit is passed, all ancestors of all commits will be visited. func NewFilterCommitIter( from *Commit, isValid *CommitFilter, isLimit *CommitFilter, ) CommitIter { var validFilter CommitFilter if isValid == nil { validFilter = func(_ *Commit) bool { return true } } else { validFilter = *isValid } var limitFilter CommitFilter if isLimit == nil { limitFilter = func(_ *Commit) bool { return false } } else { limitFilter = *isLimit } return &filterCommitIter{ isValid: validFilter, isLimit: limitFilter, visited: map[plumbing.Hash]struct{}{}, queue: []*Commit{from}, } } // CommitFilter returns a boolean for the passed Commit type CommitFilter func(*Commit) bool // filterCommitIter implements CommitIter type filterCommitIter struct { isValid CommitFilter isLimit CommitFilter visited map[plumbing.Hash]struct{} queue []*Commit lastErr error } // Next returns the next commit of the CommitIter. // It will return io.EOF if there are no more commits to visit, // or an error if the history could not be traversed. func (w *filterCommitIter) Next() (*Commit, error) { var commit *Commit var err error for { commit, err = w.popNewFromQueue() if err != nil { return nil, w.close(err) } w.visited[commit.Hash] = struct{}{} if !w.isLimit(commit) { err = w.addToQueue(commit.s, commit.ParentHashes...) if err != nil { return nil, w.close(err) } } if w.isValid(commit) { return commit, nil } } } // ForEach runs the passed callback over each Commit returned by the CommitIter // until the callback returns an error or there are no more commits to traverse.
func (w *filterCommitIter) ForEach(cb func(*Commit) error) error { for { commit, err := w.Next() if err == io.EOF { break } if err != nil { return err } if err := cb(commit); err == storer.ErrStop { break } else if err != nil { return err } } return nil } // Error returns the error that caused the CommitIter to stop returning commits func (w *filterCommitIter) Error() error { return w.lastErr } // Close closes the CommitIter func (w *filterCommitIter) Close() { w.visited = map[plumbing.Hash]struct{}{} w.queue = []*Commit{} w.isLimit = nil w.isValid = nil } // close closes the CommitIter with an error func (w *filterCommitIter) close(err error) error { w.Close() w.lastErr = err return err } // popNewFromQueue returns the first new commit from the internal FIFO queue, // or an io.EOF error if the queue is empty func (w *filterCommitIter) popNewFromQueue() (*Commit, error) { var first *Commit for { if len(w.queue) == 0 { if w.lastErr != nil { return nil, w.lastErr } return nil, io.EOF } first = w.queue[0] w.queue = w.queue[1:] if _, ok := w.visited[first.Hash]; ok { continue } return first, nil } } // addToQueue adds the passed commits to the internal FIFO queue if they weren't seen // or returns an error if the passed hashes could not be used to get valid commits func (w *filterCommitIter) addToQueue( store storer.EncodedObjectStorer, hashes ...plumbing.Hash, ) error { for _, hash := range hashes { if _, ok := w.visited[hash]; ok { continue } commit, err := GetCommit(store, hash) if err != nil { return err } w.queue = append(w.queue, commit) } return nil }
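A sketch of driving NewFilterCommitIter (mergeCommitsUntil is a hypothetical helper): collect merge commits while cutting the walk at commits matched by a caller-supplied limit filter:

package object

// mergeCommitsUntil returns the merge commits reachable from head,
// stopping the traversal at commits for which stop returns true.
func mergeCommitsUntil(head *Commit, stop CommitFilter) ([]*Commit, error) {
	isMerge := CommitFilter(func(c *Commit) bool { return c.NumParents() > 1 })
	var res []*Commit
	iter := NewFilterCommitIter(head, &isMerge, &stop)
	err := iter.ForEach(func(c *Commit) error {
		res = append(res, c)
		return nil
	})
	return res, err
}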
package object import ( "io" "github.com/emirpasic/gods/trees/binaryheap" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/storer" ) type commitIteratorByCTime struct { seenExternal map[plumbing.Hash]bool seen map[plumbing.Hash]bool heap *binaryheap.Heap } // NewCommitIterCTime returns a CommitIter that walks the commit history, // starting at the given commit and visiting its parents while preserving Committer Time order. // this appears to be the closest order to `git log` // The given callback will be called for each visited commit. Each commit will // be visited only once. If the callback returns an error, walking will stop // and will return the error. Other errors might be returned if the history // cannot be traversed (e.g. missing objects). Ignore allows to skip some // commits from being iterated. func NewCommitIterCTime( c *Commit, seenExternal map[plumbing.Hash]bool, ignore []plumbing.Hash, ) CommitIter { seen := make(map[plumbing.Hash]bool) for _, h := range ignore { seen[h] = true } heap := binaryheap.NewWith(func(a, b interface{}) int { if a.(*Commit).Committer.When.Before(b.(*Commit).Committer.When) { return 1 } return -1 }) heap.Push(c) return &commitIteratorByCTime{ seenExternal: seenExternal, seen: seen, heap: heap, } } func (w *commitIteratorByCTime) Next() (*Commit, error) { var c *Commit for { cIn, ok := w.heap.Pop() if !ok { return nil, io.EOF } c = cIn.(*Commit) if w.seen[c.Hash] || w.seenExternal[c.Hash] { continue } w.seen[c.Hash] = true for _, h := range c.ParentHashes { if w.seen[h] || w.seenExternal[h] { continue } pc, err := GetCommit(c.s, h) if err != nil { return nil, err } w.heap.Push(pc) } return c, nil } } func (w *commitIteratorByCTime) ForEach(cb func(*Commit) error) error { for { c, err := w.Next() if err == io.EOF { break } if err != nil { return err } err = cb(c) if err == storer.ErrStop { break } if err != nil { return err } } return nil } func (w *commitIteratorByCTime) Close() {}
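A minimal sketch using the committer-time iterator above (logByCommitterTime is an illustrative name):

package object

import "fmt"

// logByCommitterTime walks history in descending committer-time order,
// the closest built-in approximation of `git log`.
func logByCommitterTime(head *Commit) error {
	iter := NewCommitIterCTime(head, nil, nil)
	defer iter.Close()
	return iter.ForEach(func(c *Commit) error {
		fmt.Println(c.Committer.When.Format("2006-01-02"), c.Hash)
		return nil
	})
}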
package object import ( "io" "time" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/storer" ) type commitLimitIter struct { sourceIter CommitIter limitOptions LogLimitOptions } type LogLimitOptions struct { Since *time.Time Until *time.Time TailHash plumbing.Hash } func NewCommitLimitIterFromIter(commitIter CommitIter, limitOptions LogLimitOptions) CommitIter { iterator := new(commitLimitIter) iterator.sourceIter = commitIter iterator.limitOptions = limitOptions return iterator } func (c *commitLimitIter) Next() (*Commit, error) { for { commit, err := c.sourceIter.Next() if err != nil { return nil, err } if c.limitOptions.Since != nil && commit.Committer.When.Before(*c.limitOptions.Since) { continue } if c.limitOptions.Until != nil && commit.Committer.When.After(*c.limitOptions.Until) { continue } if c.limitOptions.TailHash == commit.Hash { return commit, storer.ErrStop } return commit, nil } } func (c *commitLimitIter) ForEach(cb func(*Commit) error) error { for { commit, nextErr := c.Next() if nextErr == io.EOF { break } if nextErr != nil && nextErr != storer.ErrStop { return nextErr } err := cb(commit) if err == storer.ErrStop || nextErr == storer.ErrStop { return nil } else if err != nil { return err } } return nil } func (c *commitLimitIter) Close() { c.sourceIter.Close() }
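A sketch of wrapping an existing iterator with time limits (logSince and the 30-day window are assumptions, not part of the package):

package object

import (
	"fmt"
	"time"
)

// logSince filters a source iterator down to commits whose committer
// time falls within the last 30 days.
func logSince(source CommitIter) error {
	since := time.Now().AddDate(0, 0, -30)
	iter := NewCommitLimitIterFromIter(source, LogLimitOptions{Since: &since})
	defer iter.Close()
	return iter.ForEach(func(c *Commit) error {
		fmt.Println(c.Hash)
		return nil
	})
}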
package object import ( "io" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/storer" ) type commitPathIter struct { pathFilter func(string) bool sourceIter CommitIter currentCommit *Commit checkParent bool } // NewCommitPathIterFromIter returns a commit iterator which performs diffTree between // successive trees returned from the commit iterator from the argument. The purpose of this is // to find the commits that explain how the files that match the path came to be. // If checkParent is true then the function double checks if potential parent (next commit in a path) // is one of the parents in the tree (it's used by `git log --all`). // pathFilter is a function that takes path of file as argument and returns true if we want it func NewCommitPathIterFromIter(pathFilter func(string) bool, commitIter CommitIter, checkParent bool) CommitIter { iterator := new(commitPathIter) iterator.sourceIter = commitIter iterator.pathFilter = pathFilter iterator.checkParent = checkParent return iterator } // NewCommitFileIterFromIter is kept for compatibility, can be replaced with NewCommitPathIterFromIter func NewCommitFileIterFromIter(fileName string, commitIter CommitIter, checkParent bool) CommitIter { return NewCommitPathIterFromIter( func(path string) bool { return path == fileName }, commitIter, checkParent, ) } func (c *commitPathIter) Next() (*Commit, error) { if c.currentCommit == nil { var err error c.currentCommit, err = c.sourceIter.Next() if err != nil { return nil, err } } commit, commitErr := c.getNextFileCommit() // Setting current-commit to nil to prevent unwanted states when errors are raised if commitErr != nil { c.currentCommit = nil } return commit, commitErr } func (c *commitPathIter) getNextFileCommit() (*Commit, error) { var parentTree, currentTree *Tree for { // Parent-commit can be nil if the current-commit is the initial commit parentCommit, parentCommitErr := c.sourceIter.Next() if parentCommitErr != nil { // If the parent-commit is beyond the initial commit, keep it nil if parentCommitErr != io.EOF { return nil, parentCommitErr } parentCommit = nil } if parentTree == nil { var currTreeErr error currentTree, currTreeErr = c.currentCommit.Tree() if currTreeErr != nil { return nil, currTreeErr } } else { currentTree = parentTree parentTree = nil } if parentCommit != nil { var parentTreeErr error parentTree, parentTreeErr = parentCommit.Tree() if parentTreeErr != nil { return nil, parentTreeErr } } // Find diff between current and parent trees changes, diffErr := DiffTree(currentTree, parentTree) if diffErr != nil { return nil, diffErr } found := c.hasFileChange(changes, parentCommit) // Storing the current-commit in-case a change is found, and // Updating the current-commit for the next-iteration prevCommit := c.currentCommit c.currentCommit = parentCommit if found { return prevCommit, nil } // If not matches found and if parent-commit is beyond the initial commit, then return with EOF if parentCommit == nil { return nil, io.EOF } } } func (c *commitPathIter) hasFileChange(changes Changes, parent *Commit) bool { for _, change := range changes { if !c.pathFilter(change.name()) { continue } // filename matches, now check if source iterator contains all commits (from all refs) if c.checkParent { // Check if parent is beyond the initial commit if parent == nil || isParentHash(parent.Hash, c.currentCommit) { return true } continue } return true } return false } func isParentHash(hash plumbing.Hash, commit *Commit) bool { for _, h := range 
commit.ParentHashes { if h == hash { return true } } return false } func (c *commitPathIter) ForEach(cb func(*Commit) error) error { for { commit, nextErr := c.Next() if nextErr == io.EOF { break } if nextErr != nil { return nextErr } err := cb(commit) if err == storer.ErrStop { return nil } else if err != nil { return err } } return nil } func (c *commitPathIter) Close() { c.sourceIter.Close() }
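A sketch of path-scoped history using the iterator above (logDir is a hypothetical helper):

package object

import (
	"fmt"
	"strings"
)

// logDir yields only the commits that changed files under the given
// directory prefix, walking from head in pre-order.
func logDir(head *Commit, dir string) error {
	source := NewCommitPreorderIter(head, nil, nil)
	iter := NewCommitPathIterFromIter(func(path string) bool {
		return strings.HasPrefix(path, dir)
	}, source, false)
	defer iter.Close()
	return iter.ForEach(func(c *Commit) error {
		fmt.Println(c.Hash)
		return nil
	})
}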
package object import ( "bytes" "context" "github.com/go-git/go-git/v6/utils/merkletrie" "github.com/go-git/go-git/v6/utils/merkletrie/noder" ) // DiffTree compares the content and mode of the blobs found via two // tree objects. // DiffTree does not perform rename detection, use DiffTreeWithOptions // instead to detect renames. func DiffTree(a, b *Tree) (Changes, error) { return DiffTreeContext(context.Background(), a, b) } // DiffTreeContext compares the content and mode of the blobs found via two // tree objects. The provided context must be non-nil. // An error will be returned if the context expires. func DiffTreeContext(ctx context.Context, a, b *Tree) (Changes, error) { return DiffTreeWithOptions(ctx, a, b, nil) } // DiffTreeOptions are the configurable options when performing a diff tree. type DiffTreeOptions struct { // DetectRenames is whether the diff tree will use rename detection. DetectRenames bool // RenameScore is the threshold of similarity between files required to consider // that a pair of delete and insert is a rename. The number must be // between 0 and 100. RenameScore uint // RenameLimit is the maximum number of files that can be compared when // detecting renames. The number of comparisons that have to be performed // is equal to the number of deleted files * the number of added files. // That means that if 100 files were deleted and 50 files were added, 5000 // file comparisons may be needed. So, if the rename limit is 50, the number // of both deleted and added files needs to be 50 or less. // A value of 0 means no limit. RenameLimit uint // OnlyExactRenames performs only detection of exact renames and will not perform // any detection of renames based on file similarity. OnlyExactRenames bool } // DefaultDiffTreeOptions are the default and recommended options for the // diff tree. var DefaultDiffTreeOptions = &DiffTreeOptions{ DetectRenames: true, RenameScore: 60, RenameLimit: 0, OnlyExactRenames: false, } // DiffTreeWithOptions compares the content and mode of the blobs found // via two tree objects with the given options. The provided context // must be non-nil. // If no options are passed, no rename detection will be performed. The // recommended options are DefaultDiffTreeOptions. // An error will be returned if the context expires. // This function will be deprecated and removed in v6, so that the default // behaviour of DiffTree is to detect renames. func DiffTreeWithOptions( ctx context.Context, a, b *Tree, opts *DiffTreeOptions, ) (Changes, error) { from := NewTreeRootNode(a) to := NewTreeRootNode(b) hashEqual := func(a, b noder.Hasher) bool { return bytes.Equal(a.Hash(), b.Hash()) } merkletrieChanges, err := merkletrie.DiffTreeContext(ctx, from, to, hashEqual) if err != nil { if err == merkletrie.ErrCanceled { return nil, ErrCanceled } return nil, err } changes, err := newChanges(merkletrieChanges) if err != nil { return nil, err } if opts == nil { opts = new(DiffTreeOptions) } if opts.DetectRenames { return DetectRenames(changes, opts) } return changes, nil }
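A sketch showing rename-aware diffing with the options above (renamedFiles is an illustrative helper; it relies only on names defined in this file):

package object

import "context"

// renamedFiles diffs two trees with rename detection enabled and keeps
// only the changes whose path differs between From and To.
func renamedFiles(a, b *Tree) (Changes, error) {
	changes, err := DiffTreeWithOptions(context.Background(), a, b, DefaultDiffTreeOptions)
	if err != nil {
		return nil, err
	}
	var renames Changes
	for _, c := range changes {
		if c.From.Name != "" && c.To.Name != "" && c.From.Name != c.To.Name {
			renames = append(renames, c)
		}
	}
	return renames, nil
}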
package object import ( "bytes" "io" "strings" "github.com/go-git/go-git/v6/plumbing/filemode" "github.com/go-git/go-git/v6/plumbing/storer" "github.com/go-git/go-git/v6/utils/binary" "github.com/go-git/go-git/v6/utils/ioutil" ) // File represents git file objects. type File struct { // Name is the path of the file. It might be relative to a tree, // depending on the function that generates it. Name string // Mode is the file mode. Mode filemode.FileMode // Blob with the contents of the file. Blob } // NewFile returns a File based on the given blob object func NewFile(name string, m filemode.FileMode, b *Blob) *File { return &File{Name: name, Mode: m, Blob: *b} } // Contents returns the contents of a file as a string. func (f *File) Contents() (content string, err error) { reader, err := f.Reader() if err != nil { return "", err } defer ioutil.CheckClose(reader, &err) buf := new(bytes.Buffer) if _, err := buf.ReadFrom(reader); err != nil { return "", err } return buf.String(), nil } // IsBinary reports whether the file is binary. func (f *File) IsBinary() (bin bool, err error) { reader, err := f.Reader() if err != nil { return false, err } defer ioutil.CheckClose(reader, &err) return binary.IsBinary(reader) } // Lines returns a slice of lines from the contents of a file, stripping // all end of line characters. If the last line is empty (i.e. the content // ends with an end of line character), it is also stripped. func (f *File) Lines() ([]string, error) { content, err := f.Contents() if err != nil { return nil, err } splits := strings.Split(content, "\n") // remove the last line if it is empty if splits[len(splits)-1] == "" { return splits[:len(splits)-1], nil } return splits, nil } // FileIter provides an iterator for the files in a tree. type FileIter struct { s storer.EncodedObjectStorer w TreeWalker } // NewFileIter takes a storer.EncodedObjectStorer and a Tree and returns a // *FileIter that iterates over all files contained in the tree, recursively. func NewFileIter(s storer.EncodedObjectStorer, t *Tree) *FileIter { return &FileIter{s: s, w: *NewTreeWalker(t, true, nil)} } // Next moves the iterator to the next file and returns a pointer to it. If // there are no more files, it returns io.EOF. func (iter *FileIter) Next() (*File, error) { for { name, entry, err := iter.w.Next() if err != nil { return nil, err } if entry.Mode == filemode.Dir || entry.Mode == filemode.Submodule { continue } blob, err := GetBlob(iter.s, entry.Hash) if err != nil { return nil, err } return NewFile(name, entry.Mode, blob), nil } } // ForEach calls the cb function for each file contained in this iter until // an error happens or the end of the iter is reached. If storer.ErrStop is sent // the iteration is stopped but no error is returned. The iterator is closed. func (iter *FileIter) ForEach(cb func(*File) error) error { defer iter.Close() for { f, err := iter.Next() if err != nil { if err == io.EOF { return nil } return err } if err := cb(f); err != nil { if err == storer.ErrStop { return nil } return err } } } func (iter *FileIter) Close() { iter.w.Close() }
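A short sketch over the File API above (countLines is a hypothetical helper; Tree.Files() is defined elsewhere in this package):

package object

import (
	"fmt"
	"io"
)

// countLines prints a line count for every non-binary file in the tree.
func countLines(t *Tree) error {
	iter := t.Files()
	defer iter.Close()
	for {
		f, err := iter.Next()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		bin, err := f.IsBinary()
		if err != nil {
			return err
		}
		if bin {
			continue
		}
		lines, err := f.Lines()
		if err != nil {
			return err
		}
		fmt.Printf("%6d %s\n", len(lines), f.Name)
	}
}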
package object import ( "fmt" "sort" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/storer" ) // errIsReachable is returned when the first commit is an ancestor of the second var errIsReachable = fmt.Errorf("first is reachable from second") // MergeBase mimics the behavior of `git merge-base actual other`, returning the // best common ancestor between the current commit and the passed one. // The best common ancestors cannot be reached from other common ancestors. func (c *Commit) MergeBase(other *Commit) ([]*Commit, error) { // use sortedByCommitDateDesc strategy sorted := sortByCommitDateDesc(c, other) newer := sorted[0] older := sorted[1] newerHistory, err := ancestorsIndex(older, newer) if err == errIsReachable { return []*Commit{older}, nil } if err != nil { return nil, err } var res []*Commit inNewerHistory := isInIndexCommitFilter(newerHistory) resIter := NewFilterCommitIter(older, &inNewerHistory, &inNewerHistory) _ = resIter.ForEach(func(commit *Commit) error { res = append(res, commit) return nil }) return Independents(res) } // IsAncestor returns true if the current commit is an ancestor of the passed one. // It returns an error if the history is not traversable. // It mimics the behavior of `git merge-base --is-ancestor actual other` func (c *Commit) IsAncestor(other *Commit) (bool, error) { found := false iter := NewCommitPreorderIter(other, nil, nil) err := iter.ForEach(func(comm *Commit) error { if comm.Hash != c.Hash { return nil } found = true return storer.ErrStop }) return found, err } // ancestorsIndex returns a map with the ancestors of the starting commit if the // excluded one is not one of them. It returns errIsReachable if the excluded commit // is an ancestor of the starting one, or another error if the history is not traversable. func ancestorsIndex(excluded, starting *Commit) (map[plumbing.Hash]struct{}, error) { if excluded.Hash.String() == starting.Hash.String() { return nil, errIsReachable } startingHistory := map[plumbing.Hash]struct{}{} startingIter := NewCommitIterBSF(starting, nil, nil) err := startingIter.ForEach(func(commit *Commit) error { if commit.Hash == excluded.Hash { return errIsReachable } startingHistory[commit.Hash] = struct{}{} return nil }) if err != nil { return nil, err } return startingHistory, nil } // Independents returns a subset of the passed commits, that are not reachable from the others. // It mimics the behavior of `git merge-base --independent commit...`. func Independents(commits []*Commit) ([]*Commit, error) { // use sortedByCommitDateDesc strategy candidates := sortByCommitDateDesc(commits...)
candidates = removeDuplicated(candidates) seen := map[plumbing.Hash]struct{}{} var isLimit CommitFilter = func(commit *Commit) bool { _, ok := seen[commit.Hash] return ok } if len(candidates) < 2 { return candidates, nil } pos := 0 for { from := candidates[pos] others := remove(candidates, from) fromHistoryIter := NewFilterCommitIter(from, nil, &isLimit) err := fromHistoryIter.ForEach(func(fromAncestor *Commit) error { for _, other := range others { if fromAncestor.Hash == other.Hash { candidates = remove(candidates, other) others = remove(others, other) } } if len(candidates) == 1 { return storer.ErrStop } seen[fromAncestor.Hash] = struct{}{} return nil }) if err != nil { return nil, err } nextPos := indexOf(candidates, from) + 1 if nextPos >= len(candidates) { break } pos = nextPos } return candidates, nil } // sortByCommitDateDesc returns the passed commits, sorted by `committer.When desc` // // This strategy tries to reduce the time needed to walk the history from one // commit to reach the others. It is assumed that ancestors tend to be committed // before their descendants; // that way `Independents(A^, A)` will be processed as if it were `Independents(A, A^)`; // starting from `A`, `A^` will be reached much sooner than by walking from `A^` // to the initial commit and then from `A` to `A^`. func sortByCommitDateDesc(commits ...*Commit) []*Commit { sorted := make([]*Commit, len(commits)) copy(sorted, commits) sort.Slice(sorted, func(i, j int) bool { return sorted[i].Committer.When.After(sorted[j].Committer.When) }) return sorted } // indexOf returns the first position where target was found in the passed commits func indexOf(commits []*Commit, target *Commit) int { for i, commit := range commits { if target.Hash == commit.Hash { return i } } return -1 } // remove returns the passed commits excluding the commit toDelete func remove(commits []*Commit, toDelete *Commit) []*Commit { res := make([]*Commit, len(commits)) j := 0 for _, commit := range commits { if commit.Hash == toDelete.Hash { continue } res[j] = commit j++ } return res[:j] } // removeDuplicated removes duplicated commits from the passed slice of commits func removeDuplicated(commits []*Commit) []*Commit { seen := make(map[plumbing.Hash]struct{}, len(commits)) res := make([]*Commit, len(commits)) j := 0 for _, commit := range commits { if _, ok := seen[commit.Hash]; ok { continue } seen[commit.Hash] = struct{}{} res[j] = commit j++ } return res[:j] } // isInIndexCommitFilter returns a commitFilter that returns true // if the commit is in the passed index. func isInIndexCommitFilter(index map[plumbing.Hash]struct{}) CommitFilter { return func(c *Commit) bool { _, ok := index[c.Hash] return ok } }
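A sketch of the merge-base API above (showMergeBase is an illustrative name), mirroring `git merge-base a b`:

package object

import "fmt"

// showMergeBase prints the best common ancestor(s) of two commits.
func showMergeBase(a, b *Commit) error {
	bases, err := a.MergeBase(b)
	if err != nil {
		return err
	}
	for _, base := range bases {
		fmt.Println(base.Hash)
	}
	return nil
}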
// Package object contains implementations of all Git objects and utility // functions to work with them. package object import ( "bytes" "errors" "fmt" "io" "strconv" "time" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/storer" ) // ErrUnsupportedObject is returned when a non-supported object is being decoded. var ErrUnsupportedObject = errors.New("unsupported object type") // Object is a generic representation of any git object. It is implemented by // Commit, Tree, Blob, and Tag, and includes the functions that are common to // them. // // Object is returned when an object can be of any type. It is frequently used // with a type switch to acquire the specific type of object: // // func process(obj Object) { // switch o := obj.(type) { // case *Commit: // // o is a Commit // case *Tree: // // o is a Tree // case *Blob: // // o is a Blob // case *Tag: // // o is a Tag // } // } // // This interface is intentionally different from plumbing.EncodedObject, which // is a lower level interface used by storage implementations to read and write // objects in their encoded form. type Object interface { ID() plumbing.Hash Type() plumbing.ObjectType Decode(plumbing.EncodedObject) error Encode(plumbing.EncodedObject) error } // GetObject gets an object from an object storer and decodes it. func GetObject(s storer.EncodedObjectStorer, h plumbing.Hash) (Object, error) { o, err := s.EncodedObject(plumbing.AnyObject, h) if err != nil { return nil, err } return DecodeObject(s, o) } // DecodeObject decodes an encoded object into an Object and associates it to // the given object storer. func DecodeObject(s storer.EncodedObjectStorer, o plumbing.EncodedObject) (Object, error) { switch o.Type() { case plumbing.CommitObject: return DecodeCommit(s, o) case plumbing.TreeObject: return DecodeTree(s, o) case plumbing.BlobObject: return DecodeBlob(o) case plumbing.TagObject: return DecodeTag(s, o) default: return nil, plumbing.ErrInvalidType } } // DateFormat is the format being used in the original git implementation const DateFormat = "Mon Jan 02 15:04:05 2006 -0700" // Signature is used to identify who created a commit or tag, and when. type Signature struct { // Name represents a person name. It is an arbitrary string. Name string // Email is an email, but it cannot be assumed to be well-formed. Email string // When is the timestamp of the signature. When time.Time } // Decode decodes a byte slice into a Signature. func (s *Signature) Decode(b []byte) { open := bytes.LastIndexByte(b, '<') close := bytes.LastIndexByte(b, '>') if open == -1 || close == -1 { return } if close < open { return } s.Name = string(bytes.Trim(b[:open], " ")) s.Email = string(b[open+1 : close]) hasTime := close+2 < len(b) if hasTime { s.decodeTimeAndTimeZone(b[close+2:]) } } // Encode encodes a Signature into a writer.
func (s *Signature) Encode(w io.Writer) error { if _, err := fmt.Fprintf(w, "%s <%s> ", s.Name, s.Email); err != nil { return err } if err := s.encodeTimeAndTimeZone(w); err != nil { return err } return nil } var timeZoneLength = 5 func (s *Signature) decodeTimeAndTimeZone(b []byte) { space := bytes.IndexByte(b, ' ') if space == -1 { space = len(b) } ts, err := strconv.ParseInt(string(b[:space]), 10, 64) if err != nil { return } s.When = time.Unix(ts, 0).In(time.UTC) var tzStart = space + 1 if tzStart >= len(b) || tzStart+timeZoneLength > len(b) { return } timezone := string(b[tzStart : tzStart+timeZoneLength]) tzhours, err1 := strconv.ParseInt(timezone[0:3], 10, 64) tzmins, err2 := strconv.ParseInt(timezone[3:], 10, 64) if err1 != nil || err2 != nil { return } if tzhours < 0 { tzmins *= -1 } tz := time.FixedZone("", int(tzhours*60*60+tzmins*60)) s.When = s.When.In(tz) } func (s *Signature) encodeTimeAndTimeZone(w io.Writer) error { u := s.When.Unix() if u < 0 { u = 0 } _, err := fmt.Fprintf(w, "%d %s", u, s.When.Format("-0700")) return err } func (s *Signature) String() string { return fmt.Sprintf("%s <%s>", s.Name, s.Email) } // ObjectIter provides an iterator for a set of objects. type ObjectIter struct { storer.EncodedObjectIter s storer.EncodedObjectStorer } // NewObjectIter takes a storer.EncodedObjectStorer and a // storer.EncodedObjectIter and returns an *ObjectIter that iterates over all // objects contained in the storer.EncodedObjectIter. func NewObjectIter(s storer.EncodedObjectStorer, iter storer.EncodedObjectIter) *ObjectIter { return &ObjectIter{iter, s} } // Next moves the iterator to the next object and returns a pointer to it. If // there are no more objects, it returns io.EOF. func (iter *ObjectIter) Next() (Object, error) { for { obj, err := iter.EncodedObjectIter.Next() if err != nil { return nil, err } o, err := iter.toObject(obj) if err == plumbing.ErrInvalidType { continue } if err != nil { return nil, err } return o, nil } } // ForEach calls the cb function for each object contained in this iter until // an error happens or the end of the iter is reached. If ErrStop is sent // the iteration is stopped but no error is returned. The iterator is closed. func (iter *ObjectIter) ForEach(cb func(Object) error) error { return iter.EncodedObjectIter.ForEach(func(obj plumbing.EncodedObject) error { o, err := iter.toObject(obj) if err == plumbing.ErrInvalidType { return nil } if err != nil { return err } return cb(o) }) } func (iter *ObjectIter) toObject(obj plumbing.EncodedObject) (Object, error) { switch obj.Type() { case plumbing.BlobObject: blob := &Blob{} return blob, blob.Decode(obj) case plumbing.TreeObject: tree := &Tree{s: iter.s} return tree, tree.Decode(obj) case plumbing.CommitObject: commit := &Commit{} return commit, commit.Decode(obj) case plumbing.TagObject: tag := &Tag{} return tag, tag.Decode(obj) default: return nil, plumbing.ErrInvalidType } }
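A round-trip sketch of the signature wire format handled by Decode and Encode above (the sample header value is an assumption used only for illustration):

package object

import (
	"bytes"
	"fmt"
)

// signatureRoundTrip decodes a raw "name <email> unix tz" header value
// and re-encodes it, yielding the same bytes.
func signatureRoundTrip() {
	var s Signature
	s.Decode([]byte("Jane Doe <jane@example.com> 1136239445 -0700"))
	var buf bytes.Buffer
	_ = s.Encode(&buf)
	fmt.Println(buf.String()) // Jane Doe <jane@example.com> 1136239445 -0700
}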
package object import ( "bytes" "context" "errors" "fmt" "io" "strconv" "strings" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/filemode" fdiff "github.com/go-git/go-git/v6/plumbing/format/diff" "github.com/go-git/go-git/v6/utils/diff" dmp "github.com/sergi/go-diff/diffmatchpatch" ) var ( ErrCanceled = errors.New("operation canceled") ) func getPatch(message string, changes ...*Change) (*Patch, error) { ctx := context.Background() return getPatchContext(ctx, message, changes...) } func getPatchContext(ctx context.Context, message string, changes ...*Change) (*Patch, error) { var filePatches []fdiff.FilePatch for _, c := range changes { select { case <-ctx.Done(): return nil, ErrCanceled default: } fp, err := filePatchWithContext(ctx, c) if err != nil { return nil, err } filePatches = append(filePatches, fp) } return &Patch{message, filePatches}, nil } func filePatchWithContext(ctx context.Context, c *Change) (fdiff.FilePatch, error) { from, to, err := c.Files() if err != nil { return nil, err } fromContent, fIsBinary, err := fileContent(from) if err != nil { return nil, err } toContent, tIsBinary, err := fileContent(to) if err != nil { return nil, err } if fIsBinary || tIsBinary { return &textFilePatch{from: c.From, to: c.To}, nil } diffs := diff.Do(fromContent, toContent) var chunks []fdiff.Chunk for _, d := range diffs { select { case <-ctx.Done(): return nil, ErrCanceled default: } var op fdiff.Operation switch d.Type { case dmp.DiffEqual: op = fdiff.Equal case dmp.DiffDelete: op = fdiff.Delete case dmp.DiffInsert: op = fdiff.Add } chunks = append(chunks, &textChunk{d.Text, op}) } return &textFilePatch{ chunks: chunks, from: c.From, to: c.To, }, nil } func fileContent(f *File) (content string, isBinary bool, err error) { if f == nil { return } isBinary, err = f.IsBinary() if err != nil || isBinary { return } content, err = f.Contents() return } // Patch is an implementation of fdiff.Patch interface type Patch struct { message string filePatches []fdiff.FilePatch } func (p *Patch) FilePatches() []fdiff.FilePatch { return p.filePatches } func (p *Patch) Message() string { return p.message } func (p *Patch) Encode(w io.Writer) error { ue := fdiff.NewUnifiedEncoder(w, fdiff.DefaultContextLines) return ue.Encode(p) } func (p *Patch) Stats() FileStats { return getFileStatsFromFilePatches(p.FilePatches()) } func (p *Patch) String() string { buf := bytes.NewBuffer(nil) err := p.Encode(buf) if err != nil { return fmt.Sprintf("malformed patch: %s", err.Error()) } return buf.String() } // changeEntryWrapper is an implementation of fdiff.File interface type changeEntryWrapper struct { ce ChangeEntry } func (f *changeEntryWrapper) Hash() plumbing.Hash { if !f.ce.TreeEntry.Mode.IsFile() { return plumbing.ZeroHash } return f.ce.TreeEntry.Hash } func (f *changeEntryWrapper) Mode() filemode.FileMode { return f.ce.TreeEntry.Mode } func (f *changeEntryWrapper) Path() string { if !f.ce.TreeEntry.Mode.IsFile() { return "" } return f.ce.Name } func (f *changeEntryWrapper) Empty() bool { return !f.ce.TreeEntry.Mode.IsFile() } // textFilePatch is an implementation of fdiff.FilePatch interface type textFilePatch struct { chunks []fdiff.Chunk from, to ChangeEntry } func (tf *textFilePatch) Files() (from fdiff.File, to fdiff.File) { f := &changeEntryWrapper{tf.from} t := &changeEntryWrapper{tf.to} if !f.Empty() { from = f } if !t.Empty() { to = t } return } func (tf *textFilePatch) IsBinary() bool { return len(tf.chunks) == 0 } func (tf *textFilePatch) Chunks() []fdiff.Chunk { 
return tf.chunks } // textChunk is an implementation of fdiff.Chunk interface type textChunk struct { content string op fdiff.Operation } func (t *textChunk) Content() string { return t.content } func (t *textChunk) Type() fdiff.Operation { return t.op } // FileStat stores the status of changes in content of a file. type FileStat struct { Name string Addition int Deletion int } func (fs FileStat) String() string { return printStat([]FileStat{fs}) } // FileStats is a collection of FileStat. type FileStats []FileStat func (fileStats FileStats) String() string { return printStat(fileStats) } // printStat prints the stats of changes in content of files. // Original implementation: https://github.com/git/git/blob/1a87c842ece327d03d08096395969aca5e0a6996/diff.c#L2615 // Parts of the output: // <pad><filename><pad>|<pad><changeNumber><pad><+++/---><newline> // example: " main.go | 10 +++++++--- " func printStat(fileStats []FileStat) string { maxGraphWidth := uint(53) maxNameLen := 0 maxChangeLen := 0 scaleLinear := func(it, width, max uint) uint { if it == 0 || max == 0 { return 0 } return 1 + (it * (width - 1) / max) } for _, fs := range fileStats { if len(fs.Name) > maxNameLen { maxNameLen = len(fs.Name) } changes := strconv.Itoa(fs.Addition + fs.Deletion) if len(changes) > maxChangeLen { maxChangeLen = len(changes) } } result := "" for _, fs := range fileStats { add := uint(fs.Addition) del := uint(fs.Deletion) np := maxNameLen - len(fs.Name) cp := maxChangeLen - len(strconv.Itoa(fs.Addition+fs.Deletion)) total := add + del if total > maxGraphWidth { add = scaleLinear(add, maxGraphWidth, total) del = scaleLinear(del, maxGraphWidth, total) } adds := strings.Repeat("+", int(add)) dels := strings.Repeat("-", int(del)) namePad := strings.Repeat(" ", np) changePad := strings.Repeat(" ", cp) result += fmt.Sprintf(" %s%s | %s%d %s%s\n", fs.Name, namePad, changePad, total, adds, dels) } return result } func getFileStatsFromFilePatches(filePatches []fdiff.FilePatch) FileStats { var fileStats FileStats for _, fp := range filePatches { // ignore empty patches (binary files, submodule refs updates) if len(fp.Chunks()) == 0 { continue } cs := FileStat{} from, to := fp.Files() if from == nil { // New File is created. cs.Name = to.Path() } else if to == nil { // File is deleted. cs.Name = from.Path() } else if from.Path() != to.Path() { // File is renamed. cs.Name = fmt.Sprintf("%s => %s", from.Path(), to.Path()) } else { cs.Name = from.Path() } for _, chunk := range fp.Chunks() { s := chunk.Content() if len(s) == 0 { continue } switch chunk.Type() { case fdiff.Add: cs.Addition += strings.Count(s, "\n") if s[len(s)-1] != '\n' { cs.Addition++ } case fdiff.Delete: cs.Deletion += strings.Count(s, "\n") if s[len(s)-1] != '\n' { cs.Deletion++ } } } fileStats = append(fileStats, cs) } return fileStats }
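// Illustrative sketch, not part of the original source: rendering FileStats
// in the "git diff --stat" style implemented by printStat above. The file
// names and counts are made up.
func exampleFileStatsString() {
	stats := FileStats{
		{Name: "main.go", Addition: 10, Deletion: 3},
		{Name: "README", Addition: 1},
	}
	// Prints roughly:
	//  main.go | 13 ++++++++++---
	//  README  |  1 +
	fmt.Print(stats.String())
}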
package object import ( "errors" "io" "sort" "strings" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/filemode" "github.com/go-git/go-git/v6/utils/ioutil" "github.com/go-git/go-git/v6/utils/merkletrie" ) // DetectRenames detects the renames in the given changes on two trees with // the given options. It will return the given changes grouping additions and // deletions into modifications when possible. // If options is nil, the default diff tree options will be used. func DetectRenames( changes Changes, opts *DiffTreeOptions, ) (Changes, error) { if opts == nil { opts = DefaultDiffTreeOptions } detector := &renameDetector{ renameScore: int(opts.RenameScore), renameLimit: int(opts.RenameLimit), onlyExact: opts.OnlyExactRenames, } for _, c := range changes { action, err := c.Action() if err != nil { return nil, err } switch action { case merkletrie.Insert: detector.added = append(detector.added, c) case merkletrie.Delete: detector.deleted = append(detector.deleted, c) default: detector.modified = append(detector.modified, c) } } return detector.detect() } // renameDetector will detect and resolve renames in a set of changes. // see: https://github.com/eclipse/jgit/blob/master/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java type renameDetector struct { added []*Change deleted []*Change modified []*Change renameScore int renameLimit int onlyExact bool } // detectExactRenames detects matches files that were deleted with files that // were added where the hash is the same on both. If there are multiple targets // the one with the most similar path will be chosen as the rename and the // rest as either deletions or additions. func (d *renameDetector) detectExactRenames() { added := groupChangesByHash(d.added) deletes := groupChangesByHash(d.deleted) var uniqueAdds []*Change var nonUniqueAdds [][]*Change var addedLeft []*Change for _, cs := range added { if len(cs) == 1 { uniqueAdds = append(uniqueAdds, cs[0]) } else { nonUniqueAdds = append(nonUniqueAdds, cs) } } for _, c := range uniqueAdds { hash := changeHash(c) deleted := deletes[hash] if len(deleted) == 1 { if sameMode(c, deleted[0]) { d.modified = append(d.modified, &Change{From: deleted[0].From, To: c.To}) delete(deletes, hash) } else { addedLeft = append(addedLeft, c) } } else if len(deleted) > 1 { bestMatch := bestNameMatch(c, deleted) if bestMatch != nil && sameMode(c, bestMatch) { d.modified = append(d.modified, &Change{From: bestMatch.From, To: c.To}) delete(deletes, hash) var newDeletes = make([]*Change, 0, len(deleted)-1) for _, d := range deleted { if d != bestMatch { newDeletes = append(newDeletes, d) } } deletes[hash] = newDeletes } } else { addedLeft = append(addedLeft, c) } } for _, added := range nonUniqueAdds { hash := changeHash(added[0]) deleted := deletes[hash] if len(deleted) == 1 { deleted := deleted[0] bestMatch := bestNameMatch(deleted, added) if bestMatch != nil && sameMode(deleted, bestMatch) { d.modified = append(d.modified, &Change{From: deleted.From, To: bestMatch.To}) delete(deletes, hash) for _, c := range added { if c != bestMatch { addedLeft = append(addedLeft, c) } } } else { addedLeft = append(addedLeft, added...) 
} } else if len(deleted) > 1 { maxSize := len(deleted) * len(added) if d.renameLimit > 0 && d.renameLimit < maxSize { maxSize = d.renameLimit } matrix := make(similarityMatrix, 0, maxSize) for delIdx, del := range deleted { deletedName := changeName(del) for addIdx, add := range added { addedName := changeName(add) score := nameSimilarityScore(addedName, deletedName) matrix = append(matrix, similarityPair{added: addIdx, deleted: delIdx, score: score}) if len(matrix) >= maxSize { break } } if len(matrix) >= maxSize { break } } sort.Stable(matrix) usedAdds := make(map[*Change]struct{}) usedDeletes := make(map[*Change]struct{}) for i := len(matrix) - 1; i >= 0; i-- { del := deleted[matrix[i].deleted] add := added[matrix[i].added] if add == nil || del == nil { // it was already matched continue } usedAdds[add] = struct{}{} usedDeletes[del] = struct{}{} d.modified = append(d.modified, &Change{From: del.From, To: add.To}) added[matrix[i].added] = nil deleted[matrix[i].deleted] = nil } for _, c := range added { if _, ok := usedAdds[c]; !ok && c != nil { addedLeft = append(addedLeft, c) } } var newDeletes = make([]*Change, 0, len(deleted)-len(usedDeletes)) for _, c := range deleted { if _, ok := usedDeletes[c]; !ok && c != nil { newDeletes = append(newDeletes, c) } } deletes[hash] = newDeletes } else { addedLeft = append(addedLeft, added...) } } d.added = addedLeft d.deleted = nil for _, dels := range deletes { d.deleted = append(d.deleted, dels...) } } // detectContentRenames detects renames based on the similarity of the content // in the files by building a matrix of pairs between sources and destinations // and matching by the highest score. // see: https://github.com/eclipse/jgit/blob/master/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java func (d *renameDetector) detectContentRenames() error { cnt := max(len(d.added), len(d.deleted)) if d.renameLimit > 0 && cnt > d.renameLimit { return nil } srcs, dsts := d.deleted, d.added matrix, err := buildSimilarityMatrix(srcs, dsts, d.renameScore) if err != nil { return err } renames := make([]*Change, 0, min(len(matrix), len(dsts))) // Match rename pairs on a first come, first served basis until // we have looked at everything that is above the minimum score. for i := len(matrix) - 1; i >= 0; i-- { pair := matrix[i] src := srcs[pair.deleted] dst := dsts[pair.added] if dst == nil || src == nil { // It was already matched before continue } renames = append(renames, &Change{From: src.From, To: dst.To}) // Claim destination and source as matched dsts[pair.added] = nil srcs[pair.deleted] = nil } d.modified = append(d.modified, renames...) d.added = compactChanges(dsts) d.deleted = compactChanges(srcs) return nil } func (d *renameDetector) detect() (Changes, error) { if len(d.added) > 0 && len(d.deleted) > 0 { d.detectExactRenames() if !d.onlyExact { if err := d.detectContentRenames(); err != nil { return nil, err } } } result := make(Changes, 0, len(d.added)+len(d.deleted)+len(d.modified)) result = append(result, d.added...) result = append(result, d.deleted...) result = append(result, d.modified...)
sort.Stable(result) return result, nil } func bestNameMatch(change *Change, changes []*Change) *Change { var best *Change var bestScore int cname := changeName(change) for _, c := range changes { score := nameSimilarityScore(cname, changeName(c)) if score > bestScore { bestScore = score best = c } } return best } func nameSimilarityScore(a, b string) int { aDirLen := strings.LastIndexByte(a, '/') + 1 bDirLen := strings.LastIndexByte(b, '/') + 1 dirMin := min(aDirLen, bDirLen) dirMax := max(aDirLen, bDirLen) var dirScoreLtr, dirScoreRtl int if dirMax == 0 { dirScoreLtr = 100 dirScoreRtl = 100 } else { var dirSim int for ; dirSim < dirMin; dirSim++ { if a[dirSim] != b[dirSim] { break } } dirScoreLtr = dirSim * 100 / dirMax if dirScoreLtr == 100 { dirScoreRtl = 100 } else { for dirSim = 0; dirSim < dirMin; dirSim++ { if a[aDirLen-1-dirSim] != b[bDirLen-1-dirSim] { break } } dirScoreRtl = dirSim * 100 / dirMax } } fileMin := min(len(a)-aDirLen, len(b)-bDirLen) fileMax := max(len(a)-aDirLen, len(b)-bDirLen) fileSim := 0 for ; fileSim < fileMin; fileSim++ { if a[len(a)-1-fileSim] != b[len(b)-1-fileSim] { break } } fileScore := fileSim * 100 / fileMax return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100 } func changeName(c *Change) string { if c.To != empty { return c.To.Name } return c.From.Name } func changeHash(c *Change) plumbing.Hash { if c.To != empty { return c.To.TreeEntry.Hash } return c.From.TreeEntry.Hash } func changeMode(c *Change) filemode.FileMode { if c.To != empty { return c.To.TreeEntry.Mode } return c.From.TreeEntry.Mode } func sameMode(a, b *Change) bool { return changeMode(a) == changeMode(b) } func groupChangesByHash(changes []*Change) map[plumbing.Hash][]*Change { var result = make(map[plumbing.Hash][]*Change) for _, c := range changes { hash := changeHash(c) result[hash] = append(result[hash], c) } return result } type similarityMatrix []similarityPair func (m similarityMatrix) Len() int { return len(m) } func (m similarityMatrix) Swap(i, j int) { m[i], m[j] = m[j], m[i] } func (m similarityMatrix) Less(i, j int) bool { if m[i].score == m[j].score { if m[i].added == m[j].added { return m[i].deleted < m[j].deleted } return m[i].added < m[j].added } return m[i].score < m[j].score } type similarityPair struct { // index of the added file added int // index of the deleted file deleted int // similarity score score int } func max(a, b int) int { if a > b { return a } return b } func min(a, b int) int { if a < b { return a } return b } const maxMatrixSize = 10000 func buildSimilarityMatrix(srcs, dsts []*Change, renameScore int) (similarityMatrix, error) { // Allocate for the worst-case scenario where every pair has a score // that we need to consider. We might not need that many. matrixSize := len(srcs) * len(dsts) if matrixSize > maxMatrixSize { matrixSize = maxMatrixSize } matrix := make(similarityMatrix, 0, matrixSize) srcSizes := make([]int64, len(srcs)) dstSizes := make([]int64, len(dsts)) dstTooLarge := make(map[int]bool) // Consider each pair of files, if the score is above the minimum // threshold we need to record that scoring in the matrix so we can // later find the best matches. outerLoop: for srcIdx, src := range srcs { if changeMode(src) != filemode.Regular { continue } // Declare the from file and the similarity index here to be able to // reuse it inside the inner loop. The reason to not initialize them // here is so we can skip the initialization in case they happen to // not be needed later. 
They will be initialized inside the inner // loop if and only if they're needed and reused in subsequent passes. var from *File var s *similarityIndex var err error for dstIdx, dst := range dsts { if changeMode(dst) != filemode.Regular { continue } if dstTooLarge[dstIdx] { continue } var to *File srcSize := srcSizes[srcIdx] if srcSize == 0 { from, _, err = src.Files() if err != nil { return nil, err } srcSize = from.Size + 1 srcSizes[srcIdx] = srcSize } dstSize := dstSizes[dstIdx] if dstSize == 0 { _, to, err = dst.Files() if err != nil { return nil, err } dstSize = to.Size + 1 dstSizes[dstIdx] = dstSize } min, max := srcSize, dstSize if dstSize < srcSize { min = dstSize max = srcSize } if int(min*100/max) < renameScore { // File sizes are too different to be a match continue } if s == nil { s, err = fileSimilarityIndex(from) if err != nil { if err == errIndexFull { continue outerLoop } return nil, err } } if to == nil { _, to, err = dst.Files() if err != nil { return nil, err } } di, err := fileSimilarityIndex(to) if err != nil { if err == errIndexFull { dstTooLarge[dstIdx] = true } return nil, err } contentScore := s.score(di, 10000) // The name score returns a value between 0 and 100, so we need to // convert it to the same range as the content score. nameScore := nameSimilarityScore(src.From.Name, dst.To.Name) * 100 score := (contentScore*99 + nameScore*1) / 10000 if score < renameScore { continue } matrix = append(matrix, similarityPair{added: dstIdx, deleted: srcIdx, score: score}) } } sort.Stable(matrix) return matrix, nil } func compactChanges(changes []*Change) []*Change { var result []*Change for _, c := range changes { if c != nil { result = append(result, c) } } return result } const ( keyShift = 32 maxCountValue = (1 << keyShift) - 1 ) var errIndexFull = errors.New("index is full") // similarityIndex is an index structure of lines/blocks in one file. // This structure can be used to compute an approximation of the similarity // between two files. // To save space in memory, this index uses a space efficient encoding which // will not exceed 1MiB per instance. The index starts out at a smaller size // (closer to 2KiB), but may grow as more distinct blocks within the scanned // file are discovered. 
// see: https://github.com/eclipse/jgit/blob/master/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java type similarityIndex struct { hashed uint64 // number of non-zero entries in hashes numHashes int growAt int hashes []keyCountPair hashBits int } func fileSimilarityIndex(f *File) (*similarityIndex, error) { idx := newSimilarityIndex() if err := idx.hash(f); err != nil { return nil, err } sort.Stable(keyCountPairs(idx.hashes)) return idx, nil } func newSimilarityIndex() *similarityIndex { return &similarityIndex{ hashBits: 8, hashes: make([]keyCountPair, 1<<8), growAt: shouldGrowAt(8), } } func (i *similarityIndex) hash(f *File) error { isBin, err := f.IsBinary() if err != nil { return err } r, err := f.Reader() if err != nil { return err } defer ioutil.CheckClose(r, &err) return i.hashContent(r, f.Size, isBin) } func (i *similarityIndex) hashContent(r io.Reader, size int64, isBin bool) error { var buf = make([]byte, 4096) var ptr, cnt int remaining := size for 0 < remaining { hash := 5381 var blockHashedCnt uint64 // Hash one line or block, whichever happens first n := int64(0) for { if ptr == cnt { ptr = 0 var err error cnt, err = io.ReadFull(r, buf) if err != nil && err != io.ErrUnexpectedEOF { return err } if cnt == 0 { return io.EOF } } n++ c := buf[ptr] & 0xff ptr++ // Ignore CR in CRLF sequence if it's text if !isBin && c == '\r' && ptr < cnt && buf[ptr] == '\n' { continue } blockHashedCnt++ if c == '\n' { break } hash = (hash << 5) + hash + int(c) if n >= 64 || n >= remaining { break } } i.hashed += blockHashedCnt if err := i.add(hash, blockHashedCnt); err != nil { return err } remaining -= n } return nil } // score computes the similarity score between this index and another one. // A region of a file is defined as a line in a text file or a fixed-size // block in a binary file. To prepare an index, each region in the file is // hashed; the values and counts of hashes are retained in a sorted table. // Define the similarity fraction F as the count of matching regions between // the two files divided by the maximum count of regions in either file. // The similarity score is F multiplied by the maxScore constant, yielding a // range [0, maxScore]. It is defined as maxScore for the degenerate case of // two empty files. // The similarity score is symmetrical; i.e. a.score(b) == b.score(a).
func (i *similarityIndex) score(other *similarityIndex, maxScore int) int { var maxHashed = i.hashed if maxHashed < other.hashed { maxHashed = other.hashed } if maxHashed == 0 { return maxScore } return int(i.common(other) * uint64(maxScore) / maxHashed) } func (i *similarityIndex) common(dst *similarityIndex) uint64 { srcIdx, dstIdx := 0, 0 if i.numHashes == 0 || dst.numHashes == 0 { return 0 } var common uint64 srcKey, dstKey := i.hashes[srcIdx].key(), dst.hashes[dstIdx].key() for { if srcKey == dstKey { srcCnt, dstCnt := i.hashes[srcIdx].count(), dst.hashes[dstIdx].count() if srcCnt < dstCnt { common += srcCnt } else { common += dstCnt } srcIdx++ if srcIdx == len(i.hashes) { break } srcKey = i.hashes[srcIdx].key() dstIdx++ if dstIdx == len(dst.hashes) { break } dstKey = dst.hashes[dstIdx].key() } else if srcKey < dstKey { // Region of src that is not in dst srcIdx++ if srcIdx == len(i.hashes) { break } srcKey = i.hashes[srcIdx].key() } else { // Region of dst that is not in src dstIdx++ if dstIdx == len(dst.hashes) { break } dstKey = dst.hashes[dstIdx].key() } } return common } func (i *similarityIndex) add(key int, cnt uint64) error { key = int(uint32(key) * 0x9e370001 >> 1) j := i.slot(key) for { v := i.hashes[j] if v == 0 { // It's an empty slot, so we can store it here. if i.growAt <= i.numHashes { if err := i.grow(); err != nil { return err } j = i.slot(key) continue } var err error i.hashes[j], err = newKeyCountPair(key, cnt) if err != nil { return err } i.numHashes++ return nil } else if v.key() == key { // It's the same key, so increment the counter. var err error i.hashes[j], err = newKeyCountPair(key, v.count()+cnt) return err } else if j+1 >= len(i.hashes) { j = 0 } else { j++ } } } type keyCountPair uint64 func newKeyCountPair(key int, cnt uint64) (keyCountPair, error) { if cnt > maxCountValue { return 0, errIndexFull } return keyCountPair((uint64(key) << keyShift) | cnt), nil } func (p keyCountPair) key() int { return int(p >> keyShift) } func (p keyCountPair) count() uint64 { return uint64(p) & maxCountValue } func (i *similarityIndex) slot(key int) int { // We use 31 - hashBits because the upper bit was already forced // to be 0 and we want the remaining high bits to be used as the // table slot. return int(uint32(key) >> uint(31-i.hashBits)) } func shouldGrowAt(hashBits int) int { return (1 << uint(hashBits)) * (hashBits - 3) / hashBits } func (i *similarityIndex) grow() error { if i.hashBits == 30 { return errIndexFull } old := i.hashes i.hashBits++ i.growAt = shouldGrowAt(i.hashBits) // TODO(erizocosmico): find a way to check if it will OOM and return // errIndexFull instead. i.hashes = make([]keyCountPair, 1<<uint(i.hashBits)) for _, v := range old { if v != 0 { j := i.slot(v.key()) for i.hashes[j] != 0 { j++ if j >= len(i.hashes) { j = 0 } } i.hashes[j] = v } } return nil } type keyCountPairs []keyCountPair func (p keyCountPairs) Len() int { return len(p) } func (p keyCountPairs) Swap(i, j int) { p[i], p[j] = p[j], p[i] } func (p keyCountPairs) Less(i, j int) bool { return p[i] < p[j] }
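// Illustrative sketch, not part of the original source: invoking the rename
// detection above over a set of tree changes. The option values are
// arbitrary; only RenameScore, RenameLimit and OnlyExactRenames are consumed
// by the renameDetector.
func exampleDetectRenames(changes Changes) (Changes, error) {
	opts := &DiffTreeOptions{
		RenameScore:      60, // minimum similarity (in %) for a pair to count as a rename
		RenameLimit:      0,  // 0 disables the limit on candidate pairs
		OnlyExactRenames: false,
	}
	return DetectRenames(changes, opts)
}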
package object import "bytes" const ( signatureTypeUnknown signatureType = iota signatureTypeOpenPGP signatureTypeX509 signatureTypeSSH ) var ( // openPGPSignatureFormat is the format of an OpenPGP signature. openPGPSignatureFormat = signatureFormat{ []byte("-----BEGIN PGP SIGNATURE-----"), []byte("-----BEGIN PGP MESSAGE-----"), } // x509SignatureFormat is the format of an X509 signature, which is // a PKCS#7 (S/MIME) signature. x509SignatureFormat = signatureFormat{ []byte("-----BEGIN CERTIFICATE-----"), []byte("-----BEGIN SIGNED MESSAGE-----"), } // sshSignatureFormat is the format of an SSH signature. sshSignatureFormat = signatureFormat{ []byte("-----BEGIN SSH SIGNATURE-----"), } ) var ( // knownSignatureFormats is a map of known signature formats, indexed by // their signatureType. knownSignatureFormats = map[signatureType]signatureFormat{ signatureTypeOpenPGP: openPGPSignatureFormat, signatureTypeX509: x509SignatureFormat, signatureTypeSSH: sshSignatureFormat, } ) // signatureType represents the type of the signature. type signatureType int8 // signatureFormat represents the beginning of a signature. type signatureFormat [][]byte // typeForSignature returns the type of the signature based on its format. func typeForSignature(b []byte) signatureType { for t, i := range knownSignatureFormats { for _, begin := range i { if bytes.HasPrefix(b, begin) { return t } } } return signatureTypeUnknown } // parseSignedBytes returns the position of the last signature block found in // the given bytes. If no signature block is found, it returns -1. // // When multiple signature blocks are found, the position of the last one is // returned. Any tailing bytes after this signature block start should be // considered part of the signature. // // Given this, it would be safe to use the returned position to split the bytes // into two parts: the first part containing the message, the second part // containing the signature. // // Example: // // message := []byte(`Message with signature // // -----BEGIN SSH SIGNATURE----- // ...`) // // var signature string // if pos, _ := parseSignedBytes(message); pos != -1 { // signature = string(message[pos:]) // message = message[:pos] // } // // This logic is on par with git's gpg-interface.c:parse_signed_buffer(). // https://github.com/git/git/blob/7c2ef319c52c4997256f5807564523dfd4acdfc7/gpg-interface.c#L668 func parseSignedBytes(b []byte) (int, signatureType) { var n, match = 0, -1 var t signatureType for n < len(b) { var i = b[n:] if st := typeForSignature(i); st != signatureTypeUnknown { match = n t = st } if eol := bytes.IndexByte(i, '\n'); eol >= 0 { n += eol + 1 continue } // If we reach this point, we've reached the end. break } return match, t }
package object import ( "bytes" "fmt" "io" "strings" "github.com/ProtonMail/go-crypto/openpgp" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/storer" "github.com/go-git/go-git/v6/utils/ioutil" "github.com/go-git/go-git/v6/utils/sync" ) // Tag represents an annotated tag object. It points to a single git object of // any type, but tags typically are applied to commit or blob objects. It // provides a reference that associates the target with a tag name. It also // contains meta-information about the tag, including the tagger, tag date and // message. // // Note that this is not used for lightweight tags. // // https://git-scm.com/book/en/v2/Git-Internals-Git-References#Tags type Tag struct { // Hash of the tag. Hash plumbing.Hash // Name of the tag. Name string // Tagger is the one who created the tag. Tagger Signature // Message is an arbitrary text message. Message string // PGPSignature is the PGP signature of the tag. PGPSignature string // TargetType is the object type of the target. TargetType plumbing.ObjectType // Target is the hash of the target object. Target plumbing.Hash s storer.EncodedObjectStorer } // GetTag gets a tag from an object storer and decodes it. func GetTag(s storer.EncodedObjectStorer, h plumbing.Hash) (*Tag, error) { o, err := s.EncodedObject(plumbing.TagObject, h) if err != nil { return nil, err } return DecodeTag(s, o) } // DecodeTag decodes an encoded object into a *Commit and associates it to the // given object storer. func DecodeTag(s storer.EncodedObjectStorer, o plumbing.EncodedObject) (*Tag, error) { t := &Tag{s: s} if err := t.Decode(o); err != nil { return nil, err } return t, nil } // ID returns the object ID of the tag, not the object that the tag references. // The returned value will always match the current value of Tag.Hash. // // ID is present to fulfill the Object interface. func (t *Tag) ID() plumbing.Hash { return t.Hash } // Type returns the type of object. It always returns plumbing.TagObject. // // Type is present to fulfill the Object interface. func (t *Tag) Type() plumbing.ObjectType { return plumbing.TagObject } // Decode transforms a plumbing.EncodedObject into a Tag struct. func (t *Tag) Decode(o plumbing.EncodedObject) (err error) { if o.Type() != plumbing.TagObject { return ErrUnsupportedObject } t.Hash = o.Hash() reader, err := o.Reader() if err != nil { return err } defer ioutil.CheckClose(reader, &err) r := sync.GetBufioReader(reader) defer sync.PutBufioReader(r) for { var line []byte line, err = r.ReadBytes('\n') if err != nil && err != io.EOF { return err } line = bytes.TrimSpace(line) if len(line) == 0 { break // Start of message } split := bytes.SplitN(line, []byte{' '}, 2) switch string(split[0]) { case "object": t.Target = plumbing.NewHash(string(split[1])) case "type": t.TargetType, err = plumbing.ParseObjectType(string(split[1])) if err != nil { return err } case "tag": t.Name = string(split[1]) case "tagger": t.Tagger.Decode(split[1]) } if err == io.EOF { return nil } } data, err := io.ReadAll(r) if err != nil { return err } if sm, _ := parseSignedBytes(data); sm >= 0 { t.PGPSignature = string(data[sm:]) data = data[:sm] } t.Message = string(data) return nil } // Encode transforms a Tag into a plumbing.EncodedObject. func (t *Tag) Encode(o plumbing.EncodedObject) error { return t.encode(o, true) } // EncodeWithoutSignature export a Tag into a plumbing.EncodedObject without the signature (correspond to the payload of the PGP signature). 
func (t *Tag) EncodeWithoutSignature(o plumbing.EncodedObject) error { return t.encode(o, false) } func (t *Tag) encode(o plumbing.EncodedObject, includeSig bool) (err error) { o.SetType(plumbing.TagObject) w, err := o.Writer() if err != nil { return err } defer ioutil.CheckClose(w, &err) if _, err = fmt.Fprintf(w, "object %s\ntype %s\ntag %s\ntagger ", t.Target.String(), t.TargetType.Bytes(), t.Name); err != nil { return err } if err = t.Tagger.Encode(w); err != nil { return err } if _, err = fmt.Fprint(w, "\n\n"); err != nil { return err } if _, err = fmt.Fprint(w, t.Message); err != nil { return err } // Note that this is highly sensitive to what is sent along in the message. // The message *always* needs to end with a newline, or else the message and // the signature will be concatenated into a corrupt object. Since this is a // lower-level method, we assume you know what you are doing and have already // prepared the message accordingly in the caller. if includeSig { if _, err = fmt.Fprint(w, t.PGPSignature); err != nil { return err } } return err } // Commit returns the commit pointed to by the tag. If the tag points to a // different type of object ErrUnsupportedObject will be returned. func (t *Tag) Commit() (*Commit, error) { if t.TargetType != plumbing.CommitObject { return nil, ErrUnsupportedObject } o, err := t.s.EncodedObject(plumbing.CommitObject, t.Target) if err != nil { return nil, err } return DecodeCommit(t.s, o) } // Tree returns the tree pointed to by the tag. If the tag points to a commit // object the tree of that commit will be returned. If the tag does not point // to a commit or tree object ErrUnsupportedObject will be returned. func (t *Tag) Tree() (*Tree, error) { switch t.TargetType { case plumbing.CommitObject: c, err := t.Commit() if err != nil { return nil, err } return c.Tree() case plumbing.TreeObject: return GetTree(t.s, t.Target) default: return nil, ErrUnsupportedObject } } // Blob returns the blob pointed to by the tag. If the tag points to a // different type of object ErrUnsupportedObject will be returned. func (t *Tag) Blob() (*Blob, error) { if t.TargetType != plumbing.BlobObject { return nil, ErrUnsupportedObject } return GetBlob(t.s, t.Target) } // Object returns the object pointed to by the tag. func (t *Tag) Object() (Object, error) { o, err := t.s.EncodedObject(t.TargetType, t.Target) if err != nil { return nil, err } return DecodeObject(t.s, o) } // String returns the meta information contained in the tag as a formatted // string. func (t *Tag) String() string { obj, _ := t.Object() return fmt.Sprintf( "%s %s\nTagger: %s\nDate: %s\n\n%s\n%s", plumbing.TagObject, t.Name, t.Tagger.String(), t.Tagger.When.Format(DateFormat), t.Message, objectAsString(obj), ) } // Verify performs PGP verification of the tag with a provided armored // keyring and returns the openpgp.Entity associated with the verifying key on // success. func (t *Tag) Verify(armoredKeyRing string) (*openpgp.Entity, error) { keyRingReader := strings.NewReader(armoredKeyRing) keyring, err := openpgp.ReadArmoredKeyRing(keyRingReader) if err != nil { return nil, err } // Extract signature. signature := strings.NewReader(t.PGPSignature) encoded := &plumbing.MemoryObject{} // Encode tag components, excluding signature, and get a reader object. if err := t.EncodeWithoutSignature(encoded); err != nil { return nil, err } er, err := encoded.Reader() if err != nil { return nil, err } return openpgp.CheckArmoredDetachedSignature(keyring, er, signature, nil) } // TagIter provides an iterator for a set of tags.
type TagIter struct { storer.EncodedObjectIter s storer.EncodedObjectStorer } // NewTagIter takes a storer.EncodedObjectStorer and a // storer.EncodedObjectIter and returns a *TagIter that iterates over all // tags contained in the storer.EncodedObjectIter. // // Any non-tag object returned by the storer.EncodedObjectIter is skipped. func NewTagIter(s storer.EncodedObjectStorer, iter storer.EncodedObjectIter) *TagIter { return &TagIter{iter, s} } // Next moves the iterator to the next tag and returns a pointer to it. If // there are no more tags, it returns io.EOF. func (iter *TagIter) Next() (*Tag, error) { obj, err := iter.EncodedObjectIter.Next() if err != nil { return nil, err } return DecodeTag(iter.s, obj) } // ForEach calls the cb function for each tag contained in this iter until // an error happens or the end of the iter is reached. If ErrStop is sent // the iteration is stopped but no error is returned. The iterator is closed. func (iter *TagIter) ForEach(cb func(*Tag) error) error { return iter.EncodedObjectIter.ForEach(func(obj plumbing.EncodedObject) error { t, err := DecodeTag(iter.s, obj) if err != nil { return err } return cb(t) }) } func objectAsString(obj Object) string { switch o := obj.(type) { case *Commit: return o.String() default: return "" } }
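// Illustrative sketch, not part of the original source: encoding a Tag into
// an in-memory encoded object, the inverse of Decode above. The hash and
// identity are made up; Tagger.When is left zero, which encode clamps to the
// Unix epoch.
func exampleTagEncode() error {
	t := &Tag{
		Name:       "v1.0.0",
		Message:    "release v1.0.0\n", // must end in a newline, see encode above
		TargetType: plumbing.CommitObject,
		Target:     plumbing.NewHash("a1b2c3d4e5f60718293a4b5c6d7e8f9012345678"),
		Tagger:     Signature{Name: "Alice", Email: "alice@example.com"},
	}
	obj := &plumbing.MemoryObject{}
	return t.Encode(obj)
}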
package object import ( "context" "errors" "fmt" "io" "path" "path/filepath" "sort" "strings" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/filemode" "github.com/go-git/go-git/v6/plumbing/storer" "github.com/go-git/go-git/v6/utils/ioutil" "github.com/go-git/go-git/v6/utils/sync" ) const ( maxTreeDepth = 1024 startingStackSize = 8 ) // New errors defined by this package. var ( ErrMaxTreeDepth = errors.New("maximum tree depth exceeded") ErrFileNotFound = errors.New("file not found") ErrDirectoryNotFound = errors.New("directory not found") ErrEntryNotFound = errors.New("entry not found") ErrEntriesNotSorted = errors.New("entries in tree are not sorted") ) // Tree is basically like a directory - it references a bunch of other trees // and/or blobs (i.e. files and sub-directories) type Tree struct { Entries []TreeEntry Hash plumbing.Hash s storer.EncodedObjectStorer m map[string]*TreeEntry t map[string]*Tree // tree path cache } // GetTree gets a tree from an object storer and decodes it. func GetTree(s storer.EncodedObjectStorer, h plumbing.Hash) (*Tree, error) { o, err := s.EncodedObject(plumbing.TreeObject, h) if err != nil { return nil, err } return DecodeTree(s, o) } // DecodeTree decodes an encoded object into a *Tree and associates it to the // given object storer. func DecodeTree(s storer.EncodedObjectStorer, o plumbing.EncodedObject) (*Tree, error) { t := &Tree{s: s} if err := t.Decode(o); err != nil { return nil, err } return t, nil } // TreeEntry represents a file type TreeEntry struct { Name string Mode filemode.FileMode Hash plumbing.Hash } // File returns the hash of the file identified by the `path` argument. // The path is interpreted as relative to the tree receiver. func (t *Tree) File(path string) (*File, error) { e, err := t.FindEntry(path) if err != nil { return nil, ErrFileNotFound } blob, err := GetBlob(t.s, e.Hash) if err != nil { if errors.Is(err, plumbing.ErrObjectNotFound) { return nil, ErrFileNotFound } return nil, err } return NewFile(path, e.Mode, blob), nil } // Size returns the plaintext size of an object, without reading it // into memory. func (t *Tree) Size(path string) (int64, error) { e, err := t.FindEntry(path) if err != nil { return 0, ErrEntryNotFound } return t.s.EncodedObjectSize(e.Hash) } // Tree returns the tree identified by the `path` argument. // The path is interpreted as relative to the tree receiver. func (t *Tree) Tree(path string) (*Tree, error) { e, err := t.FindEntry(path) if err != nil { return nil, ErrDirectoryNotFound } tree, err := GetTree(t.s, e.Hash) if errors.Is(err, plumbing.ErrObjectNotFound) { return nil, ErrDirectoryNotFound } return tree, err } // TreeEntryFile returns the *File for a given *TreeEntry. func (t *Tree) TreeEntryFile(e *TreeEntry) (*File, error) { blob, err := GetBlob(t.s, e.Hash) if err != nil { return nil, err } return NewFile(e.Name, e.Mode, blob), nil } // FindEntry search a TreeEntry in this tree or any subtree. func (t *Tree) FindEntry(path string) (*TreeEntry, error) { if t.t == nil { t.t = make(map[string]*Tree) } pathParts := strings.Split(path, "/") startingTree := t pathCurrent := "" // search for the longest path in the tree path cache for i := len(pathParts) - 1; i > 1; i-- { path := filepath.Join(pathParts[:i]...) 
tree, ok := t.t[path] if ok { startingTree = tree pathParts = pathParts[i:] pathCurrent = path break } } var tree *Tree var err error for tree = startingTree; len(pathParts) > 1; pathParts = pathParts[1:] { if tree, err = tree.dir(pathParts[0]); err != nil { return nil, err } pathCurrent = filepath.Join(pathCurrent, pathParts[0]) t.t[pathCurrent] = tree } return tree.entry(pathParts[0]) } func (t *Tree) dir(baseName string) (*Tree, error) { entry, err := t.entry(baseName) if err != nil { return nil, ErrDirectoryNotFound } obj, err := t.s.EncodedObject(plumbing.TreeObject, entry.Hash) if err != nil { return nil, err } tree := &Tree{s: t.s} err = tree.Decode(obj) return tree, err } func (t *Tree) entry(baseName string) (*TreeEntry, error) { if t.m == nil { t.buildMap() } entry, ok := t.m[baseName] if !ok { return nil, ErrEntryNotFound } return entry, nil } // Files returns a FileIter that allows iterating over the files in the Tree func (t *Tree) Files() *FileIter { return NewFileIter(t.s, t) } // ID returns the object ID of the tree. The returned value will always match // the current value of Tree.Hash. // // ID is present to fulfill the Object interface. func (t *Tree) ID() plumbing.Hash { return t.Hash } // Type returns the type of object. It always returns plumbing.TreeObject. func (t *Tree) Type() plumbing.ObjectType { return plumbing.TreeObject } // Decode transforms a plumbing.EncodedObject into a Tree struct func (t *Tree) Decode(o plumbing.EncodedObject) (err error) { if o.Type() != plumbing.TreeObject { return ErrUnsupportedObject } t.Hash = o.Hash() if o.Size() == 0 { return nil } t.Entries = nil t.m = nil reader, err := o.Reader() if err != nil { return err } defer ioutil.CheckClose(reader, &err) r := sync.GetBufioReader(reader) defer sync.PutBufioReader(r) for { str, err := r.ReadString(' ') if err != nil { if err == io.EOF { break } return err } str = str[:len(str)-1] // strip last byte (' ') mode, err := filemode.New(str) if err != nil { return err } name, err := r.ReadString(0) if err != nil && err != io.EOF { return err } var hash plumbing.Hash if _, err = hash.ReadFrom(r); err != nil { return err } baseName := name[:len(name)-1] t.Entries = append(t.Entries, TreeEntry{ Hash: hash, Mode: mode, Name: baseName, }) } return nil } type TreeEntrySorter []TreeEntry func (s TreeEntrySorter) Len() int { return len(s) } func (s TreeEntrySorter) Less(i, j int) bool { name1 := s[i].Name name2 := s[j].Name if s[i].Mode == filemode.Dir { name1 += "/" } if s[j].Mode == filemode.Dir { name2 += "/" } return name1 < name2 } func (s TreeEntrySorter) Swap(i, j int) { s[i], s[j] = s[j], s[i] } // Encode transforms a Tree into a plumbing.EncodedObject. // The tree entries must be sorted by name.
func (t *Tree) Encode(o plumbing.EncodedObject) (err error) { o.SetType(plumbing.TreeObject) w, err := o.Writer() if err != nil { return err } defer ioutil.CheckClose(w, &err) if !sort.IsSorted(TreeEntrySorter(t.Entries)) { return ErrEntriesNotSorted } for _, entry := range t.Entries { if strings.IndexByte(entry.Name, 0) != -1 { return fmt.Errorf("malformed filename %q", entry.Name) } if _, err = fmt.Fprintf(w, "%o %s", entry.Mode, entry.Name); err != nil { return err } if _, err = w.Write([]byte{0x00}); err != nil { return err } if _, err = entry.Hash.WriteTo(w); err != nil { return err } } return err } func (t *Tree) buildMap() { t.m = make(map[string]*TreeEntry) for i := 0; i < len(t.Entries); i++ { t.m[t.Entries[i].Name] = &t.Entries[i] } } // Diff returns a list of changes between this tree and the provided one func (t *Tree) Diff(to *Tree) (Changes, error) { return t.DiffContext(context.Background(), to) } // DiffContext returns a list of changes between this tree and the provided one. // An error will be returned if the context expires. The provided context must // be non-nil. // // NOTE: Since version 5.1.0 the renames are correctly handled, the settings // used are the recommended options DefaultDiffTreeOptions. func (t *Tree) DiffContext(ctx context.Context, to *Tree) (Changes, error) { return DiffTreeWithOptions(ctx, t, to, DefaultDiffTreeOptions) } // Patch returns a Patch with all the changes between this tree and the provided // one, organized in file patches and chunks. This representation can be used to // create several diff outputs. func (t *Tree) Patch(to *Tree) (*Patch, error) { return t.PatchContext(context.Background(), to) } // PatchContext returns a Patch with all the changes between this tree and the // provided one, organized in file patches and chunks. This representation can // be used to create several diff outputs. If the context expires, an error // will be returned. The provided context must be non-nil. // // NOTE: Since version 5.1.0 the renames are correctly handled, the settings // used are the recommended options DefaultDiffTreeOptions. func (t *Tree) PatchContext(ctx context.Context, to *Tree) (*Patch, error) { changes, err := t.DiffContext(ctx, to) if err != nil { return nil, err } return changes.PatchContext(ctx) } // treeEntryIter facilitates iterating through the TreeEntry objects in a Tree. type treeEntryIter struct { t *Tree pos int } func (iter *treeEntryIter) Next() (TreeEntry, error) { if iter.pos >= len(iter.t.Entries) { return TreeEntry{}, io.EOF } iter.pos++ return iter.t.Entries[iter.pos-1], nil } // TreeWalker provides a means of walking through all of the entries in a Tree. type TreeWalker struct { stack []*treeEntryIter base string recursive bool seen map[plumbing.Hash]bool s storer.EncodedObjectStorer t *Tree } // NewTreeWalker returns a new TreeWalker for the given tree. // // It is the caller's responsibility to call Close() when finished with the // tree walker. func NewTreeWalker(t *Tree, recursive bool, seen map[plumbing.Hash]bool) *TreeWalker { stack := make([]*treeEntryIter, 0, startingStackSize) stack = append(stack, &treeEntryIter{t, 0}) return &TreeWalker{ stack: stack, recursive: recursive, seen: seen, s: t.s, t: t, } } // Next returns the next object from the tree. Objects are returned in order // and subtrees are included. After the last object has been returned further // calls to Next() will return io.EOF. // // In the current implementation any objects which cannot be found in the // underlying repository will be skipped automatically. It is possible that this // may change in future versions.
func (w *TreeWalker) Next() (name string, entry TreeEntry, err error) { var obj *Tree for { current := len(w.stack) - 1 if current < 0 { // Nothing left on the stack so we're finished err = io.EOF return } if current > maxTreeDepth { // We're probably following bad data or some self-referencing tree err = ErrMaxTreeDepth return } entry, err = w.stack[current].Next() if err == io.EOF { // Finished with the current tree, move back up to the parent w.stack = w.stack[:current] w.base, _ = path.Split(w.base) w.base = strings.TrimSuffix(w.base, "/") continue } if err != nil { return } if w.seen[entry.Hash] { continue } if entry.Mode == filemode.Dir { obj, err = GetTree(w.s, entry.Hash) } name = simpleJoin(w.base, entry.Name) if err != nil { err = io.EOF return } break } if !w.recursive { return } if obj != nil { w.stack = append(w.stack, &treeEntryIter{obj, 0}) w.base = simpleJoin(w.base, entry.Name) } return } // Tree returns the tree that the tree walker most recently operated on. func (w *TreeWalker) Tree() *Tree { current := len(w.stack) - 1 if w.stack[current].pos == 0 { current-- } if current < 0 { return nil } return w.stack[current].t } // Close releases any resources used by the TreeWalker. func (w *TreeWalker) Close() { w.stack = nil } // TreeIter provides an iterator for a set of trees. type TreeIter struct { storer.EncodedObjectIter s storer.EncodedObjectStorer } // NewTreeIter takes a storer.EncodedObjectStorer and a // storer.EncodedObjectIter and returns a *TreeIter that iterates over all // trees contained in the storer.EncodedObjectIter. // // Any non-tree object returned by the storer.EncodedObjectIter is skipped. func NewTreeIter(s storer.EncodedObjectStorer, iter storer.EncodedObjectIter) *TreeIter { return &TreeIter{iter, s} } // Next moves the iterator to the next tree and returns a pointer to it. If // there are no more trees, it returns io.EOF. func (iter *TreeIter) Next() (*Tree, error) { for { obj, err := iter.EncodedObjectIter.Next() if err != nil { return nil, err } if obj.Type() != plumbing.TreeObject { continue } return DecodeTree(iter.s, obj) } } // ForEach calls the cb function for each tree contained in this iter until // an error happens or the end of the iter is reached. If ErrStop is sent // the iteration is stopped but no error is returned. The iterator is closed. func (iter *TreeIter) ForEach(cb func(*Tree) error) error { return iter.EncodedObjectIter.ForEach(func(obj plumbing.EncodedObject) error { if obj.Type() != plumbing.TreeObject { return nil } t, err := DecodeTree(iter.s, obj) if err != nil { return err } return cb(t) }) } func simpleJoin(parent, child string) string { if len(parent) > 0 { return parent + "/" + child } return child }
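// Illustrative sketch, not part of the original source: recursively listing
// every entry of a tree with TreeWalker, the intended usage of the API above.
func exampleWalkTree(t *Tree) error {
	w := NewTreeWalker(t, true, nil) // recursive, no seen set
	defer w.Close()
	for {
		name, entry, err := w.Next()
		if err == io.EOF {
			return nil
		}
		if err != nil {
			return err
		}
		fmt.Println(name, entry.Mode, entry.Hash)
	}
}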
package object import ( "io" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/filemode" "github.com/go-git/go-git/v6/utils/merkletrie/noder" ) // A treenoder is a helper type that wraps git trees into merkletrie // noders. // // As a merkletrie noder doesn't understand the concept of modes (e.g. // file permissions), the treenoder includes the mode of the git tree in // the hash, so changes in the modes will be detected as modifications // to the file contents by the merkletrie difftree algorithm. This is // consistent with how the "git diff-tree" command works. type treeNoder struct { parent *Tree // the root node is its own parent name string // empty string for the root node mode filemode.FileMode hash plumbing.Hash children []noder.Noder // memoized } // NewTreeRootNode returns the root node of a Tree func NewTreeRootNode(t *Tree) noder.Noder { if t == nil { return &treeNoder{} } return &treeNoder{ parent: t, name: "", mode: filemode.Dir, hash: t.Hash, } } func (t *treeNoder) Skip() bool { return false } func (t *treeNoder) isRoot() bool { return t.name == "" } func (t *treeNoder) String() string { return "treeNoder <" + t.name + ">" } func (t *treeNoder) Hash() []byte { if t.mode == filemode.Deprecated { return append(t.hash.Bytes(), filemode.Regular.Bytes()...) } return append(t.hash.Bytes(), t.mode.Bytes()...) } func (t *treeNoder) Name() string { return t.name } func (t *treeNoder) IsDir() bool { return t.mode == filemode.Dir } // Children will return the children of a treenoder as treenoders, // building them from the children of the wrapped git tree. func (t *treeNoder) Children() ([]noder.Noder, error) { if t.mode != filemode.Dir { return noder.NoChildren, nil } // children are memoized for efficiency if t.children != nil { return t.children, nil } // the parent of the returned children will be ourself as a tree if // we are a not the root treenoder. The root is special as it // is is own parent. parent := t.parent if !t.isRoot() { var err error if parent, err = t.parent.Tree(t.name); err != nil { return nil, err } } var err error t.children, err = transformChildren(parent) return t.children, err } // Returns the children of a tree as treenoders. // Efficiency is key here. func transformChildren(t *Tree) ([]noder.Noder, error) { var err error var e TreeEntry // there will be more tree entries than children in the tree, // due to submodules and empty directories, but I think it is still // worth it to pre-allocate the whole array now, even if sometimes // is bigger than needed. ret := make([]noder.Noder, 0, len(t.Entries)) walker := NewTreeWalker(t, false, nil) // don't recurse // don't defer walker.Close() for efficiency reasons. for { _, e, err = walker.Next() if err == io.EOF { break } if err != nil { walker.Close() return nil, err } ret = append(ret, &treeNoder{ parent: t, name: e.Name, mode: e.Mode, hash: e.Hash, }) } walker.Close() return ret, nil } // len(t.tree.Entries) != the number of elements walked by treewalker // for some reason because of empty directories, submodules, etc, so we // have to walk here. func (t *treeNoder) NumChildren() (int, error) { children, err := t.Children() if err != nil { return 0, err } return len(children), nil }
// Package transport implements the git pack protocol with a pluggable // transport. This is a low-level package to implement new transports. Use a // concrete implementation instead (e.g. http, file, ssh). // // A simple example of usage can be found in the file package. package transport import ( "context" "errors" "io" "regexp" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/protocol" "github.com/go-git/go-git/v6/plumbing/protocol/packp" "github.com/go-git/go-git/v6/plumbing/protocol/packp/capability" "github.com/go-git/go-git/v6/plumbing/protocol/packp/sideband" "github.com/go-git/go-git/v6/storage" ) const ( readErrorSecondsTimeout = 10 ) var ( // ErrUnsupportedVersion is returned when the protocol version is not // supported. ErrUnsupportedVersion = errors.New("unsupported protocol version") // ErrUnsupportedService is returned when the service is not supported. ErrUnsupportedService = errors.New("unsupported service") // ErrInvalidResponse is returned when the response is invalid. ErrInvalidResponse = errors.New("invalid response") // ErrTimeoutExceeded is returned when the timeout is exceeded. ErrTimeoutExceeded = errors.New("timeout exceeded") // ErrPackedObjectsNotSupported is returned when the server does not support // packed objects. ErrPackedObjectsNotSupported = errors.New("packed objects not supported") // stdErrSkipPattern is used for skipping lines from a command's stderr output. // Any line matching this pattern will be skipped from further // processing and not be returned to calling code. stdErrSkipPattern = regexp.MustCompile("^remote:( =*){0,1}$") ) // RemoteError represents an error returned by the remote. // TODO: embed error type RemoteError struct { Reason string } // Error implements the error interface. func (e *RemoteError) Error() string { return e.Reason } // NewRemoteError creates a new RemoteError. func NewRemoteError(reason string) error { return &RemoteError{Reason: reason} } // Connection represents a session endpoint connection. type Connection interface { // Close closes the connection. Close() error // Capabilities returns the list of capabilities supported by the server. Capabilities() *capability.List // Version returns the Git protocol version the server supports. Version() protocol.Version // StatelessRPC indicates that the connection is a half-duplex connection // and should operate in half-duplex mode i.e. performs a single read-write // cycle. This fits with the HTTP POST request process where a session may // read the request, write a response, and exit. StatelessRPC() bool // GetRemoteRefs returns the references advertised by the remote. // Using protocol v0 or v1, this returns the references advertised by the // remote during the handshake. Using protocol v2, this runs the ls-refs // command on the remote. // This will error if the session is not already established using // Handshake. GetRemoteRefs(ctx context.Context) ([]*plumbing.Reference, error) // Fetch sends a fetch-pack request to the server. Fetch(ctx context.Context, req *FetchRequest) error // Push sends a send-pack request to the server. Push(ctx context.Context, req *PushRequest) error } var _ io.Closer = Connection(nil) // FetchRequest contains the parameters for a fetch-pack request. // This is used during the pack negotiation phase of the fetch operation. // See https://git-scm.com/docs/pack-protocol#_packfile_negotiation type FetchRequest struct { // Progress is the progress sideband. Progress sideband.Progress // Wants is the list of references to fetch.
// TODO: Build this slice in the transport package. Wants []plumbing.Hash // Haves is the list of references the client already has. // TODO: Build this slice in the transport package. Haves []plumbing.Hash // Depth is the depth of the fetch. Depth int // Filter holds the filters to be applied when deciding what // objects will be added to the packfile. Filter packp.Filter // IncludeTags indicates whether tags should be fetched. IncludeTags bool } // PushRequest contains the parameters for a push request. type PushRequest struct { // Packfile is the packfile reader. Packfile io.ReadCloser // Commands is the list of push commands to be sent to the server. // TODO: build the Commands slice in the transport package. Commands []*packp.Command // Progress is the progress sideband. Progress sideband.Progress // Options is a set of push-options to be sent to the server during push. Options []string // Atomic indicates an atomic push. // If the server supports atomic push, it will update the refs in one // atomic transaction. Either all refs are updated or none. Atomic bool } // Session is a Git protocol transfer session. // This is used by all protocols. type Session interface { // Handshake starts the negotiation with the remote to get the protocol // version, if not already connected. // Params are the optional extra parameters to be sent to the server. Use // this to send the protocol version of the client and any other extra parameters. Handshake(ctx context.Context, service Service, params ...string) (Connection, error) } // Commander creates Command instances. This is the main entry point for // transport implementations. type Commander interface { // Command creates a new Command for the given git command and // endpoint. cmd can be git-upload-pack or git-receive-pack. An // error should be returned if the endpoint is not supported or the // command cannot be created (e.g. binary does not exist, connection // cannot be established). Command(ctx context.Context, cmd string, ep *Endpoint, auth AuthMethod, params ...string) (Command, error) } // Command is used for a single command execution. // This interface is modeled after exec.Cmd and ssh.Session in the standard // library. type Command interface { // StderrPipe returns a pipe that will be connected to the command's // standard error when the command starts. It should not be called after // Start. StderrPipe() (io.Reader, error) // StdinPipe returns a pipe that will be connected to the command's // standard input when the command starts. It should not be called after // Start. The pipe should be closed when no more input is expected. StdinPipe() (io.WriteCloser, error) // StdoutPipe returns a pipe that will be connected to the command's // standard output when the command starts. It should not be called after // Start. StdoutPipe() (io.Reader, error) // Start starts the specified command. It does not wait for it to // complete. Start() error // Close closes the command and releases any resources used by it. It // will block until the command exits. Close() error } // CommandKiller extends the Command interface, allowing the command to be // killed. type CommandKiller interface { // Kill kills the command and closes the session, whatever state it is in. // It will block until the command is terminated. Kill() error } type client struct { cmdr Commander } // NewPackTransport creates a new client using the given Commander. func NewPackTransport(runner Commander) Transport { return &client{runner} } // NewSession returns a new session for an endpoint.
func (c *client) NewSession(st storage.Storer, ep *Endpoint, auth AuthMethod) (Session, error) { return NewPackSession(st, ep, auth, c.cmdr) } // SupportedProtocols returns the list of Git protocol versions supported by // the transport client. func (c *client) SupportedProtocols() []protocol.Version { return []protocol.Version{ protocol.V0, protocol.V1, } }
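// Illustrative sketch, not part of the original source: a typical consumer of
// the interfaces above. It assumes the Transport interface exposes NewSession
// (as implemented by client) and that UploadPackService is the Service
// constant for git-upload-pack defined elsewhere in this package.
func exampleListRemoteRefs(ctx context.Context, t Transport, st storage.Storer, ep *Endpoint, auth AuthMethod) ([]*plumbing.Reference, error) {
	sess, err := t.NewSession(st, ep, auth)
	if err != nil {
		return nil, err
	}
	conn, err := sess.Handshake(ctx, UploadPackService)
	if err != nil {
		return nil, err
	}
	defer conn.Close()
	return conn.GetRemoteRefs(ctx)
}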
package transport import ( "context" "io" "github.com/go-git/go-git/v6/plumbing/format/packfile" "github.com/go-git/go-git/v6/plumbing/protocol/packp" "github.com/go-git/go-git/v6/plumbing/protocol/packp/capability" "github.com/go-git/go-git/v6/plumbing/protocol/packp/sideband" "github.com/go-git/go-git/v6/storage" "github.com/go-git/go-git/v6/utils/ioutil" ) // FetchPack fetches a packfile from the remote connection into the given // storage repository and updates the shallow information. func FetchPack( ctx context.Context, st storage.Storer, conn Connection, packf io.ReadCloser, shallowInfo *packp.ShallowUpdate, req *FetchRequest, ) (err error) { packf = ioutil.NewContextReadCloser(ctx, packf) // Do we have sideband enabled? var demuxer *sideband.Demuxer var reader io.Reader = packf caps := conn.Capabilities() if caps.Supports(capability.Sideband64k) { demuxer = sideband.NewDemuxer(sideband.Sideband64k, reader) } else if caps.Supports(capability.Sideband) { demuxer = sideband.NewDemuxer(sideband.Sideband, reader) } if demuxer != nil && req.Progress != nil { demuxer.Progress = req.Progress reader = demuxer } if err := packfile.UpdateObjectStorage(st, reader); err != nil { return err } if err := packf.Close(); err != nil { return err } // Update shallow if shallowInfo != nil { if err := updateShallow(st, shallowInfo); err != nil { return err } } return nil } func updateShallow(st storage.Storer, shallowInfo *packp.ShallowUpdate) error { shallows, err := st.Shallow() if err != nil { return err } outer: for _, s := range shallowInfo.Shallows { for _, oldS := range shallows { if s == oldS { continue outer } } shallows = append(shallows, s) } // unshallow commits for _, s := range shallowInfo.Unshallows { for i, oldS := range shallows { if s == oldS { shallows = append(shallows[:i], shallows[i+1:]...) break } } } return st.SetShallow(shallows) }
package transport import ( "path/filepath" "github.com/go-git/go-git/v6/plumbing/cache" "github.com/go-git/go-git/v6/plumbing/storer" "github.com/go-git/go-git/v6/storage" "github.com/go-git/go-git/v6/storage/filesystem" "github.com/go-git/go-billy/v5" "github.com/go-git/go-billy/v5/osfs" ) // DefaultLoader is a filesystem loader ignoring host and resolving paths to /. var DefaultLoader = NewFilesystemLoader(osfs.New(""), false) // Loader loads repository's storer.Storer based on an optional host and a path. type Loader interface { // Load loads a storer.Storer given a transport.Endpoint. // Returns transport.ErrRepositoryNotFound if the repository does not // exist. Load(ep *Endpoint) (storage.Storer, error) } // FilesystemLoader is a Loader that uses a billy.Filesystem to load // repositories from the file system. It ignores the host and resolves paths to // the given base filesystem. type FilesystemLoader struct { base billy.Filesystem strict bool } // NewFilesystemLoader creates a Loader that ignores host and resolves paths // with a given base filesystem. func NewFilesystemLoader(base billy.Filesystem, strict bool) Loader { return &FilesystemLoader{base, strict} } // Load looks up the endpoint's path in the base file system and returns a // storer for it. Returns transport.ErrRepositoryNotFound if a repository does // not exist in the given path. func (l *FilesystemLoader) Load(ep *Endpoint) (storage.Storer, error) { return l.load(ep, false) } func (l *FilesystemLoader) load(ep *Endpoint, tried bool) (storage.Storer, error) { fs, err := l.base.Chroot(ep.Path) if err != nil { return nil, err } if _, err := fs.Stat("config"); err != nil { if !l.strict && !tried { if fi, err := fs.Stat(".git"); err == nil && fi.IsDir() { ep.Path = filepath.Join(ep.Path, ".git") } else { ep.Path = ep.Path + ".git" } return l.load(ep, true) } return nil, ErrRepositoryNotFound } return filesystem.NewStorage(fs, cache.NewObjectLRUDefault()), nil } // MapLoader is a Loader that uses a lookup map of storer.Storer by // transport.Endpoint. type MapLoader map[string]storer.Storer // Load returns a storer.Storer for given a transport.Endpoint by looking it up // in the map. Returns transport.ErrRepositoryNotFound if the endpoint does not // exist. func (l MapLoader) Load(ep *Endpoint) (storer.Storer, error) { s, ok := l[ep.String()] if !ok { return nil, ErrRepositoryNotFound } return s, nil }
package transport

import (
	"bytes"
	"context"
	"io"

	"github.com/go-git/go-git/v6/utils/ioutil"
)

// mockCommand is a no-op Command implementation used in tests. Pointer
// receivers ensure the pipe methods expose the same underlying buffers
// rather than pointers into copies of the struct.
type mockCommand struct {
	stdin  bytes.Buffer
	stdout bytes.Buffer
	stderr bytes.Buffer
}

func (c *mockCommand) StderrPipe() (io.Reader, error) { return &c.stderr, nil }

func (c *mockCommand) StdinPipe() (io.WriteCloser, error) {
	return ioutil.WriteNopCloser(&c.stdin), nil
}

func (c *mockCommand) StdoutPipe() (io.Reader, error) { return &c.stdout, nil }

func (c *mockCommand) Start() error { return nil }

func (c *mockCommand) Close() error { return nil }

type mockCommander struct {
	stderr string
}

func (c mockCommander) Command(_ context.Context, cmd string, ep *Endpoint, auth AuthMethod, _ ...string) (Command, error) {
	mc := &mockCommand{}
	mc.stderr.WriteString(c.stderr)
	return mc, nil
}
package transport

import (
	"context"
	"errors"
	"fmt"
	"io"

	"github.com/go-git/go-git/v6/plumbing"
	"github.com/go-git/go-git/v6/plumbing/format/pktline"
	"github.com/go-git/go-git/v6/plumbing/protocol/packp"
	"github.com/go-git/go-git/v6/plumbing/protocol/packp/capability"
	"github.com/go-git/go-git/v6/storage"
	"github.com/go-git/go-git/v6/utils/ioutil"
)

var (
	ErrFilterNotSupported  = errors.New("server does not support filters")
	ErrShallowNotSupported = errors.New("server does not support shallow clients")
)

// NegotiatePack returns the result of the pack negotiation phase of the fetch operation.
// See https://git-scm.com/docs/pack-protocol#_packfile_negotiation
func NegotiatePack(
	ctx context.Context,
	st storage.Storer,
	conn Connection,
	reader io.Reader,
	writer io.WriteCloser,
	req *FetchRequest,
) (shallowInfo *packp.ShallowUpdate, err error) {
	reader = ioutil.NewContextReader(ctx, reader)
	writer = ioutil.NewContextWriteCloser(ctx, writer)
	caps := conn.Capabilities()

	// Create upload-request
	upreq := packp.NewUploadRequest()
	multiAck := caps.Supports(capability.MultiACK)
	multiAckDetailed := caps.Supports(capability.MultiACKDetailed)
	if multiAckDetailed {
		upreq.Capabilities.Set(capability.MultiACKDetailed) // nolint: errcheck
	} else if multiAck {
		upreq.Capabilities.Set(capability.MultiACK) // nolint: errcheck
	}

	if req.Progress != nil {
		if caps.Supports(capability.Sideband64k) {
			upreq.Capabilities.Set(capability.Sideband64k) // nolint: errcheck
		} else if caps.Supports(capability.Sideband) {
			upreq.Capabilities.Set(capability.Sideband) // nolint: errcheck
		}
	} else if caps.Supports(capability.NoProgress) {
		upreq.Capabilities.Set(capability.NoProgress) // nolint: errcheck
	}

	// TODO: support thin-pack
	// if caps.Supports(capability.ThinPack) {
	// 	upreq.Capabilities.Set(capability.ThinPack) // nolint: errcheck
	// }

	if caps.Supports(capability.OFSDelta) {
		upreq.Capabilities.Set(capability.OFSDelta) // nolint: errcheck
	}

	if caps.Supports(capability.Agent) {
		upreq.Capabilities.Set(capability.Agent, capability.DefaultAgent()) // nolint: errcheck
	}

	if req.IncludeTags && caps.Supports(capability.IncludeTag) {
		upreq.Capabilities.Set(capability.IncludeTag) // nolint: errcheck
	}

	if req.Filter != "" {
		if caps.Supports(capability.Filter) {
			upreq.Filter = req.Filter
			if err := upreq.Capabilities.Set(capability.Filter); err != nil {
				return nil, err
			}
		} else {
			return nil, ErrFilterNotSupported
		}
	}

	upreq.Wants = req.Wants

	if req.Depth > 0 {
		if !caps.Supports(capability.Shallow) {
			return nil, ErrShallowNotSupported
		}

		upreq.Depth = packp.DepthCommits(req.Depth)
		upreq.Shallows, err = st.Shallow()
		if err != nil {
			return nil, err
		}
	}

	// Note: when every want is already among the haves, the server has
	// nothing new to send. Flush, close the writer to end the request, and
	// report ErrNoChange.
	if isSubset(req.Wants, req.Haves) && len(upreq.Shallows) == 0 {
		if err := pktline.WriteFlush(writer); err != nil {
			return nil, err
		}

		// Close the writer to signal the end of the request
		if err := writer.Close(); err != nil {
			return nil, fmt.Errorf("closing writer: %w", err)
		}

		return nil, ErrNoChange
	}

	// Create upload-haves
	common := map[plumbing.Hash]struct{}{}
	var inVain int
	var done bool
	var gotContinue bool // whether we got a continue from the server
	firstRound := true
	for !done {
		// Pop up to 32 have commits from the end of the pending list per
		// round.
		// TODO: Properly build and implement haves negotiation, and move it
		// from remote.go to this package.
		var uphav packp.UploadHaves
		for i := 0; i < 32 && len(req.Haves) > 0; i++ {
			uphav.Haves = append(uphav.Haves, req.Haves[len(req.Haves)-1])
			req.Haves = req.Haves[:len(req.Haves)-1]
			inVain++
		}

		// Let the server know we're done when we run out of haves, or when
		// we have sent maxInVain haves since the last server "continue".
		const maxInVain = 256
		done = len(req.Haves) == 0 || (gotContinue && inVain >= maxInVain)

		uphav.Done = done

		// Note: when every want is already among the haves sent in this
		// round, the server has nothing new to send. Flush, close the writer
		// to end the request, and report ErrNoChange.
		if isSubset(req.Wants, uphav.Haves) && len(upreq.Shallows) == 0 {
			if err := pktline.WriteFlush(writer); err != nil {
				return nil, err
			}

			// Close the writer to signal the end of the request
			if err := writer.Close(); err != nil {
				return nil, fmt.Errorf("closing writer: %w", err)
			}

			return nil, ErrNoChange
		}

		// Begin the upload-pack negotiation
		if firstRound || conn.StatelessRPC() {
			if err := upreq.Encode(writer); err != nil {
				return nil, fmt.Errorf("sending upload-request: %w", err)
			}
		}

		readc := make(chan error)
		if !conn.StatelessRPC() {
			go func() {
				readc <- readShallows(conn, reader, req, &shallowInfo, firstRound)
			}()
		}

		// Encode upload-haves
		if err := uphav.Encode(writer); err != nil {
			return nil, fmt.Errorf("sending upload-haves: %w", err)
		}

		// Close the writer to signal the end of the request
		if conn.StatelessRPC() {
			if err := writer.Close(); err != nil {
				return nil, fmt.Errorf("closing writer: %w", err)
			}
			if err := readShallows(conn, reader, req, &shallowInfo, firstRound); err != nil {
				return nil, err
			}
		} else {
			// Wait for the shallow-update to be read
			if err := <-readc; err != nil {
				return nil, err
			}
		}

		go func() {
			defer close(readc)
			if done || len(uphav.Haves) > 0 {
				var srvrs packp.ServerResponse
				if err := srvrs.Decode(reader); err != nil {
					readc <- fmt.Errorf("decoding server-response: %w", err)
					return
				}

				for _, ack := range srvrs.ACKs {
					if !gotContinue && ack.Status > 0 {
						gotContinue = true
					}
					if ack.Status == packp.ACKCommon {
						common[ack.Hash] = struct{}{}
					}
				}
			}
			readc <- nil
		}()

		// Wait for the server response to be read
		if err := <-readc; err != nil {
			return nil, err
		}

		firstRound = false
	}

	if !conn.StatelessRPC() {
		if err := writer.Close(); err != nil {
			return nil, fmt.Errorf("closing writer: %w", err)
		}
	}

	return shallowInfo, nil
}

func isSubset(needle []plumbing.Hash, haystack []plumbing.Hash) bool {
	for _, h := range needle {
		found := false
		for _, oh := range haystack {
			if h == oh {
				found = true
				break
			}
		}

		if !found {
			return false
		}
	}

	return true
}

func readShallows(
	conn Connection,
	r io.Reader,
	req *FetchRequest,
	shallowInfo **packp.ShallowUpdate,
	firstRound bool,
) error {
	// Decode shallow-update. If depth is not zero, we expect a
	// shallow-update message from the server.
	if (firstRound || conn.StatelessRPC()) && req.Depth > 0 {
		var shupd packp.ShallowUpdate
		if err := shupd.Decode(r); err != nil {
			return fmt.Errorf("decoding shallow-update: %w", err)
		}

		// Only keep the first shallow-update, writing it through to the
		// caller's pointer.
		if *shallowInfo == nil {
			*shallowInfo = &shupd
		}
	}

	return nil
}
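// Usage sketch (illustrative only): a minimal FetchRequest for a shallow
// fetch of a single tip commit. Only fields referenced by NegotiatePack
// above are set; the function name is an assumption for the example.

package transport

import "github.com/go-git/go-git/v6/plumbing"

func exampleShallowFetchRequest(tip plumbing.Hash) *FetchRequest {
	return &FetchRequest{
		Wants: []plumbing.Hash{tip}, // what we ask the server for
		Depth: 1,                    // requires the shallow capability
	}
}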
package transport

import (
	"bufio"
	"bytes"
	"context"
	"io"
	"strings"

	"github.com/go-git/go-git/v6/plumbing"
	"github.com/go-git/go-git/v6/plumbing/protocol"
	"github.com/go-git/go-git/v6/plumbing/protocol/packp"
	"github.com/go-git/go-git/v6/plumbing/protocol/packp/capability"
	"github.com/go-git/go-git/v6/storage"
	"github.com/go-git/go-git/v6/utils/ioutil"
)

// NewPackSession creates a new session that implements a full-duplex Git pack protocol.
func NewPackSession(
	st storage.Storer,
	ep *Endpoint,
	auth AuthMethod,
	cmdr Commander,
) (Session, error) {
	ps := &PackSession{
		ep:   ep,
		auth: auth,
		cmdr: cmdr,
		st:   st,
	}
	return ps, nil
}

// PackSession is a session that implements a full-duplex Git pack transport.
type PackSession struct {
	cmdr Commander
	ep   *Endpoint
	auth AuthMethod
	st   storage.Storer
}

var _ Session = &PackSession{}

// Handshake implements Session.
func (p *PackSession) Handshake(ctx context.Context, service Service, params ...string) (conn Connection, err error) {
	switch service {
	case UploadPackService, ReceivePackService:
		// do nothing
	default:
		return nil, ErrUnsupportedService
	}

	cmd, err := p.cmdr.Command(ctx, service.String(), p.ep, p.auth, params...)
	if err != nil {
		return nil, err
	}

	c := &packConnection{
		st:  p.st,
		cmd: cmd,
		svc: service,
	}

	// Check if the context is already done before starting the command.
	if ctx.Err() != nil {
		return nil, ctx.Err()
	}

	stdin, err := cmd.StdinPipe()
	if err != nil {
		return nil, err
	}

	c.w = stdin

	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return nil, err
	}

	cr := ioutil.NewContextReaderWithCloser(ctx, stdout, cmd)
	c.r = bufio.NewReader(cr)

	stderr, err := cmd.StderrPipe()
	if err != nil {
		return nil, err
	}

	// Some transports, like git, don't support stderr, so check that it is
	// not nil before starting to read from it.
	if stderr != nil {
		go io.Copy(&c.stderrBuf, stderr) // nolint: errcheck
	}

	// Check if stderr is not empty before returning.
	defer func() {
		checkError(c.stderr(), &err)
	}()

	if err := cmd.Start(); err != nil {
		return nil, err
	}

	c.version, err = DiscoverVersion(c.r)
	if err != nil {
		return nil, err
	}

	switch c.version {
	case protocol.V2:
		return nil, ErrUnsupportedVersion
	case protocol.V1:
		// The version line was already consumed by DiscoverVersion.
		fallthrough
	case protocol.V0:
	}

	ar := packp.NewAdvRefs()
	if err := ar.Decode(c.r); err != nil {
		return nil, err
	}

	c.refs = ar
	c.caps = ar.Capabilities

	return c, nil
}

// packConnection is a convenience type that implements io.ReadWriteCloser.
type packConnection struct {
	st  storage.Storer
	cmd Command
	svc Service

	w io.WriteCloser // stdin
	r *bufio.Reader  // stdout

	stderrBuf bytes.Buffer

	version protocol.Version
	caps    *capability.List
	refs    *packp.AdvRefs
}

var _ Connection = &packConnection{}

// stderr returns stderr of the command if it's not empty. This will always
// return a RemoteError.
func (p *packConnection) stderr() error {
	s := strings.TrimSpace(p.stderrBuf.String())
	if s == "" {
		return nil
	}

	return NewRemoteError(s)
}

// Close implements Connection.
func (p *packConnection) Close() error {
	return p.cmd.Close()
}

// Capabilities implements Connection.
func (p *packConnection) Capabilities() *capability.List {
	return p.caps
}

// GetRemoteRefs implements Connection.
func (p *packConnection) GetRemoteRefs(ctx context.Context) ([]*plumbing.Reference, error) {
	if p.refs == nil {
		// TODO: return appropriate error
		return nil, ErrEmptyRemoteRepository
	}

	// Some servers, like jGit, announce capabilities instead of returning a
	// packp message with a flush. This check treats an adv-refs that carries
	// only capabilities, and no references, as empty.
	forPush := p.svc == ReceivePackService
	if !forPush && p.refs.IsEmpty() {
		return nil, ErrEmptyRemoteRepository
	}

	return p.refs.MakeReferenceSlice()
}

// Version implements Connection.
func (p *packConnection) Version() protocol.Version {
	return p.version
}

// StatelessRPC implements Connection.
func (*packConnection) StatelessRPC() bool {
	return false
}

// Fetch implements Connection.
func (p *packConnection) Fetch(ctx context.Context, req *FetchRequest) (err error) {
	shallows, err := NegotiatePack(ctx, p.st, p, p.r, p.w, req)
	if err != nil {
		return err
	}

	return FetchPack(ctx, p.st, p, io.NopCloser(p.r), shallows, req)
}

// Push implements Connection.
func (p *packConnection) Push(ctx context.Context, req *PushRequest) (err error) {
	return SendPack(ctx, p.st, p, p.w, io.NopCloser(p.r), req)
}

// checkError updates *perr with err when err is not nil.
func checkError(err error, perr *error) {
	if err != nil {
		*perr = err
	}
}
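// Usage sketch (illustrative only): a complete full-duplex fetch through
// PackSession. cmdr is any Commander implementation, for instance one that
// spawns git-upload-pack over SSH; the function name is an assumption.

package transport

import (
	"context"

	"github.com/go-git/go-git/v6/storage"
)

func examplePackSessionFetch(ctx context.Context, st storage.Storer, ep *Endpoint, auth AuthMethod, cmdr Commander, req *FetchRequest) error {
	sess, err := NewPackSession(st, ep, auth, cmdr)
	if err != nil {
		return err
	}
	conn, err := sess.Handshake(ctx, UploadPackService)
	if err != nil {
		return err
	}
	defer conn.Close() // closes the underlying command
	return conn.Fetch(ctx, req)
}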
package transport import ( "context" "fmt" "io" "github.com/go-git/go-git/v6/plumbing/protocol/packp" "github.com/go-git/go-git/v6/plumbing/protocol/packp/capability" "github.com/go-git/go-git/v6/plumbing/protocol/packp/sideband" "github.com/go-git/go-git/v6/storage" "github.com/go-git/go-git/v6/utils/ioutil" ) // buildUpdateRequests constructs a new update-requests object for the given // connection and push request. func buildUpdateRequests(caps *capability.List, req *PushRequest) *packp.UpdateRequests { upreq := packp.NewUpdateRequests() // The reference discovery phase is done nearly the same way as it is in // the fetching protocol. Each reference obj-id and name on the server is // sent in packet-line format to the client, followed by a flush-pkt. The // only real difference is that the capability listing is different - the // only possible values are report-status, report-status-v2, delete-refs, // ofs-delta, atomic and push-options. for _, cap := range []capability.Capability{ capability.ReportStatus, // TODO: support report-status-v2 // capability.ReportStatusV2, capability.DeleteRefs, capability.OFSDelta, // This is set later if options are present. // capability.PushOptions, } { if caps.Supports(cap) { upreq.Capabilities.Set(cap) //nolint:errcheck } } if req.Atomic && caps.Supports(capability.Atomic) { upreq.Capabilities.Set(capability.Atomic) //nolint:errcheck } upreq.Commands = req.Commands return upreq } // SendPack is a function that sends a packfile to a remote server. func SendPack( ctx context.Context, st storage.Storer, conn Connection, writer io.WriteCloser, reader io.ReadCloser, req *PushRequest, ) error { writer = ioutil.NewContextWriteCloser(ctx, writer) reader = ioutil.NewContextReadCloser(ctx, reader) var needPackfile bool for _, cmd := range req.Commands { if cmd.Action() != packp.Delete { needPackfile = true break } } if !needPackfile && req.Packfile != nil { return fmt.Errorf("packfile is not accepted for push request without new objects") } if needPackfile && req.Packfile == nil { return fmt.Errorf("packfile is required for push request with new objects") } caps := conn.Capabilities() upreq := buildUpdateRequests(caps, req) usePushOptions := len(req.Options) > 0 && caps.Supports(capability.PushOptions) if usePushOptions { upreq.Capabilities.Set(capability.PushOptions) //nolint:errcheck } if err := upreq.Encode(writer); err != nil { return err } if usePushOptions { var opts packp.PushOptions opts.Options = req.Options if err := opts.Encode(writer); err != nil { return fmt.Errorf("encoding push-options: %w", err) } } // Send the packfile. if req.Packfile != nil { if _, err := io.Copy(writer, req.Packfile); err != nil { return err } if err := req.Packfile.Close(); err != nil { return fmt.Errorf("closing packfile: %w", err) } } // Close the write pipe to signal the end of the request. if err := writer.Close(); err != nil { return err } if !caps.Supports(capability.ReportStatus) { // If we don't have report-status, we're done here. 
return nil } var r io.Reader = reader if req.Progress != nil { var d *sideband.Demuxer if caps.Supports(capability.Sideband64k) { d = sideband.NewDemuxer(sideband.Sideband64k, reader) } else if caps.Supports(capability.Sideband) { d = sideband.NewDemuxer(sideband.Sideband, reader) } if d != nil { d.Progress = req.Progress r = d } } report := packp.NewReportStatus() if err := report.Decode(r); err != nil { return fmt.Errorf("decode report-status: %w", err) } if err := reader.Close(); err != nil { return fmt.Errorf("closing reader: %w", err) } return report.Error() }
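// Usage sketch (illustrative only): a delete-only PushRequest. SendPack
// above rejects a packfile for such requests, since deletions introduce no
// new objects. The packp.Command field layout (Name, Old, New) is assumed
// from its use elsewhere in this package.

package transport

import (
	"github.com/go-git/go-git/v6/plumbing"
	"github.com/go-git/go-git/v6/plumbing/protocol/packp"
)

func exampleDeletePush(ref plumbing.ReferenceName, old plumbing.Hash) *PushRequest {
	return &PushRequest{
		Commands: []*packp.Command{
			// A zero New hash marks the command as a deletion.
			{Name: ref, Old: old, New: plumbing.ZeroHash},
		},
	}
}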
package transport import ( "bufio" "context" "fmt" "io" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/format/packfile" "github.com/go-git/go-git/v6/plumbing/format/pktline" "github.com/go-git/go-git/v6/plumbing/protocol" "github.com/go-git/go-git/v6/plumbing/protocol/packp" "github.com/go-git/go-git/v6/plumbing/protocol/packp/capability" "github.com/go-git/go-git/v6/plumbing/protocol/packp/sideband" "github.com/go-git/go-git/v6/plumbing/storer" "github.com/go-git/go-git/v6/storage" "github.com/go-git/go-git/v6/utils/ioutil" ) // ReceivePackOptions is a set of options for the ReceivePack service. type ReceivePackOptions struct { GitProtocol string AdvertiseRefs bool StatelessRPC bool } // ReceivePack is a server command that serves the receive-pack service. // TODO: support hooks func ReceivePack( ctx context.Context, st storage.Storer, r io.ReadCloser, w io.WriteCloser, opts *ReceivePackOptions, ) error { if w == nil { return fmt.Errorf("nil writer") } w = ioutil.NewContextWriteCloser(ctx, w) if opts == nil { opts = &ReceivePackOptions{} } if opts.AdvertiseRefs || !opts.StatelessRPC { switch version := ProtocolVersion(opts.GitProtocol); version { case protocol.V1: if _, err := pktline.Writef(w, "version %d\n", version); err != nil { return err } // TODO: support version 2 case protocol.V0, protocol.V2: default: return fmt.Errorf("%w: %q", ErrUnsupportedVersion, version) } if err := AdvertiseReferences(ctx, st, w, ReceivePackService, opts.StatelessRPC); err != nil { return err } } if opts.AdvertiseRefs { // Done, there's nothing else to do return nil } if r == nil { return fmt.Errorf("nil reader") } r = ioutil.NewContextReadCloser(ctx, r) rd := bufio.NewReader(r) l, _, err := pktline.PeekLine(rd) if err != nil { return err } // At this point, if we get a flush packet, it means the client // has nothing to send, so we can return early. if l == pktline.Flush { return nil } updreq := packp.NewUpdateRequests() if err := updreq.Decode(rd); err != nil { return err } var ( caps = updreq.Capabilities needPackfile bool pushOpts packp.PushOptions ) // TODO: Pass the options to the server-side hooks. if updreq.Capabilities.Supports(capability.PushOptions) { if err := pushOpts.Decode(rd); err != nil { return fmt.Errorf("decoding push-options: %w", err) } } // Should we expect a packfile? for _, cmd := range updreq.Commands { if cmd.Action() != packp.Delete { needPackfile = true break } } // Receive the packfile var unpackErr error if needPackfile { unpackErr = packfile.UpdateObjectStorage(st, rd) } // Done with the request, now close the reader // to indicate that we are done reading from it. 
if err := r.Close(); err != nil { return fmt.Errorf("closing reader: %w", err) } // Report status if the client supports it if !updreq.Capabilities.Supports(capability.ReportStatus) { return unpackErr } var ( useSideband bool writer io.Writer = w ) if !caps.Supports(capability.NoProgress) { if caps.Supports(capability.Sideband64k) { writer = sideband.NewMuxer(sideband.Sideband64k, w) useSideband = true } else if caps.Supports(capability.Sideband) { writer = sideband.NewMuxer(sideband.Sideband, w) useSideband = true } } writeCloser := ioutil.NewWriteCloser(writer, w) if unpackErr != nil { res := sendReportStatus(writeCloser, unpackErr, nil) closeWriter(w) return res } var firstErr error cmdStatus := make(map[plumbing.ReferenceName]error) updateReferences(st, updreq, cmdStatus, &firstErr) if err := sendReportStatus(writeCloser, firstErr, cmdStatus); err != nil { return err } if useSideband { if err := pktline.WriteFlush(w); err != nil { return fmt.Errorf("flushing sideband: %w", err) } } if firstErr != nil { return firstErr } return closeWriter(w) } func closeWriter(w io.WriteCloser) error { if err := w.Close(); err != nil { return fmt.Errorf("closing writer: %w", err) } return nil } func sendReportStatus(w io.WriteCloser, unpackErr error, cmdStatus map[plumbing.ReferenceName]error) error { rs := packp.NewReportStatus() rs.UnpackStatus = "ok" if unpackErr != nil { rs.UnpackStatus = unpackErr.Error() } for ref, err := range cmdStatus { msg := "ok" if err != nil { msg = err.Error() } status := &packp.CommandStatus{ ReferenceName: ref, Status: msg, } rs.CommandStatuses = append(rs.CommandStatuses, status) } if err := rs.Encode(w); err != nil { return err } return nil } func setStatus(cmdStatus map[plumbing.ReferenceName]error, firstErr *error, ref plumbing.ReferenceName, err error) { cmdStatus[ref] = err if *firstErr == nil && err != nil { *firstErr = err } } func referenceExists(s storer.ReferenceStorer, n plumbing.ReferenceName) (bool, error) { _, err := s.Reference(n) if err == plumbing.ErrReferenceNotFound { return false, nil } return err == nil, err } func updateReferences(st storage.Storer, req *packp.UpdateRequests, cmdStatus map[plumbing.ReferenceName]error, firstErr *error) { for _, cmd := range req.Commands { exists, err := referenceExists(st, cmd.Name) if err != nil { setStatus(cmdStatus, firstErr, cmd.Name, err) continue } switch cmd.Action() { case packp.Create: if exists { setStatus(cmdStatus, firstErr, cmd.Name, ErrUpdateReference) continue } ref := plumbing.NewHashReference(cmd.Name, cmd.New) err := st.SetReference(ref) setStatus(cmdStatus, firstErr, cmd.Name, err) case packp.Delete: if !exists { setStatus(cmdStatus, firstErr, cmd.Name, ErrUpdateReference) continue } err := st.RemoveReference(cmd.Name) setStatus(cmdStatus, firstErr, cmd.Name, err) case packp.Update: if !exists { setStatus(cmdStatus, firstErr, cmd.Name, ErrUpdateReference) continue } ref := plumbing.NewHashReference(cmd.Name, cmd.New) err := st.SetReference(ref) setStatus(cmdStatus, firstErr, cmd.Name, err) } } }
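// Usage sketch (illustrative only): the two stateless calls an HTTP server
// would make to serve a push. The first answers the ref-advertisement
// request, the second the actual receive-pack POST; in a real server each
// call writes to its own response body.

package transport

import (
	"context"
	"io"

	"github.com/go-git/go-git/v6/storage"
)

func exampleStatelessReceivePack(ctx context.Context, st storage.Storer, r io.ReadCloser, advW, postW io.WriteCloser) error {
	// First request: advertise references only; no reader is needed.
	if err := ReceivePack(ctx, st, nil, advW, &ReceivePackOptions{
		AdvertiseRefs: true,
		StatelessRPC:  true,
	}); err != nil {
		return err
	}
	// Second request: decode the update commands and the packfile.
	return ReceivePack(ctx, st, r, postW, &ReceivePackOptions{StatelessRPC: true})
}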
package transport

import (
	"fmt"
	"sync"
)

// registry holds the transports supported by default.
var (
	registry = map[string]Transport{}
	mtx      sync.RWMutex
)

// Register adds or replaces the Transport for a protocol scheme.
// Equivalent to client.InstallProtocol in go-git before V6.
func Register(protocol string, c Transport) {
	mtx.Lock()
	registry[protocol] = c
	mtx.Unlock()
}

// Unregister removes a protocol from the list of supported protocols.
func Unregister(scheme string) {
	mtx.Lock()
	delete(registry, scheme)
	mtx.Unlock()
}

// Get returns the Transport registered for the given scheme.
func Get(p string) (Transport, error) {
	mtx.RLock()
	defer mtx.RUnlock()
	f, ok := registry[p]
	if !ok {
		return nil, fmt.Errorf("unsupported scheme %q", p)
	}

	if f == nil {
		return nil, fmt.Errorf("malformed client for scheme %q, client is defined as nil", p)
	}
	return f, nil
}
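// Usage sketch (illustrative only): installing a custom Transport under a
// made-up "mem" scheme and looking it up again.

package transport

func exampleRegistry(custom Transport) (Transport, error) {
	Register("mem", custom)
	defer Unregister("mem")
	return Get("mem")
}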
package transport import ( "context" "errors" "fmt" "io" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/object" "github.com/go-git/go-git/v6/plumbing/protocol/packp" "github.com/go-git/go-git/v6/plumbing/protocol/packp/capability" "github.com/go-git/go-git/v6/plumbing/storer" "github.com/go-git/go-git/v6/storage" ) var ErrUpdateReference = errors.New("failed to update ref") // AdvertiseReferences is a server command that implements the reference // discovery phase of the Git transfer protocol. func AdvertiseReferences( ctx context.Context, st storage.Storer, w io.Writer, service Service, smart bool, ) error { switch service { case UploadPackService, ReceivePackService: default: return fmt.Errorf("%w: %s", ErrUnsupportedService, service) } forPush := service == ReceivePackService ar := packp.NewAdvRefs() // Set server default capabilities ar.Capabilities.Set(capability.Agent, capability.DefaultAgent()) //nolint:errcheck ar.Capabilities.Set(capability.OFSDelta) //nolint:errcheck ar.Capabilities.Set(capability.Sideband64k) //nolint:errcheck if forPush { // TODO: support thin-pack ar.Capabilities.Set(capability.NoThin) //nolint:errcheck // TODO: support atomic ar.Capabilities.Set(capability.DeleteRefs) //nolint:errcheck ar.Capabilities.Set(capability.ReportStatus) //nolint:errcheck ar.Capabilities.Set(capability.PushOptions) //nolint:errcheck } else { // TODO: support include-tag // TODO: support deepen // TODO: support deepen-since ar.Capabilities.Set(capability.MultiACK) //nolint:errcheck ar.Capabilities.Set(capability.MultiACKDetailed) //nolint:errcheck ar.Capabilities.Set(capability.Sideband) //nolint:errcheck ar.Capabilities.Set(capability.NoProgress) //nolint:errcheck ar.Capabilities.Set(capability.SymRef) //nolint:errcheck ar.Capabilities.Set(capability.Shallow) //nolint:errcheck } // Set references if err := addReferences(st, ar, !forPush); err != nil { return err } if smart { smartReply := packp.SmartReply{ Service: service.String(), } if err := smartReply.Encode(w); err != nil { return fmt.Errorf("failed to encode smart reply: %w", err) } } return ar.Encode(w) } func addReferences(st storage.Storer, ar *packp.AdvRefs, addHead bool) error { iter, err := st.IterReferences() if err != nil { return err } // Add references and their peeled values if err := iter.ForEach(func(r *plumbing.Reference) error { hash, name := r.Hash(), r.Name() switch r.Type() { case plumbing.SymbolicReference: ref, err := storer.ResolveReference(st, r.Target()) if errors.Is(err, plumbing.ErrReferenceNotFound) { return nil } if err != nil { return err } hash = ref.Hash() } if name == plumbing.HEAD { if !addHead { return nil } // Add default branch HEAD symref ar.Capabilities.Add(capability.SymRef, fmt.Sprintf("%s:%s", name, r.Target())) //nolint:errcheck ar.Head = &hash } ar.References[name.String()] = hash if r.Name().IsTag() { if tag, err := object.GetTag(st, hash); err == nil { ar.Peeled[name.String()] = tag.Target } } return nil }); err != nil { return err } return nil }
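// Usage sketch (illustrative only): writing a plain (non-smart)
// advertisement for the upload-pack service, as a file-based or SSH server
// would at the start of a fetch. The function name is an assumption.

package transport

import (
	"context"
	"io"

	"github.com/go-git/go-git/v6/storage"
)

func exampleAdvertise(ctx context.Context, st storage.Storer, w io.Writer) error {
	// smart=false skips the smart-HTTP service preamble.
	return AdvertiseReferences(ctx, st, w, UploadPackService, false)
}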
package transport

import (
	"fmt"

	"github.com/go-git/go-billy/v5"
	"github.com/go-git/go-git/v6/internal/repository"
	"github.com/go-git/go-git/v6/plumbing/storer"
	"github.com/go-git/go-git/v6/storage"
)

// UpdateServerInfo updates the server info files in the repository.
//
// It generates a list of available refs for the repository. It is used by
// the dumb git HTTP transport; for more information see:
// https://git-scm.com/book/id/v2/Git-Internals-Transfer-Protocols#_the_dumb_protocol
func UpdateServerInfo(s storage.Storer, fs billy.Filesystem) error {
	pos, ok := s.(storer.PackedObjectStorer)
	if !ok {
		return ErrPackedObjectsNotSupported
	}

	infoRefs, err := fs.Create("info/refs")
	if err != nil {
		return err
	}

	defer infoRefs.Close() //nolint:errcheck

	refsIter, err := s.IterReferences()
	if err != nil {
		return err
	}

	defer refsIter.Close()

	if err := repository.WriteInfoRefs(infoRefs, s); err != nil {
		return fmt.Errorf("failed to write info/refs: %w", err)
	}

	infoPacks, err := fs.Create("objects/info/packs")
	if err != nil {
		return err
	}

	defer infoPacks.Close() //nolint:errcheck

	if err := repository.WriteObjectsInfoPacks(infoPacks, pos); err != nil {
		return fmt.Errorf("failed to write objects/info/packs: %w", err)
	}

	return nil
}
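// Usage sketch (illustrative only): regenerating the dumb-HTTP metadata for
// a bare on-disk repository. The path is an assumption; the storage setup
// mirrors FilesystemLoader.load above.

package transport

import (
	"github.com/go-git/go-billy/v5/osfs"
	"github.com/go-git/go-git/v6/plumbing/cache"
	"github.com/go-git/go-git/v6/storage/filesystem"
)

func exampleUpdateServerInfo() error {
	fs := osfs.New("/srv/git/repo.git") // hypothetical bare repository
	st := filesystem.NewStorage(fs, cache.NewObjectLRUDefault())
	return UpdateServerInfo(st, fs)
}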
package transport import "strings" // Service represents a Git transport service. // All services are prefixed with "git-". type Service string // String returns the string representation of the service. func (s Service) String() string { return string(s) } // Name returns the name of the service without the "git-" prefix. func (s Service) Name() string { return strings.TrimPrefix(string(s), "git-") } // Git service names. const ( UploadPackService Service = "git-upload-pack" UploadArchiveService Service = "git-upload-archive" ReceivePackService Service = "git-receive-pack" )
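// Usage sketch (illustrative only): String keeps the full service name while
// Name strips the "git-" prefix, e.g. "git-upload-pack" vs "upload-pack".

package transport

func exampleServiceNames() (string, string) {
	return UploadPackService.String(), UploadPackService.Name()
}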
// Package transport includes the implementation for different transport
// protocols.
//
// A Transport can be used to fetch and send packfiles to a git server.
// Use Get to obtain the Transport registered for a URL scheme, and Register
// to install your own protocols.
//
// go-git provides HTTP and SSH implementations out of the box; each protocol
// has its own Transport implementation.
package transport

import (
	"bytes"
	"errors"
	"fmt"
	"net/url"
	"regexp"
	"runtime"
	"strconv"
	"strings"

	giturl "github.com/go-git/go-git/v6/internal/url"
	"github.com/go-git/go-git/v6/plumbing"
	"github.com/go-git/go-git/v6/plumbing/protocol"
	"github.com/go-git/go-git/v6/storage"
)

var (
	ErrRepositoryNotFound     = errors.New("repository not found")
	ErrEmptyRemoteRepository  = errors.New("remote repository is empty")
	ErrNoChange               = errors.New("no change")
	ErrAuthenticationRequired = errors.New("authentication required")
	ErrAuthorizationFailed    = errors.New("authorization failed")
	ErrEmptyUploadPackRequest = errors.New("empty git-upload-pack given")
	ErrInvalidAuthMethod      = errors.New("invalid auth method")
	ErrAlreadyConnected       = errors.New("session already established")
)

// Transport can initiate git-upload-pack and git-receive-pack processes.
// It is implemented both by the client and the server, making this an RPC.
type Transport interface {
	// NewSession returns a new session for an endpoint.
	NewSession(storage.Storer, *Endpoint, AuthMethod) (Session, error)

	// SupportedProtocols returns a list of supported Git protocol versions by
	// the transport client.
	SupportedProtocols() []protocol.Version
}

type AuthMethod interface {
	fmt.Stringer
	Name() string
}

// Endpoint represents a Git URL in any supported protocol.
type Endpoint struct {
	// Protocol is the protocol of the endpoint (e.g. git, https, file).
	Protocol string
	// User is the user.
	User string
	// Password is the password.
	Password string
	// Host is the host.
	Host string
	// Port is the port to connect, if 0 the default port for the given protocol
	// will be used.
	Port int
	// Path is the repository path.
	Path string
	// InsecureSkipTLS skips ssl verify if protocol is https
	InsecureSkipTLS bool
	// CaBundle specify additional ca bundle with system cert pool
	CaBundle []byte
	// Proxy provides info required for connecting to a proxy.
	Proxy ProxyOptions
}

type ProxyOptions struct {
	URL      string
	Username string
	Password string
}

func (o *ProxyOptions) Validate() error {
	if o.URL != "" {
		_, err := url.Parse(o.URL)
		return err
	}
	return nil
}

func (o *ProxyOptions) FullURL() (*url.URL, error) {
	proxyURL, err := url.Parse(o.URL)
	if err != nil {
		return nil, err
	}
	if o.Username != "" {
		if o.Password != "" {
			proxyURL.User = url.UserPassword(o.Username, o.Password)
		} else {
			proxyURL.User = url.User(o.Username)
		}
	}
	return proxyURL, nil
}

var defaultPorts = map[string]int{
	"http":  80,
	"https": 443,
	"git":   9418,
	"ssh":   22,
}

var fileIssueWindows = regexp.MustCompile(`^/[A-Za-z]:(/|\\)`)

// String returns a string representation of the Git URL.
func (u *Endpoint) String() string { var buf bytes.Buffer if u.Protocol != "" { buf.WriteString(u.Protocol) buf.WriteByte(':') } if u.Protocol != "" || u.Host != "" || u.User != "" || u.Password != "" { buf.WriteString("//") if u.User != "" || u.Password != "" { buf.WriteString(url.PathEscape(u.User)) if u.Password != "" { buf.WriteByte(':') buf.WriteString(url.PathEscape(u.Password)) } buf.WriteByte('@') } if u.Host != "" { buf.WriteString(u.Host) if u.Port != 0 { port, ok := defaultPorts[strings.ToLower(u.Protocol)] if !ok || ok && port != u.Port { fmt.Fprintf(&buf, ":%d", u.Port) } } } } if u.Path != "" && u.Path[0] != '/' && u.Host != "" { buf.WriteByte('/') } buf.WriteString(u.Path) return buf.String() } func NewEndpoint(endpoint string) (*Endpoint, error) { if e, ok := parseSCPLike(endpoint); ok { return e, nil } if e, ok := parseFile(endpoint); ok { return e, nil } return parseURL(endpoint) } func parseURL(endpoint string) (*Endpoint, error) { if strings.HasPrefix(endpoint, "file://") { endpoint = strings.TrimPrefix(endpoint, "file://") // When triple / is used, the path in Windows may end up having an // additional / resulting in "/C:/Dir". if runtime.GOOS == "windows" && fileIssueWindows.MatchString(endpoint) { endpoint = endpoint[1:] } return &Endpoint{ Protocol: "file", Path: endpoint, }, nil } u, err := url.Parse(endpoint) if err != nil { return nil, err } if !u.IsAbs() { return nil, plumbing.NewPermanentError(fmt.Errorf( "invalid endpoint: %s", endpoint, )) } var user, pass string if u.User != nil { user = u.User.Username() pass, _ = u.User.Password() } host := u.Hostname() if strings.Contains(host, ":") { // IPv6 address host = "[" + host + "]" } return &Endpoint{ Protocol: u.Scheme, User: user, Password: pass, Host: host, Port: getPort(u), Path: getPath(u), }, nil } func getPort(u *url.URL) int { p := u.Port() if p == "" { return 0 } i, err := strconv.Atoi(p) if err != nil { return 0 } return i } func getPath(u *url.URL) string { var res string = u.Path if u.RawQuery != "" { res += "?" + u.RawQuery } if u.Fragment != "" { res += "#" + u.Fragment } return res } func parseSCPLike(endpoint string) (*Endpoint, bool) { if giturl.MatchesScheme(endpoint) || !giturl.MatchesScpLike(endpoint) { return nil, false } user, host, portStr, path := giturl.FindScpLikeComponents(endpoint) port, err := strconv.Atoi(portStr) if err != nil { port = 22 } return &Endpoint{ Protocol: "ssh", User: user, Host: host, Port: port, Path: path, }, true } func parseFile(endpoint string) (*Endpoint, bool) { if giturl.MatchesScheme(endpoint) { return nil, false } path := endpoint return &Endpoint{ Protocol: "file", Path: path, }, true }
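// Usage sketch (illustrative only): the three endpoint forms NewEndpoint
// understands; the URLs are placeholders.

package transport

import "fmt"

func exampleEndpoints() {
	for _, s := range []string{
		"https://github.com/go-git/go-git.git", // URL form
		"git@github.com:go-git/go-git.git",     // SCP-like form, parsed as ssh
		"/srv/git/repo.git",                    // bare path, parsed as file
	} {
		if ep, err := NewEndpoint(s); err == nil {
			fmt.Println(ep.Protocol, ep.Host, ep.Path)
		}
	}
}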
package transport import ( "bufio" "context" "fmt" "io" "math" "github.com/go-git/go-git/v6/plumbing" "github.com/go-git/go-git/v6/plumbing/format/packfile" "github.com/go-git/go-git/v6/plumbing/format/pktline" "github.com/go-git/go-git/v6/plumbing/object" "github.com/go-git/go-git/v6/plumbing/protocol" "github.com/go-git/go-git/v6/plumbing/protocol/packp" "github.com/go-git/go-git/v6/plumbing/protocol/packp/capability" "github.com/go-git/go-git/v6/plumbing/protocol/packp/sideband" "github.com/go-git/go-git/v6/plumbing/revlist" "github.com/go-git/go-git/v6/storage" "github.com/go-git/go-git/v6/utils/ioutil" ) // UploadPackOptions is a set of options for the UploadPack service. type UploadPackOptions struct { GitProtocol string AdvertiseRefs bool StatelessRPC bool } // UploadPack is a server command that serves the upload-pack service. func UploadPack( ctx context.Context, st storage.Storer, r io.ReadCloser, w io.WriteCloser, opts *UploadPackOptions, ) error { if w == nil { return fmt.Errorf("nil writer") } w = ioutil.NewContextWriteCloser(ctx, w) if opts == nil { opts = &UploadPackOptions{} } if opts.AdvertiseRefs || !opts.StatelessRPC { switch version := ProtocolVersion(opts.GitProtocol); version { case protocol.V1: if _, err := pktline.Writef(w, "version %d\n", version); err != nil { return err } // TODO: support version 2 case protocol.V0, protocol.V2: default: return fmt.Errorf("%w: %q", ErrUnsupportedVersion, version) } if err := AdvertiseReferences(ctx, st, w, UploadPackService, opts.StatelessRPC); err != nil { return fmt.Errorf("advertising references: %w", err) } } if opts.AdvertiseRefs { // Done, there's nothing else to do return nil } if r == nil { return fmt.Errorf("nil reader") } r = ioutil.NewContextReadCloser(ctx, r) rd := bufio.NewReader(r) l, _, err := pktline.PeekLine(rd) if err != nil { return fmt.Errorf("peeking line: %w", err) } // In case the client has nothing to send, it sends a flush packet to // indicate that it is done sending data. In that case, we're done // here. 
if l == pktline.Flush { return nil } var done bool var haves []plumbing.Hash var upreq *packp.UploadRequest var havesWithRef map[plumbing.Hash][]plumbing.Hash var multiAck, multiAckDetailed bool var caps *capability.List var wants []plumbing.Hash firstRound := true for !done { writec := make(chan error) if firstRound || opts.StatelessRPC { upreq = packp.NewUploadRequest() if err := upreq.Decode(rd); err != nil { return fmt.Errorf("decoding upload-request: %w", err) } wants = upreq.Wants caps = upreq.Capabilities if err := r.Close(); err != nil { return fmt.Errorf("closing reader: %w", err) } // Find common commits/objects havesWithRef, err = revlist.ObjectsWithRef(st, wants, nil) if err != nil { return fmt.Errorf("getting objects with ref: %w", err) } // Encode objects to packfile and write to client multiAck = caps.Supports(capability.MultiACK) multiAckDetailed = caps.Supports(capability.MultiACKDetailed) go func() { // TODO: support deepen-since, and deepen-not var shupd packp.ShallowUpdate if !upreq.Depth.IsZero() { switch depth := upreq.Depth.(type) { case packp.DepthCommits: if err := getShallowCommits(st, wants, int(depth), &shupd); err != nil { writec <- fmt.Errorf("getting shallow commits: %w", err) return } default: writec <- fmt.Errorf("unsupported depth type %T", upreq.Depth) return } if err := shupd.Encode(w); err != nil { writec <- fmt.Errorf("sending shallow-update: %w", err) return } } writec <- nil }() } if err := <-writec; err != nil { return err } var uphav packp.UploadHaves if err := uphav.Decode(rd); err != nil { return fmt.Errorf("decoding upload-haves: %w", err) } if err := r.Close(); err != nil { return fmt.Errorf("closing reader: %w", err) } haves = append(haves, uphav.Haves...) done = uphav.Done common := map[plumbing.Hash]struct{}{} var ack packp.ACK var acks []packp.ACK for _, hu := range uphav.Haves { refs, ok := havesWithRef[hu] if ok { for _, ref := range refs { common[ref] = struct{}{} } } var status packp.ACKStatus if multiAckDetailed { status = packp.ACKCommon if !ok { status = packp.ACKReady } } else if multiAck { status = packp.ACKContinue } if ok || multiAck || multiAckDetailed { ack = packp.ACK{Hash: hu, Status: status} acks = append(acks, ack) if !multiAck && !multiAckDetailed { break } } } go func() { defer close(writec) if len(haves) > 0 { // Encode ACKs to client when we have haves srvrsp := packp.ServerResponse{ACKs: acks} if err := srvrsp.Encode(w); err != nil { writec <- fmt.Errorf("sending acks server-response: %w", err) return } } if !done { if multiAck || multiAckDetailed { // Encode a NAK for multi-ack srvrsp := packp.ServerResponse{} if err := srvrsp.Encode(w); err != nil { writec <- fmt.Errorf("sending nak server-response: %w", err) return } } } else if !ack.Hash.IsZero() && (multiAck || multiAckDetailed) { // We're done, send the final ACK ack.Status = 0 srvrsp := packp.ServerResponse{ACKs: []packp.ACK{ack}} if err := srvrsp.Encode(w); err != nil { writec <- fmt.Errorf("sending final ack server-response: %w", err) return } } else if ack.Hash.IsZero() { // We don't have multi-ack and there are no haves. Encode a NAK. srvrsp := packp.ServerResponse{} if err := srvrsp.Encode(w); err != nil { writec <- fmt.Errorf("sending final nak server-response: %w", err) return } } writec <- nil }() if err := <-writec; err != nil { return err } firstRound = false } // Done with the request, now close the reader // to indicate that we are done reading from it. 
if err := r.Close(); err != nil { return fmt.Errorf("closing reader: %w", err) } objs, err := objectsToUpload(st, wants, haves) if err != nil { w.Close() //nolint:errcheck return fmt.Errorf("getting objects to upload: %w", err) } var ( useSideband bool writer io.Writer = w ) if !caps.Supports(capability.NoProgress) { if caps.Supports(capability.Sideband64k) { writer = sideband.NewMuxer(sideband.Sideband64k, w) useSideband = true } else if caps.Supports(capability.Sideband) { writer = sideband.NewMuxer(sideband.Sideband, w) useSideband = true } } // TODO: Support shallow-file // TODO: Support thin-pack e := packfile.NewEncoder(writer, st, false) _, err = e.Encode(objs, 10) if err != nil { return fmt.Errorf("encoding packfile: %w", err) } if useSideband { if err := pktline.WriteFlush(w); err != nil { return fmt.Errorf("flushing sideband: %w", err) } } if err := w.Close(); err != nil { return fmt.Errorf("closing writer: %w", err) } return nil } func objectsToUpload(st storage.Storer, wants, haves []plumbing.Hash) ([]plumbing.Hash, error) { return revlist.Objects(st, wants, haves) } func getShallowCommits(st storage.Storer, heads []plumbing.Hash, depth int, upd *packp.ShallowUpdate) error { var i, curDepth int var commit *object.Commit depths := map[*object.Commit]int{} stack := []object.Object{} for commit != nil || i < len(heads) || len(stack) > 0 { if commit == nil { if i < len(heads) { obj, err := st.EncodedObject(plumbing.CommitObject, heads[i]) i++ if err != nil { continue } commit, err = object.DecodeCommit(st, obj) if err != nil { commit = nil continue } depths[commit] = 0 curDepth = 0 } else if len(stack) > 0 { commit = stack[len(stack)-1].(*object.Commit) stack = stack[:len(stack)-1] curDepth = depths[commit] } } curDepth++ if depth != math.MaxInt && curDepth >= depth { upd.Shallows = append(upd.Shallows, commit.Hash) commit = nil continue } upd.Unshallows = append(upd.Unshallows, commit.Hash) parents := commit.Parents() commit = nil for { parent, err := parents.Next() if err == io.EOF { break } if err != nil { return err } if depths[parent] != 0 && curDepth >= depths[parent] { continue } depths[parent] = curDepth if _, err := parents.Next(); err == nil { stack = append(stack, parent) } else { commit = parent curDepth = depths[commit] } } } return nil }
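// Usage sketch (illustrative only): the stateless upload-pack counterpart to
// the receive-pack example earlier; gitProtocol carries the client's
// GIT_PROTOCOL value, and each call would normally write to its own
// response body.

package transport

import (
	"context"
	"io"

	"github.com/go-git/go-git/v6/storage"
)

func exampleStatelessUploadPack(ctx context.Context, st storage.Storer, r io.ReadCloser, advW, postW io.WriteCloser, gitProtocol string) error {
	// First request: advertise references only.
	if err := UploadPack(ctx, st, nil, advW, &UploadPackOptions{
		GitProtocol:   gitProtocol,
		AdvertiseRefs: true,
		StatelessRPC:  true,
	}); err != nil {
		return err
	}
	// Second request: negotiate and stream the packfile.
	return UploadPack(ctx, st, r, postW, &UploadPackOptions{
		GitProtocol:  gitProtocol,
		StatelessRPC: true,
	})
}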
package transport import ( "strings" "github.com/go-git/go-git/v6/plumbing/format/pktline" "github.com/go-git/go-git/v6/plumbing/protocol" "github.com/go-git/go-git/v6/utils/ioutil" ) // DiscoverVersion reads the first pktline from the reader to determine the // protocol version. This is used by the client to determine the protocol // version of the server. func DiscoverVersion(r ioutil.ReadPeeker) (protocol.Version, error) { ver := protocol.V0 _, pktb, err := pktline.PeekLine(r) if err != nil { return ver, err } pkt := strings.TrimSpace(string(pktb)) if strings.HasPrefix(pkt, "version ") { // Consume the version packet pktline.ReadLine(r) // nolint:errcheck if v, _ := protocol.Parse(pkt[8:]); v > ver { ver = protocol.Version(v) } } return ver, nil } // ProtocolVersion tries to find the version parameter in the protocol string. // This expects the protocol string from the GIT_PROTOCOL environment variable. // This is used by the server to determine the protocol version requested by // the client. func ProtocolVersion(p string) protocol.Version { var ver protocol.Version for _, param := range strings.Split(p, ":") { if strings.HasPrefix(param, "version=") { if v, _ := protocol.Parse(param[8:]); v > ver { ver = protocol.Version(v) } } } return ver }
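// Usage sketch (illustrative only): ProtocolVersion parses GIT_PROTOCOL-style
// strings, so "version=2:key=value" resolves to protocol.V2 and an empty
// string falls back to protocol.V0.

package transport

import "github.com/go-git/go-git/v6/plumbing/protocol"

func exampleProtocolVersion() bool {
	return ProtocolVersion("version=2:key=value") == protocol.V2 &&
		ProtocolVersion("") == protocol.V0
}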