/src/git/chunk-format.c

Source
#define USE_THE_REPOSITORY_VARIABLE
#define DISABLE_SIGN_COMPARE_WARNINGS

#include "git-compat-util.h"
#include "chunk-format.h"
#include "csum-file.h"
#include "gettext.h"
#include "hash.h"
#include "trace2.h"

/*
 * When writing a chunk-based file format, collect the chunks in
 * an array of chunk_info structs. The size stores the _expected_
 * amount of data that will be written by write_fn.
 *
 * The same struct is reused when reading: read_table_of_contents()
 * fills in id, start and size for each table-of-contents entry and
 * leaves write_fn untouched.
 */
struct chunk_info {
  /* Chunk identifier; stored in the table of contents as a 32-bit big-endian value. */
  uint32_t id;
  /*
   * Writing: number of bytes write_fn is expected to emit.
   * Reading: distance between this chunk's offset and the next one's.
   */
  uint64_t size;
  /* Callback that emits the chunk body; only invoked by write_chunkfile(). */
  chunk_write_fn write_fn;

  /* Reading only: address of the chunk's first byte inside the caller's buffer. */
  const void *start;
};

/*
 * State for one chunk-based file: the output stream (when writing) and
 * the growable list of chunks that were registered with add_chunk() or
 * parsed by read_table_of_contents().
 */
struct chunkfile {
  /* Stream handed to init_chunkfile(); write_chunkfile() writes through it. */
  struct hashfile *f;

  /* Array of chunk descriptors, grown with ALLOC_GROW(). */
  struct chunk_info *chunks;
  /* Number of valid entries in chunks. */
  size_t chunks_nr;
  /* Allocation bookkeeping passed to ALLOC_GROW() alongside chunks. */
  size_t chunks_alloc;
};

/*
 * Allocate a new chunkfile bound to the hashfile "f" and return it.
 *
 * The struct comes from xcalloc(), so (assuming the usual calloc
 * semantics) the chunk list starts out empty. Release the result with
 * free_chunkfile().
 */
struct chunkfile *init_chunkfile(struct hashfile *f)
{
  struct chunkfile *fresh = xcalloc(1, sizeof(struct chunkfile));

  fresh->f = f;
  return fresh;
}

/*
 * Release a chunkfile and its chunk array. Passing NULL is a no-op.
 * The hashfile referenced by cf->f is not touched here.
 */
void free_chunkfile(struct chunkfile *cf)
{
  if (cf) {
    free(cf->chunks);
    free(cf);
  }
}

/*
 * Return how many chunks are currently recorded in "cf".
 * The size_t counter is converted to int for the caller.
 */
int get_num_chunks(struct chunkfile *cf)
{
  int count = cf->chunks_nr;

  return count;
}

/*
 * Register a chunk to be emitted later by write_chunkfile().
 *
 *   id   - chunk identifier recorded in the table of contents
 *   size - number of bytes "fn" is expected to write
 *   fn   - callback that writes the chunk body
 *
 * Chunks are written in the order in which they were added.
 */
void add_chunk(struct chunkfile *cf,
               uint32_t id,
               size_t size,
               chunk_write_fn fn)
{
  struct chunk_info *entry;

  ALLOC_GROW(cf->chunks, cf->chunks_nr + 1, cf->chunks_alloc);

  /* Claim the next free slot, then fill it in. */
  entry = &cf->chunks[cf->chunks_nr++];
  entry->size = size;
  entry->write_fn = fn;
  entry->id = id;
}

/*
 * Write the table of contents followed by the body of every chunk that
 * was registered with add_chunk() to cf->f.
 *
 * The table of contents holds one (id, offset) entry per chunk plus a
 * terminating entry with id 0 whose offset marks the end of the last
 * chunk. Offsets are computed from the sizes declared in add_chunk(),
 * starting right after the table of contents itself.
 *
 * "data" is passed through unchanged to every chunk's write_fn.
 *
 * Returns 0 on success, or the first non-zero value returned by a
 * write_fn (remaining chunks are then skipped). Triggers BUG() when a
 * write_fn emits a different number of bytes than it declared, because
 * the already-written table of contents would then be wrong.
 */
int write_chunkfile(struct chunkfile *cf, void *data)
{
  size_t i;
  int result = 0;
  uint64_t cur_offset = hashfile_total(cf->f);

  trace2_region_enter("chunkfile", "write", the_repository);

  /* Add the table of contents to the current offset */
  cur_offset += (cf->chunks_nr + 1) * CHUNK_TOC_ENTRY_SIZE;

  for (i = 0; i < cf->chunks_nr; i++) {
    hashwrite_be32(cf->f, cf->chunks[i].id);
    hashwrite_be64(cf->f, cur_offset);

    cur_offset += cf->chunks[i].size;
  }

  /* Trailing entry marks the end of the chunks */
  hashwrite_be32(cf->f, 0);
  hashwrite_be64(cf->f, cur_offset);

  for (i = 0; i < cf->chunks_nr; i++) {
    off_t start_offset = hashfile_total(cf->f);
    off_t written;

    result = cf->chunks[i].write_fn(cf->f, data);
    if (result)
      goto cleanup;

    written = hashfile_total(cf->f) - start_offset;
    if (written != cf->chunks[i].size)
      /*
       * Neither uint64_t nor off_t is guaranteed to match a
       * signed 64-bit conversion, so print both values through
       * uintmax_t.
       */
      BUG("expected to write %"PRIuMAX" bytes to chunk %"PRIx32", but wrote %"PRIuMAX" instead",
          (uintmax_t)cf->chunks[i].size, cf->chunks[i].id,
          (uintmax_t)written);
  }

cleanup:
  trace2_region_leave("chunkfile", "write", the_repository);
  return result;
}

/*
 * Parse a table of contents located at mfile + toc_offset and append
 * one chunk_info (id, start, size) per entry to "cf".
 *
 *   mfile, mfile_size  - the whole file as a memory buffer
 *   toc_offset         - byte offset of the first table entry
 *   toc_length         - number of chunk entries, not counting the
 *                        terminating entry that follows them
 *   expected_alignment - every chunk offset must be a multiple of this;
 *                        it is used as a modulus and must be non-zero
 *
 * Each entry is a 32-bit big-endian id followed by a 64-bit big-endian
 * offset. A chunk's size is the distance to the next entry's offset,
 * which is why the terminating entry (id 0) is required.
 *
 * Returns 0 on success. On failure an error is reported and the
 * function returns 1 (terminator seen too early, or misaligned chunk)
 * or -1 (offsets out of order or out of bounds, duplicate chunk id, or
 * a non-zero id in the terminating entry). Entries parsed before the
 * failure remain recorded in "cf".
 *
 * NOTE(review): the table itself is read without checking that it lies
 * inside mfile_size; callers presumably validate that beforehand.
 */
int read_table_of_contents(struct chunkfile *cf,
                           const unsigned char *mfile,
                           size_t mfile_size,
                           uint64_t toc_offset,
                           int toc_length,
                           unsigned expected_alignment)
{
  int i;
  uint32_t chunk_id;
  const unsigned char *table_of_contents = mfile + toc_offset;
  size_t hash_size = the_hash_algo->rawsz;
  /*
   * Chunk data has to end before the trailing hash. Clamp to zero when
   * the file is shorter than the hash so the unsigned subtraction
   * cannot wrap around and turn the bounds check below into a no-op.
   */
  size_t max_offset = mfile_size > hash_size ? mfile_size - hash_size : 0;

  /*
   * New entries are appended after any that are already present, so
   * reserve room for both.
   */
  ALLOC_GROW(cf->chunks, cf->chunks_nr + toc_length, cf->chunks_alloc);

  while (toc_length--) {
    uint64_t chunk_offset, next_chunk_offset;

    chunk_id = get_be32(table_of_contents);
    chunk_offset = get_be64(table_of_contents + 4);

    if (!chunk_id) {
      error(_("terminating chunk id appears earlier than expected"));
      return 1;
    }
    if (chunk_offset % expected_alignment != 0) {
      error(_("chunk id %"PRIx32" not %d-byte aligned"),
            chunk_id, expected_alignment);
      return 1;
    }

    /* Peek at the following entry: its offset is where this chunk ends. */
    table_of_contents += CHUNK_TOC_ENTRY_SIZE;
    next_chunk_offset = get_be64(table_of_contents + 4);

    if (next_chunk_offset < chunk_offset ||
        next_chunk_offset > max_offset) {
      error(_("improper chunk offset(s) %"PRIx64" and %"PRIx64""),
            chunk_offset, next_chunk_offset);
      return -1;
    }

    for (i = 0; i < cf->chunks_nr; i++) {
      if (cf->chunks[i].id == chunk_id) {
        error(_("duplicate chunk ID %"PRIx32" found"),
          chunk_id);
        return -1;
      }
    }

    cf->chunks[cf->chunks_nr].id = chunk_id;
    cf->chunks[cf->chunks_nr].start = mfile + chunk_offset;
    cf->chunks[cf->chunks_nr].size = next_chunk_offset - chunk_offset;
    cf->chunks_nr++;
  }

  /* table_of_contents now points at the terminating entry. */
  chunk_id = get_be32(table_of_contents);
  if (chunk_id) {
    error(_("final chunk has non-zero id %"PRIx32""), chunk_id);
    return -1;
  }

  return 0;
}

/*
 * Output locations handed to pair_chunk_fn() through its opaque "data"
 * argument.
 */
struct pair_chunk_data {
  const unsigned char **p; /* receives the chunk's start address */
  size_t *size;            /* receives the chunk's length in bytes */
};

/*
 * Chunk reader callback that simply records where the chunk lives:
 * stores chunk_start and chunk_size through the pointers carried in
 * the pair_chunk_data passed as "data". Always returns 0.
 */
static int pair_chunk_fn(const unsigned char *chunk_start,
                         size_t chunk_size,
                         void *data)
{
  const struct pair_chunk_data *out = data;

  *out->size = chunk_size;
  *out->p = chunk_start;
  return 0;
}

int pair_chunk(struct chunkfile *cf,
         uint32_t chunk_id,
         const unsigned char **p,
         size_t *size)
{
  struct pair_chunk_data pcd = { .p = p, .size = size };
  return read_chunk(cf, chunk_id, pair_chunk_fn, &pcd);
}

/*
 * Find the first recorded chunk whose id equals "chunk_id" and invoke
 * "fn" on it with the chunk's start address, its size, and the caller's
 * "data" pointer.
 *
 * Returns the value produced by "fn", or CHUNK_NOT_FOUND when no chunk
 * with that id is recorded in "cf".
 */
int read_chunk(struct chunkfile *cf,
               uint32_t chunk_id,
               chunk_read_fn fn,
               void *data)
{
  int idx = 0;

  while (idx < cf->chunks_nr) {
    const struct chunk_info *entry = &cf->chunks[idx];

    if (entry->id == chunk_id)
      return fn(entry->start, entry->size, data);
    idx++;
  }

  return CHUNK_NOT_FOUND;
}

/*
 * Map a hash algorithm to a one-byte version number:
 * 1 for SHA-1, 2 for SHA-256. Any other algorithm is fatal (die()).
 */
uint8_t oid_version(const struct git_hash_algo *algop)
{
  uint8_t version = 0;

  switch (hash_algo_by_ptr(algop)) {
  case GIT_HASH_SHA1:
    version = 1;
    break;
  case GIT_HASH_SHA256:
    version = 2;
    break;
  default:
    die(_("invalid hash version"));
  }

  return version;
}

Coverage Report

Created: 2025-12-31 07:01

Line	Count	Source
1		#define USE_THE_REPOSITORY_VARIABLE
2		#define DISABLE_SIGN_COMPARE_WARNINGS
3
4		#include "git-compat-util.h"
5		#include "chunk-format.h"
6		#include "csum-file.h"
7		#include "gettext.h"
8		#include "hash.h"
9		#include "trace2.h"
10
11		/*
12		* When writing a chunk-based file format, collect the chunks in
13		* an array of chunk_info structs. The size stores the _expected_
14		* amount of data that will be written by write_fn.
15		*/
16		struct chunk_info {
17		uint32_t id;
18		uint64_t size;
19		chunk_write_fn write_fn;
20
21		const void *start;
22		};
23
24		struct chunkfile {
25		struct hashfile *f;
26
27		struct chunk_info *chunks;
28		size_t chunks_nr;
29		size_t chunks_alloc;
30		};
31
32		struct chunkfile *init_chunkfile(struct hashfile *f)
33	0	{
34	0	struct chunkfile *cf = xcalloc(1, sizeof(*cf));
35	0	cf->f = f;
36	0	return cf;
37	0	}
38
39		void free_chunkfile(struct chunkfile *cf)
40	0	{
41	0	if (!cf)
42	0	return;
43	0	free(cf->chunks);
44	0	free(cf);
45	0	}
46
47		int get_num_chunks(struct chunkfile *cf)
48	0	{
49	0	return cf->chunks_nr;
50	0	}
51
52		void add_chunk(struct chunkfile *cf,
53		uint32_t id,
54		size_t size,
55		chunk_write_fn fn)
56	0	{
57	0	ALLOC_GROW(cf->chunks, cf->chunks_nr + 1, cf->chunks_alloc);
58
59	0	cf->chunks[cf->chunks_nr].id = id;
60	0	cf->chunks[cf->chunks_nr].write_fn = fn;
61	0	cf->chunks[cf->chunks_nr].size = size;
62	0	cf->chunks_nr++;
63	0	}
64
65		int write_chunkfile(struct chunkfile *cf, void *data)
66	0	{
67	0	int i, result = 0;
68	0	uint64_t cur_offset = hashfile_total(cf->f);
69
70	0	trace2_region_enter("chunkfile", "write", the_repository);
71
72		/* Add the table of contents to the current offset */
73	0	cur_offset += (cf->chunks_nr + 1) * CHUNK_TOC_ENTRY_SIZE;
74
75	0	for (i = 0; i < cf->chunks_nr; i++) {
76	0	hashwrite_be32(cf->f, cf->chunks[i].id);
77	0	hashwrite_be64(cf->f, cur_offset);
78
79	0	cur_offset += cf->chunks[i].size;
80	0	}
81
82		/* Trailing entry marks the end of the chunks */
83	0	hashwrite_be32(cf->f, 0);
84	0	hashwrite_be64(cf->f, cur_offset);
85
86	0	for (i = 0; i < cf->chunks_nr; i++) {
87	0	off_t start_offset = hashfile_total(cf->f);
88	0	result = cf->chunks[i].write_fn(cf->f, data);
89
90	0	if (result)
91	0	goto cleanup;
92
93	0	if (hashfile_total(cf->f) - start_offset != cf->chunks[i].size)
94	0	BUG("expected to write %"PRId64" bytes to chunk %"PRIx32", but wrote %"PRId64" instead",
95	0	cf->chunks[i].size, cf->chunks[i].id,
96	0	hashfile_total(cf->f) - start_offset);
97	0	}
98
99	0	cleanup:
100	0	trace2_region_leave("chunkfile", "write", the_repository);
101	0	return result;
102	0	}
103
104		int read_table_of_contents(struct chunkfile *cf,
105		const unsigned char *mfile,
106		size_t mfile_size,
107		uint64_t toc_offset,
108		int toc_length,
109		unsigned expected_alignment)
110	0	{
111	0	int i;
112	0	uint32_t chunk_id;
113	0	const unsigned char *table_of_contents = mfile + toc_offset;
114
115	0	ALLOC_GROW(cf->chunks, toc_length, cf->chunks_alloc);
116
117	0	while (toc_length--) {
118	0	uint64_t chunk_offset, next_chunk_offset;
119
120	0	chunk_id = get_be32(table_of_contents);
121	0	chunk_offset = get_be64(table_of_contents + 4);
122
123	0	if (!chunk_id) {
124	0	error(_("terminating chunk id appears earlier than expected"));
125	0	return 1;
126	0	}
127	0	if (chunk_offset % expected_alignment != 0) {
128	0	error(_("chunk id %"PRIx32" not %d-byte aligned"),
129	0	chunk_id, expected_alignment);
130	0	return 1;
131	0	}
132
133	0	table_of_contents += CHUNK_TOC_ENTRY_SIZE;
134	0	next_chunk_offset = get_be64(table_of_contents + 4);
135
136	0	if (next_chunk_offset < chunk_offset ||
137	0	next_chunk_offset > mfile_size - the_hash_algo->rawsz) {
138	0	error(_("improper chunk offset(s) %"PRIx64" and %"PRIx64""),
139	0	chunk_offset, next_chunk_offset);
140	0	return -1;
141	0	}
142
143	0	for (i = 0; i < cf->chunks_nr; i++) {
144	0	if (cf->chunks[i].id == chunk_id) {
145	0	error(_("duplicate chunk ID %"PRIx32" found"),
146	0	chunk_id);
147	0	return -1;
148	0	}
149	0	}
150
151	0	cf->chunks[cf->chunks_nr].id = chunk_id;
152	0	cf->chunks[cf->chunks_nr].start = mfile + chunk_offset;
153	0	cf->chunks[cf->chunks_nr].size = next_chunk_offset - chunk_offset;
154	0	cf->chunks_nr++;
155	0	}
156
157	0	chunk_id = get_be32(table_of_contents);
158	0	if (chunk_id) {
159	0	error(_("final chunk has non-zero id %"PRIx32""), chunk_id);
160	0	return -1;
161	0	}
162
163	0	return 0;
164	0	}
165
166		struct pair_chunk_data {
167		const unsigned char **p;
168		size_t *size;
169		};
170
171		static int pair_chunk_fn(const unsigned char *chunk_start,
172		size_t chunk_size,
173		void *data)
174	0	{
175	0	struct pair_chunk_data *pcd = data;
176	0	*pcd->p = chunk_start;
177	0	*pcd->size = chunk_size;
178	0	return 0;
179	0	}
180
181		int pair_chunk(struct chunkfile *cf,
182		uint32_t chunk_id,
183		const unsigned char **p,
184		size_t *size)
185	0	{
186	0	struct pair_chunk_data pcd = { .p = p, .size = size };
187	0	return read_chunk(cf, chunk_id, pair_chunk_fn, &pcd);
188	0	}
189
190		int read_chunk(struct chunkfile *cf,
191		uint32_t chunk_id,
192		chunk_read_fn fn,
193		void *data)
194	0	{
195	0	int i;
196
197	0	for (i = 0; i < cf->chunks_nr; i++) {
198	0	if (cf->chunks[i].id == chunk_id)
199	0	return fn(cf->chunks[i].start, cf->chunks[i].size, data);
200	0	}
201
202	0	return CHUNK_NOT_FOUND;
203	0	}
204
205		uint8_t oid_version(const struct git_hash_algo *algop)
206	0	{
207	0	switch (hash_algo_by_ptr(algop)) {
208	0	case GIT_HASH_SHA1:
209	0	return 1;
210	0	case GIT_HASH_SHA256:
211	0	return 2;
212	0	default:
213	0	die(_("invalid hash version"));
214	0	}
215	0	}