/src/git/csum-file.c

Source
/*
 * csum-file.c
 *
 * Copyright (C) 2005 Linus Torvalds
 *
 * Simple file write infrastructure for writing SHA1-summed
 * files. Useful when you write a file that you want to be
 * able to verify hasn't been messed with afterwards.
 */

#include "git-compat-util.h"
#include "csum-file.h"
#include "git-zlib.h"
#include "hash.h"
#include "progress.h"

/*
 * Check mode helper: pull the next `count` bytes from f->check_fd into
 * f->check_buffer and compare them against `buf`.
 *
 * A failed read is reported through die_errno(); a short read or a byte
 * mismatch is reported through die().
 */
static void verify_buffer_or_die(struct hashfile *f,
         const void *buf,
         unsigned int count)
{
  ssize_t nread = read_in_full(f->check_fd, f->check_buffer, count);

  if (nread < 0)
    die_errno("%s: sha1 file read error", f->name);
  /* nread is non-negative here, so the cast cannot change its value */
  if ((size_t)nread != count)
    die("%s: sha1 file truncated", f->name);
  if (memcmp(f->check_buffer, buf, count) != 0)
    die("sha1 file '%s' validation error", f->name);
}

/*
 * Push `count` bytes from `buf` out to f->fd.
 *
 * When a check descriptor is attached (f->check_fd >= 0) and there is
 * something to write, the bytes are first compared against the check
 * file. Afterwards the running total is bumped and handed to the
 * throughput display.
 */
static void flush(struct hashfile *f, const void *buf, unsigned int count)
{
  int checking = f->check_fd >= 0;

  if (checking && count)
    verify_buffer_or_die(f, buf, count);

  if (write_in_full(f->fd, buf, count) < 0) {
    /* Single out a full disk with its own message. */
    if (errno == ENOSPC)
      die("sha1 file '%s' write error. Out of diskspace", f->name);
    else
      die_errno("sha1 file '%s' write error", f->name);
  }

  f->total += count;
  display_throughput(f->tp, f->total);
}

/*
 * Drain whatever is sitting in f->buffer: feed it to the hash context
 * (unless f->skip_hash is set), write it out via flush(), and mark the
 * buffer empty. Does nothing when the buffer holds no bytes.
 */
void hashflush(struct hashfile *f)
{
  unsigned pending = f->offset;

  if (!pending)
    return;

  if (!f->skip_hash)
    git_hash_update(&f->ctx, f->buffer, pending);
  flush(f, f->buffer, pending);
  f->offset = 0;
}

/*
 * Release the memory owned by a hashfile: both of its buffers and the
 * struct itself. File descriptors are not touched here.
 */
void free_hashfile(struct hashfile *f)
{
  /* check_buffer is NULL unless hashfd_check() set it; free(NULL) is a no-op */
  free(f->check_buffer);
  free(f->buffer);
  free(f);
}

/*
 * Finish writing a hashfile and release it.
 *
 * Buffered data is flushed, then the final hash (or a cleared hash when
 * f->skip_hash is set) is placed in f->buffer and, if `result` is
 * non-NULL, copied there as well.
 *
 * `flags` selects the optional steps, performed in this order:
 *   CSUM_HASH_IN_STREAM - append the hash bytes to the output
 *   CSUM_FSYNC          - fsync the output for the given `component`
 *   CSUM_CLOSE          - close the output descriptor
 *
 * If a check descriptor is attached, it must be at end-of-file at this
 * point; any extra byte is reported as trailing garbage. The check
 * descriptor is then closed.
 *
 * Returns 0 when CSUM_CLOSE was requested, otherwise the still-open
 * output descriptor. `f` is freed in either case.
 */
int finalize_hashfile(struct hashfile *f, unsigned char *result,
          enum fsync_component component, unsigned int flags)
{
  int ret_fd;

  hashflush(f);

  if (!f->skip_hash)
    git_hash_final(f->buffer, &f->ctx);
  else
    hashclr(f->buffer, f->algop);

  if (result)
    hashcpy(result, f->buffer, f->algop);
  if (flags & CSUM_HASH_IN_STREAM)
    flush(f, f->buffer, f->algop->rawsz);
  if (flags & CSUM_FSYNC)
    fsync_component_or_die(component, f->fd, f->name);

  if (!(flags & CSUM_CLOSE)) {
    ret_fd = f->fd;
  } else {
    if (close(f->fd))
      die_errno("%s: sha1 file error on close", f->name);
    ret_fd = 0;
  }

  if (f->check_fd >= 0) {
    /* Try to read one more byte; success means the check file is longer. */
    char extra;
    int nread = read_in_full(f->check_fd, &extra, 1);

    if (nread < 0)
      die_errno("%s: error when reading the tail of sha1 file",
          f->name);
    if (nread)
      die("%s: sha1 file has trailing garbage", f->name);
    if (close(f->check_fd))
      die_errno("%s: sha1 file error on close", f->name);
  }

  free_hashfile(f);
  return ret_fd;
}

/*
 * Abandon a hashfile without finalizing it: close whichever of the check
 * and output descriptors are valid (close errors are ignored) and free
 * the structure. No buffered data is flushed.
 */
void discard_hashfile(struct hashfile *f)
{
  if (f->check_fd >= 0)
    close(f->check_fd);
  if (f->fd >= 0)
    close(f->fd);

  free_hashfile(f);
}

/*
 * Append `count` bytes from `buf` to the hashfile.
 *
 * Input is consumed in chunks no larger than the free space in
 * f->buffer. Each chunk is folded into the running CRC when f->do_crc
 * is set. A chunk that spans the whole buffer is hashed and written
 * straight from the caller's memory; anything smaller is staged in
 * f->buffer and flushed once the buffer fills up.
 */
void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
{
  const unsigned char *src = buf;

  while (count) {
    unsigned room = f->buffer_len - f->offset;
    unsigned chunk = room < count ? room : count;

    if (f->do_crc)
      f->crc32 = crc32(f->crc32, src, chunk);

    if (chunk == f->buffer_len) {
      /*
       * A full buffer's worth is available in the input, which
       * can only happen while f->offset is zero. Hash and write
       * it in place and skip the memcpy() into f->buffer.
       */
      if (!f->skip_hash)
        git_hash_update(&f->ctx, src, chunk);
      flush(f, src, chunk);
    } else {
      /*
       * Stage the chunk in f->buffer; flush only when this
       * chunk used up all the remaining room.
       */
      memcpy(f->buffer + f->offset, src, chunk);
      f->offset += chunk;
      if (chunk == room)
        hashflush(f);
    }

    src += chunk;
    count -= chunk;
  }
}

/*
 * Create a hashfile in check mode for the existing file `name`.
 *
 * Output goes to /dev/null, while `name` is opened read-only as the
 * check descriptor, so everything subsequently written through the
 * hashfile is compared against the file's contents by flush(). A check
 * buffer as large as the write buffer is allocated for those reads.
 */
struct hashfile *hashfd_check(const struct git_hash_algo *algop,
            const char *name)
{
  int sink = xopen("/dev/null", O_WRONLY);
  int check = xopen(name, O_RDONLY);
  struct hashfile *f = hashfd(algop, sink, name);

  f->check_buffer = xmalloc(f->buffer_len);
  f->check_fd = check;

  return f;
}

/*
 * Common constructor behind hashfd() and hashfd_throughput().
 *
 * Allocates a hashfile writing to `fd`, labelled `name` (the pointer is
 * stored, not copied), reporting to progress meter `tp` (may be NULL),
 * with a write buffer of `buffer_len` bytes. Check mode, CRC tracking
 * and hash skipping all start out disabled, and the hash context is
 * initialized for the unsafe variant of `algop`.
 */
static struct hashfile *hashfd_internal(const struct git_hash_algo *algop,
          int fd, const char *name,
          struct progress *tp,
          size_t buffer_len)
{
  struct hashfile *f = xmalloc(sizeof(struct hashfile));

  /* Where the data goes and how it is labelled. */
  f->fd = fd;
  f->name = name;
  f->tp = tp;

  /* Nothing written or buffered yet. */
  f->total = 0;
  f->offset = 0;

  /* Optional behaviours start out off. */
  f->check_fd = -1;
  f->check_buffer = NULL;
  f->do_crc = 0;
  f->skip_hash = 0;

  f->algop = unsafe_hash_algo(algop);
  f->algop->init_fn(&f->ctx);

  f->buffer_len = buffer_len;
  f->buffer = xmalloc(buffer_len);

  return f;
}

/*
 * Create a hashfile on `fd` without a progress meter.
 */
struct hashfile *hashfd(const struct git_hash_algo *algop,
      int fd, const char *name)
{
  /*
   * No progress meter is watching the data rate of this
   * hashfile, so pick a large buffer: the buffer is flushed
   * whenever it fills, and a bigger one means fewer writes.
   */
  const size_t buffer_len = 128 * 1024;

  return hashfd_internal(algop, fd, name, NULL, buffer_len);
}

/*
 * Create a hashfile on `fd` that reports its throughput to `tp`.
 */
struct hashfile *hashfd_throughput(const struct git_hash_algo *algop,
           int fd, const char *name, struct progress *tp)
{
  /*
   * The throughput display is updated on every flush, so keep
   * the buffer small to make those updates arrive more often.
   */
  const size_t buffer_len = 8 * 1024;

  return hashfd_internal(algop, fd, name, tp, buffer_len);
}

void hashfile_checkpoint_init(struct hashfile *f,
            struct hashfile_checkpoint *checkpoint)
{
  memset(checkpoint, 0, sizeof(*checkpoint));
  f->algop->init_fn(&checkpoint->ctx);
}

/*
 * Record the current state of `f` in `checkpoint`.
 *
 * Buffered data is flushed first so that f->total and the hash context
 * both account for everything written so far; the checkpoint then takes
 * a clone of the hash context and the number of bytes written.
 */
void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpoint)
{
  hashflush(f);

  git_hash_clone(&checkpoint->ctx, &f->ctx);
  checkpoint->offset = f->total;
}

/*
 * Roll `f` back to a state saved by hashfile_checkpoint().
 *
 * The output file is truncated to the checkpoint's offset and the file
 * position is moved there. On success the byte total and hash context
 * are restored from the checkpoint and 0 is returned. If truncating or
 * seeking fails, -1 is returned and the in-memory state of `f` is left
 * as it was.
 */
int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint)
{
  off_t target = checkpoint->offset;

  if (ftruncate(f->fd, target))
    return -1;
  if (lseek(f->fd, target, SEEK_SET) != target)
    return -1;

  /* hashflush() was called in checkpoint, so no buffered bytes to keep */
  f->offset = 0;
  f->total = target;
  git_hash_clone(&f->ctx, &checkpoint->ctx);

  return 0;
}

/*
 * Start CRC tracking on `f`: enable the do_crc flag and reset the
 * running CRC to the value crc32() yields for a NULL, zero-length input.
 * From here on hashwrite() folds every byte it sees into f->crc32.
 */
void crc32_begin(struct hashfile *f)
{
  f->do_crc = 1;
  f->crc32 = crc32(0, NULL, 0);
}

/*
 * Stop CRC tracking on `f` and return the CRC accumulated so far.
 */
uint32_t crc32_end(struct hashfile *f)
{
  uint32_t accumulated = f->crc32;

  f->do_crc = 0;
  return accumulated;
}

/*
 * Check the trailing checksum of an in-memory hashfile image.
 *
 * `data` holds `total_len` bytes, of which the last algop->rawsz are
 * expected to be the hash of everything before them. Returns the result
 * of hasheq() on the recomputed and stored hashes, or 0 when `total_len`
 * is too small to contain a hash at all.
 */
int hashfile_checksum_valid(const struct git_hash_algo *algop,
          const unsigned char *data, size_t total_len)
{
  unsigned char computed[GIT_MAX_RAWSZ];
  struct git_hash_ctx ctx;
  const unsigned char *stored;
  /*
   * May wrap around when total_len is smaller than the hash size;
   * the length check below returns before the value is used then.
   */
  size_t payload_len = total_len - algop->rawsz;

  algop = unsafe_hash_algo(algop);

  if (total_len < algop->rawsz)
    return 0; /* say "too short"? */

  algop->init_fn(&ctx);
  git_hash_update(&ctx, data, payload_len);
  git_hash_final(computed, &ctx);

  stored = data + payload_len;
  return hasheq(computed, stored, algop);
}

Coverage Report

Created: 2025-12-31 07:01

Line	Count	Source
1		/*
2		* csum-file.c
3		*
4		* Copyright (C) 2005 Linus Torvalds
5		*
6		* Simple file write infrastructure for writing SHA1-summed
7		* files. Useful when you write a file that you want to be
8		* able to verify hasn't been messed with afterwards.
9		*/
10
11		#include "git-compat-util.h"
12		#include "csum-file.h"
13		#include "git-zlib.h"
14		#include "hash.h"
15		#include "progress.h"
16
17		static void verify_buffer_or_die(struct hashfile *f,
18		const void *buf,
19		unsigned int count)
20	0	{
21	0	ssize_t ret = read_in_full(f->check_fd, f->check_buffer, count);
22
23	0	if (ret < 0)
24	0	die_errno("%s: sha1 file read error", f->name);
25	0	if ((size_t)ret != count)
26	0	die("%s: sha1 file truncated", f->name);
27	0	if (memcmp(buf, f->check_buffer, count))
28	0	die("sha1 file '%s' validation error", f->name);
29	0	}
30
31		static void flush(struct hashfile *f, const void *buf, unsigned int count)
32	0	{
33	0	if (0 <= f->check_fd && count)
34	0	verify_buffer_or_die(f, buf, count);
35
36	0	if (write_in_full(f->fd, buf, count) < 0) {
37	0	if (errno == ENOSPC)
38	0	die("sha1 file '%s' write error. Out of diskspace", f->name);
39	0	die_errno("sha1 file '%s' write error", f->name);
40	0	}
41
42	0	f->total += count;
43	0	display_throughput(f->tp, f->total);
44	0	}
45
46		void hashflush(struct hashfile *f)
47	0	{
48	0	unsigned offset = f->offset;
49
50	0	if (offset) {
51	0	if (!f->skip_hash)
52	0	git_hash_update(&f->ctx, f->buffer, offset);
53	0	flush(f, f->buffer, offset);
54	0	f->offset = 0;
55	0	}
56	0	}
57
58		void free_hashfile(struct hashfile *f)
59	0	{
60	0	free(f->buffer);
61	0	free(f->check_buffer);
62	0	free(f);
63	0	}
64
65		int finalize_hashfile(struct hashfile *f, unsigned char *result,
66		enum fsync_component component, unsigned int flags)
67	0	{
68	0	int fd;
69
70	0	hashflush(f);
71
72	0	if (f->skip_hash)
73	0	hashclr(f->buffer, f->algop);
74	0	else
75	0	git_hash_final(f->buffer, &f->ctx);
76
77	0	if (result)
78	0	hashcpy(result, f->buffer, f->algop);
79	0	if (flags & CSUM_HASH_IN_STREAM)
80	0	flush(f, f->buffer, f->algop->rawsz);
81	0	if (flags & CSUM_FSYNC)
82	0	fsync_component_or_die(component, f->fd, f->name);
83	0	if (flags & CSUM_CLOSE) {
84	0	if (close(f->fd))
85	0	die_errno("%s: sha1 file error on close", f->name);
86	0	fd = 0;
87	0	} else
88	0	fd = f->fd;
89	0	if (0 <= f->check_fd) {
90	0	char discard;
91	0	int cnt = read_in_full(f->check_fd, &discard, 1);
92	0	if (cnt < 0)
93	0	die_errno("%s: error when reading the tail of sha1 file",
94	0	f->name);
95	0	if (cnt)
96	0	die("%s: sha1 file has trailing garbage", f->name);
97	0	if (close(f->check_fd))
98	0	die_errno("%s: sha1 file error on close", f->name);
99	0	}
100	0	free_hashfile(f);
101	0	return fd;
102	0	}
103
104		void discard_hashfile(struct hashfile *f)
105	0	{
106	0	if (0 <= f->check_fd)
107	0	close(f->check_fd);
108	0	if (0 <= f->fd)
109	0	close(f->fd);
110	0	free_hashfile(f);
111	0	}
112
113		void hashwrite(struct hashfile *f, const void *buf, unsigned int count)
114	0	{
115	0	while (count) {
116	0	unsigned left = f->buffer_len - f->offset;
117	0	unsigned nr = count > left ? left : count;
118
119	0	if (f->do_crc)
120	0	f->crc32 = crc32(f->crc32, buf, nr);
121
122	0	if (nr == f->buffer_len) {
123		/*
124		* Flush a full batch worth of data directly
125		* from the input, skipping the memcpy() to
126		* the hashfile's buffer. In this block,
127		* f->offset is necessarily zero.
128		*/
129	0	if (!f->skip_hash)
130	0	git_hash_update(&f->ctx, buf, nr);
131	0	flush(f, buf, nr);
132	0	} else {
133		/*
134		* Copy to the hashfile's buffer, flushing only
135		* if it became full.
136		*/
137	0	memcpy(f->buffer + f->offset, buf, nr);
138	0	f->offset += nr;
139	0	left -= nr;
140	0	if (!left)
141	0	hashflush(f);
142	0	}
143
144	0	count -= nr;
145	0	buf = (char *) buf + nr;
146	0	}
147	0	}
148
149		struct hashfile *hashfd_check(const struct git_hash_algo *algop,
150		const char *name)
151	0	{
152	0	int sink, check;
153	0	struct hashfile *f;
154
155	0	sink = xopen("/dev/null", O_WRONLY);
156	0	check = xopen(name, O_RDONLY);
157	0	f = hashfd(algop, sink, name);
158	0	f->check_fd = check;
159	0	f->check_buffer = xmalloc(f->buffer_len);
160
161	0	return f;
162	0	}
163
164		static struct hashfile *hashfd_internal(const struct git_hash_algo *algop,
165		int fd, const char *name,
166		struct progress *tp,
167		size_t buffer_len)
168	0	{
169	0	struct hashfile *f = xmalloc(sizeof(*f));
170	0	f->fd = fd;
171	0	f->check_fd = -1;
172	0	f->offset = 0;
173	0	f->total = 0;
174	0	f->tp = tp;
175	0	f->name = name;
176	0	f->do_crc = 0;
177	0	f->skip_hash = 0;
178
179	0	f->algop = unsafe_hash_algo(algop);
180	0	f->algop->init_fn(&f->ctx);
181
182	0	f->buffer_len = buffer_len;
183	0	f->buffer = xmalloc(buffer_len);
184	0	f->check_buffer = NULL;
185
186	0	return f;
187	0	}
188
189		struct hashfile *hashfd(const struct git_hash_algo *algop,
190		int fd, const char *name)
191	0	{
192		/*
193		* Since we are not going to use a progress meter to
194		* measure the rate of data passing through this hashfile,
195		* use a larger buffer size to reduce fsync() calls.
196		*/
197	0	return hashfd_internal(algop, fd, name, NULL, 128 * 1024);
198	0	}
199
200		struct hashfile *hashfd_throughput(const struct git_hash_algo *algop,
201		int fd, const char *name, struct progress *tp)
202	0	{
203		/*
204		* Since we are expecting to report progress of the
205		* write into this hashfile, use a smaller buffer
206		* size so the progress indicators arrive at a more
207		* frequent rate.
208		*/
209	0	return hashfd_internal(algop, fd, name, tp, 8 * 1024);
210	0	}
211
212		void hashfile_checkpoint_init(struct hashfile *f,
213		struct hashfile_checkpoint *checkpoint)
214	0	{
215	0	memset(checkpoint, 0, sizeof(*checkpoint));
216	0	f->algop->init_fn(&checkpoint->ctx);
217	0	}
218
219		void hashfile_checkpoint(struct hashfile *f, struct hashfile_checkpoint *checkpoint)
220	0	{
221	0	hashflush(f);
222	0	checkpoint->offset = f->total;
223	0	git_hash_clone(&checkpoint->ctx, &f->ctx);
224	0	}
225
226		int hashfile_truncate(struct hashfile *f, struct hashfile_checkpoint *checkpoint)
227	0	{
228	0	off_t offset = checkpoint->offset;
229
230	0	if (ftruncate(f->fd, offset) ||
231	0	lseek(f->fd, offset, SEEK_SET) != offset)
232	0	return -1;
233	0	f->total = offset;
234	0	git_hash_clone(&f->ctx, &checkpoint->ctx);
235	0	f->offset = 0; /* hashflush() was called in checkpoint */
236	0	return 0;
237	0	}
238
239		void crc32_begin(struct hashfile *f)
240	0	{
241	0	f->crc32 = crc32(0, NULL, 0);
242	0	f->do_crc = 1;
243	0	}
244
245		uint32_t crc32_end(struct hashfile *f)
246	0	{
247	0	f->do_crc = 0;
248	0	return f->crc32;
249	0	}
250
251		int hashfile_checksum_valid(const struct git_hash_algo *algop,
252		const unsigned char *data, size_t total_len)
253	0	{
254	0	unsigned char got[GIT_MAX_RAWSZ];
255	0	struct git_hash_ctx ctx;
256	0	size_t data_len = total_len - algop->rawsz;
257
258	0	algop = unsafe_hash_algo(algop);
259
260	0	if (total_len < algop->rawsz)
261	0	return 0; /* say "too short"? */
262
263	0	algop->init_fn(&ctx);
264	0	git_hash_update(&ctx, data, data_len);
265	0	git_hash_final(got, &ctx);
266
267	0	return hasheq(got, data + data_len, algop);
268	0	}