2 * GIT - The information manager from hell
4 * Copyright (C) Linus Torvalds, 2005
6 * This handles basic git object files - packing, unpacking,
10 #define USE_THE_REPOSITORY_VARIABLE
11 #define DISABLE_SIGN_COMPARE_WARNINGS
13 #include "git-compat-util.h"
14 #include "bulk-checkin.h"
17 #include "environment.h"
22 #include "object-file-convert.h"
23 #include "object-file.h"
30 #include "streaming.h"
32 /* The maximum size for an object header. */
33 #define MAX_HEADER_LEN 32
35 static int get_conv_flags(unsigned flags
)
37 if (flags
& INDEX_RENORMALIZE
)
38 return CONV_EOL_RENORMALIZE
;
39 else if (flags
& INDEX_WRITE_OBJECT
)
40 return global_conv_flags_eol
| CONV_WRITE_OBJECT
;
45 static void fill_loose_path(struct strbuf
*buf
, const struct object_id
*oid
)
48 for (i
= 0; i
< the_hash_algo
->rawsz
; i
++) {
49 static char hex
[] = "0123456789abcdef";
50 unsigned int val
= oid
->hash
[i
];
51 strbuf_addch(buf
, hex
[val
>> 4]);
52 strbuf_addch(buf
, hex
[val
& 0xf]);
54 strbuf_addch(buf
, '/');
58 const char *odb_loose_path(struct odb_source
*source
,
60 const struct object_id
*oid
)
63 strbuf_addstr(buf
, source
->path
);
64 strbuf_addch(buf
, '/');
65 fill_loose_path(buf
, oid
);
69 /* Returns 1 if we have successfully freshened the file, 0 otherwise. */
70 static int freshen_file(const char *fn
)
72 return !utime(fn
, NULL
);
/*
 * Shared contract of the check_and_freshen family: return 1 if the file
 * exists and (when "freshen" is set) its timestamps could be updated,
 * 0 otherwise. A 0 result means the caller must not skip writing the
 * object: it is either absent or could not be freshened.
 */
int check_and_freshen_file(const char *fn, int freshen)
{
	int exists = !access(fn, F_OK);

	if (!exists)
		return 0;
	return !freshen || freshen_file(fn);
}
91 static int check_and_freshen_odb(struct odb_source
*source
,
92 const struct object_id
*oid
,
95 static struct strbuf path
= STRBUF_INIT
;
96 odb_loose_path(source
, &path
, oid
);
97 return check_and_freshen_file(path
.buf
, freshen
);
100 static int check_and_freshen_local(const struct object_id
*oid
, int freshen
)
102 return check_and_freshen_odb(the_repository
->objects
->sources
, oid
, freshen
);
105 static int check_and_freshen_nonlocal(const struct object_id
*oid
, int freshen
)
107 struct odb_source
*source
;
109 odb_prepare_alternates(the_repository
->objects
);
110 for (source
= the_repository
->objects
->sources
->next
; source
; source
= source
->next
) {
111 if (check_and_freshen_odb(source
, oid
, freshen
))
/*
 * Look in the first object source, and only if that fails in the remaining
 * ones. Returns 1 on a hit, 0 otherwise.
 */
static int check_and_freshen(const struct object_id *oid, int freshen)
{
	if (check_and_freshen_local(oid, freshen))
		return 1;
	return !!check_and_freshen_nonlocal(oid, freshen);
}
/* Does a source other than the first one hold "oid" as a loose object? */
int has_loose_object_nonlocal(const struct object_id *oid)
{
	const int freshen = 0;	/* existence check only */

	return check_and_freshen_nonlocal(oid, freshen);
}
/* Does any object source hold "oid" as a loose object? */
int has_loose_object(const struct object_id *oid)
{
	const int freshen = 0;	/* existence check only */

	return check_and_freshen(oid, freshen);
}
/*
 * Format an object header of the form "<type name> <decimal size>" into
 * "str" (capacity "size") using xsnprintf(), and return xsnprintf()'s
 * result plus one.
 * NOTE(review): the "+ 1" presumably accounts for the terminating NUL being
 * part of the header -- confirm. The condition guarding the BUG() call and
 * the declaration of "objsize" are not visible in this excerpt.
 */
133 int format_object_header(char *str
, size_t size
, enum object_type type
,
136 const char *name
= type_name(type
);
139 BUG("could not get a type name for 'enum object_type' value %d", type
);
141 return xsnprintf(str
, size
, "%s %"PRIuMAX
, name
, (uintmax_t)objsize
) + 1;
144 int check_object_signature(struct repository
*r
, const struct object_id
*oid
,
145 void *buf
, unsigned long size
,
146 enum object_type type
)
148 const struct git_hash_algo
*algo
=
149 oid
->algo
? &hash_algos
[oid
->algo
] : r
->hash_algo
;
150 struct object_id real_oid
;
152 hash_object_file(algo
, buf
, size
, type
, &real_oid
);
154 return !oideq(oid
, &real_oid
) ? -1 : 0;
/*
 * Streaming variant of the object signature check: open "oid" with
 * open_istream(), hash the formatted object header followed by every chunk
 * obtained from read_istream() using r->hash_algo, and compare the
 * resulting id with "oid". Returns 0 when they are equal, -1 when they
 * differ.
 * NOTE(review): the handling of a failed open or read, the read loop header
 * and the closing of the stream are not visible in this excerpt.
 */
157 int stream_object_signature(struct repository
*r
, const struct object_id
*oid
)
159 struct object_id real_oid
;
161 enum object_type obj_type
;
162 struct git_istream
*st
;
163 struct git_hash_ctx c
;
164 char hdr
[MAX_HEADER_LEN
];
167 st
= open_istream(r
, oid
, &obj_type
, &size
, NULL
);
171 /* Generate the header */
172 hdrlen
= format_object_header(hdr
, sizeof(hdr
), obj_type
, size
);
175 r
->hash_algo
->init_fn(&c
);
176 git_hash_update(&c
, hdr
, hdrlen
);
179 ssize_t readlen
= read_istream(st
, buf
, sizeof(buf
));
187 git_hash_update(&c
, buf
, readlen
);
189 git_hash_final_oid(&real_oid
, &c
);
191 return !oideq(oid
, &real_oid
) ? -1 : 0;
195 * Find "oid" as a loose object in the local repository or in an alternate.
196 * Returns 0 on success, negative on failure.
198 * The "path" out-parameter will give the path of the object we found (if any).
199 * Note that it may point to static storage and is only valid until another
200 * call to stat_loose_object().
202 static int stat_loose_object(struct repository
*r
, const struct object_id
*oid
,
203 struct stat
*st
, const char **path
)
205 struct odb_source
*source
;
206 static struct strbuf buf
= STRBUF_INIT
;
208 odb_prepare_alternates(r
->objects
);
209 for (source
= r
->objects
->sources
; source
; source
= source
->next
) {
210 *path
= odb_loose_path(source
, &buf
, oid
);
211 if (!lstat(*path
, st
))
219 * Like stat_loose_object(), but actually open the object and return the
220 * descriptor. See the caveats on the "path" parameter above.
222 static int open_loose_object(struct repository
*r
,
223 const struct object_id
*oid
, const char **path
)
226 struct odb_source
*source
;
227 int most_interesting_errno
= ENOENT
;
228 static struct strbuf buf
= STRBUF_INIT
;
230 odb_prepare_alternates(r
->objects
);
231 for (source
= r
->objects
->sources
; source
; source
= source
->next
) {
232 *path
= odb_loose_path(source
, &buf
, oid
);
233 fd
= git_open(*path
);
237 if (most_interesting_errno
== ENOENT
)
238 most_interesting_errno
= errno
;
240 errno
= most_interesting_errno
;
244 static int quick_has_loose(struct repository
*r
,
245 const struct object_id
*oid
)
247 struct odb_source
*source
;
249 odb_prepare_alternates(r
->objects
);
250 for (source
= r
->objects
->sources
; source
; source
= source
->next
) {
251 if (oidtree_contains(odb_loose_cache(source
, oid
), oid
))
/*
 * Map the loose object file behind "fd" read-only and close the descriptor
 * in every case. "*size" is set to the file size when fstat() succeeds.
 * Returns the mapping, or NULL if fstat() fails or the file is empty (an
 * error mentioning "path" is reported for the empty case).
 */
static void *map_fd(int fd, const char *path, unsigned long *size)
{
	struct stat st;
	void *map = NULL;

	if (fstat(fd, &st))
		goto done;

	*size = xsize_t(st.st_size);
	if (*size)
		map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
	else
		/* mmap() is forbidden on empty files */
		error(_("object file %s is empty"), path);

done:
	close(fd);
	return map;
}
280 void *map_loose_object(struct repository
*r
,
281 const struct object_id
*oid
,
285 int fd
= open_loose_object(r
, oid
, &p
);
289 return map_fd(fd
, p
, size
);
/*
 * Start inflating a mapped loose object: "stream" is zeroed, pointed at the
 * "mapsize" input bytes of "map" and at the "bufsiz"-byte output "buffer",
 * and a single git_inflate() call is made. ULHR_TOO_LONG is returned when
 * no NUL byte is found in the output produced by that first call.
 * NOTE(review): the return values for a failed inflate and for the
 * header-found case, as well as the "map" and "buffer" parameter
 * declarations, are not visible in this excerpt.
 */
292 enum unpack_loose_header_result
unpack_loose_header(git_zstream
*stream
,
294 unsigned long mapsize
,
296 unsigned long bufsiz
)
300 /* Get the data stream */
301 memset(stream
, 0, sizeof(*stream
));
302 stream
->next_in
= map
;
303 stream
->avail_in
= mapsize
;
304 stream
->next_out
= buffer
;
305 stream
->avail_out
= bufsiz
;
307 git_inflate_init(stream
);
309 status
= git_inflate(stream
, 0);
311 if (status
!= Z_OK
&& status
!= Z_STREAM_END
)
315 * Check if entire header is unpacked in the first iteration.
317 if (memchr(buffer
, '\0', stream
->next_out
- (unsigned char *)buffer
))
321 * We have a header longer than MAX_HEADER_LEN.
323 return ULHR_TOO_LONG
;
326 static void *unpack_loose_rest(git_zstream
*stream
,
327 void *buffer
, unsigned long size
,
328 const struct object_id
*oid
)
330 int bytes
= strlen(buffer
) + 1;
331 unsigned char *buf
= xmallocz(size
);
335 n
= stream
->total_out
- bytes
;
338 memcpy(buf
, (char *) buffer
+ bytes
, n
);
342 * The above condition must be (bytes <= size), not
343 * (bytes < size). In other words, even though we
344 * expect no more output and set avail_out to zero,
345 * the input zlib stream may have bytes that express
346 * "this concludes the stream", and we *do* want to
349 * Otherwise we would not be able to test that we
350 * consumed all the input to reach the expected size;
351 * we also want to check that zlib tells us that all
352 * went well with status == Z_STREAM_END at the end.
354 stream
->next_out
= buf
+ bytes
;
355 stream
->avail_out
= size
- bytes
;
356 while (status
== Z_OK
) {
358 status
= git_inflate(stream
, Z_FINISH
);
363 if (status
!= Z_STREAM_END
) {
364 error(_("corrupt loose object '%s'"), oid_to_hex(oid
));
366 } else if (stream
->avail_in
) {
367 error(_("garbage at end of loose object '%s'"),
376 * We used to just use "sscanf()", but that's actually way
377 * too permissive for what we want to check. So do an anal
378 * object header parse by hand.
380 int parse_loose_header(const char *hdr
, struct object_info
*oi
)
382 const char *type_buf
= hdr
;
384 int type
, type_len
= 0;
387 * The type can be of any size but is followed by
399 type
= type_from_string_gently(type_buf
, type_len
, 1);
404 * The length must follow immediately, and be in canonical
405 * decimal format (ie "010" is not valid).
412 unsigned long c
= *hdr
- '0';
416 size
= st_add(st_mult(size
, 10), c
);
421 *oi
->sizep
= cast_size_t_to_ulong(size
);
424 * The length must be followed by a zero byte
430 * The format is valid, but the type may still be bogus. The
431 * Caller needs to check its oi->typep.
436 int loose_object_info(struct repository
*r
,
437 const struct object_id
*oid
,
438 struct object_info
*oi
, int flags
)
442 unsigned long mapsize
;
446 char hdr
[MAX_HEADER_LEN
];
447 unsigned long size_scratch
;
448 enum object_type type_scratch
;
450 if (oi
->delta_base_oid
)
451 oidclr(oi
->delta_base_oid
, the_repository
->hash_algo
);
454 * If we don't care about type or size, then we don't
455 * need to look inside the object at all. Note that we
456 * do not optimize out the stat call, even if the
457 * caller doesn't care about the disk-size, since our
458 * return value implicitly indicates whether the
459 * object even exists.
461 if (!oi
->typep
&& !oi
->sizep
&& !oi
->contentp
) {
463 if (!oi
->disk_sizep
&& (flags
& OBJECT_INFO_QUICK
))
464 return quick_has_loose(r
, oid
) ? 0 : -1;
465 if (stat_loose_object(r
, oid
, &st
, &path
) < 0)
468 *oi
->disk_sizep
= st
.st_size
;
472 fd
= open_loose_object(r
, oid
, &path
);
475 error_errno(_("unable to open loose object %s"), oid_to_hex(oid
));
478 map
= map_fd(fd
, path
, &mapsize
);
483 oi
->sizep
= &size_scratch
;
485 oi
->typep
= &type_scratch
;
488 *oi
->disk_sizep
= mapsize
;
490 switch (unpack_loose_header(&stream
, map
, mapsize
, hdr
, sizeof(hdr
))) {
492 if (parse_loose_header(hdr
, oi
) < 0)
493 status
= error(_("unable to parse %s header"), oid_to_hex(oid
));
494 else if (*oi
->typep
< 0)
495 die(_("invalid object type"));
499 *oi
->contentp
= unpack_loose_rest(&stream
, hdr
, *oi
->sizep
, oid
);
506 status
= error(_("unable to unpack %s header"),
510 status
= error(_("header for %s too long, exceeds %d bytes"),
511 oid_to_hex(oid
), MAX_HEADER_LEN
);
515 if (status
&& (flags
& OBJECT_INFO_DIE_IF_CORRUPT
))
516 die(_("loose object %s (stored in %s) is corrupt"),
517 oid_to_hex(oid
), path
);
520 git_inflate_end(&stream
);
521 munmap(map
, mapsize
);
522 if (oi
->sizep
== &size_scratch
)
524 if (oi
->typep
== &type_scratch
)
526 oi
->whence
= OI_LOOSE
;
530 static void hash_object_body(const struct git_hash_algo
*algo
, struct git_hash_ctx
*c
,
531 const void *buf
, unsigned long len
,
532 struct object_id
*oid
,
533 char *hdr
, int *hdrlen
)
536 git_hash_update(c
, hdr
, *hdrlen
);
537 git_hash_update(c
, buf
, len
);
538 git_hash_final_oid(oid
, c
);
541 static void write_object_file_prepare(const struct git_hash_algo
*algo
,
542 const void *buf
, unsigned long len
,
543 enum object_type type
, struct object_id
*oid
,
544 char *hdr
, int *hdrlen
)
546 struct git_hash_ctx c
;
548 /* Generate the header */
549 *hdrlen
= format_object_header(hdr
, *hdrlen
, type
, len
);
552 hash_object_body(algo
, &c
, buf
, len
, oid
, hdr
, hdrlen
);
555 #define CHECK_COLLISION_DEST_VANISHED -2
557 static int check_collision(const char *source
, const char *dest
)
559 char buf_source
[4096], buf_dest
[4096];
560 int fd_source
= -1, fd_dest
= -1;
563 fd_source
= open(source
, O_RDONLY
);
565 ret
= error_errno(_("unable to open %s"), source
);
569 fd_dest
= open(dest
, O_RDONLY
);
572 ret
= error_errno(_("unable to open %s"), dest
);
574 ret
= CHECK_COLLISION_DEST_VANISHED
;
581 sz_a
= read_in_full(fd_source
, buf_source
, sizeof(buf_source
));
583 ret
= error_errno(_("unable to read %s"), source
);
587 sz_b
= read_in_full(fd_dest
, buf_dest
, sizeof(buf_dest
));
589 ret
= error_errno(_("unable to read %s"), dest
);
593 if (sz_a
!= sz_b
|| memcmp(buf_source
, buf_dest
, sz_a
)) {
594 ret
= error(_("files '%s' and '%s' differ in contents"),
599 if (sz_a
< sizeof(buf_source
))
612 * Move the just written object into its final resting place.
614 int finalize_object_file(const char *tmpfile
, const char *filename
)
616 return finalize_object_file_flags(tmpfile
, filename
, 0);
619 int finalize_object_file_flags(const char *tmpfile
, const char *filename
,
620 enum finalize_object_file_flags flags
)
622 unsigned retries
= 0;
628 if (object_creation_mode
== OBJECT_CREATION_USES_RENAMES
)
630 else if (link(tmpfile
, filename
))
633 unlink_or_warn(tmpfile
);
636 * Coda hack - coda doesn't like cross-directory links,
637 * so we fall back to a rename, which will mean that it
638 * won't be able to check collisions, but that's not a
641 * The same holds for FAT formatted media.
643 * When this succeeds, we just return. We have nothing
646 if (ret
&& ret
!= EEXIST
) {
650 if (!stat(filename
, &st
))
652 else if (!rename(tmpfile
, filename
))
659 int saved_errno
= errno
;
660 unlink_or_warn(tmpfile
);
662 return error_errno(_("unable to write file %s"), filename
);
664 if (!(flags
& FOF_SKIP_COLLISION_CHECK
)) {
665 ret
= check_collision(tmpfile
, filename
);
666 if (ret
== CHECK_COLLISION_DEST_VANISHED
) {
668 return error(_("unable to write repeatedly vanishing file %s"),
675 unlink_or_warn(tmpfile
);
679 if (adjust_shared_perm(the_repository
, filename
))
680 return error(_("unable to set permission to '%s'"), filename
);
684 void hash_object_file(const struct git_hash_algo
*algo
, const void *buf
,
685 unsigned long len
, enum object_type type
,
686 struct object_id
*oid
)
688 char hdr
[MAX_HEADER_LEN
];
689 int hdrlen
= sizeof(hdr
);
691 write_object_file_prepare(algo
, buf
, len
, type
, oid
, hdr
, &hdrlen
);
694 /* Finalize a file on disk, and close it. */
695 static void close_loose_object(int fd
, const char *filename
)
697 if (the_repository
->objects
->sources
->will_destroy
)
700 if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT
))
701 fsync_loose_object_bulk_checkin(fd
, filename
);
702 else if (fsync_object_files
> 0)
703 fsync_or_die(fd
, filename
);
705 fsync_component_or_die(FSYNC_COMPONENT_LOOSE_OBJECT
, fd
,
710 die_errno(_("error when closing loose object file"));
/*
 * Length of the directory part of "filename", counting the final '/'.
 * Returns 0 when the name contains no '/' at all.
 */
static inline int directory_size(const char *filename)
{
	const char *last_slash = strrchr(filename, '/');

	return last_slash ? last_slash - filename + 1 : 0;
}
/*
 * Summary of the visible steps: the directory part of "filename" (as
 * measured by directory_size()) plus "tmp_obj_XXXXXX" is placed in "tmp"
 * and handed to git_mkstemp_mode() with mode 0444. If that fails with
 * ENOENT and there is a directory part, the directory is created with
 * mkdir() (EEXIST is tolerated), passed to adjust_shared_perm(), and the
 * temporary file creation is retried.
 * NOTE(review): the statements that reset "tmp" and the return statements
 * are not visible in this excerpt; presumably the descriptor from
 * git_mkstemp_mode() is returned -- confirm.
 */
723 * This creates a temporary file in the same directory as the final
726 * We want to avoid cross-directory filename renames, because those
727 * can have problems on various filesystems (FAT, NFS, Coda).
729 static int create_tmpfile(struct strbuf
*tmp
, const char *filename
)
731 int fd
, dirlen
= directory_size(filename
);
734 strbuf_add(tmp
, filename
, dirlen
);
735 strbuf_addstr(tmp
, "tmp_obj_XXXXXX");
736 fd
= git_mkstemp_mode(tmp
->buf
, 0444);
737 if (fd
< 0 && dirlen
&& errno
== ENOENT
) {
739 * Make sure the directory exists; note that the contents
740 * of the buffer are undefined after mkstemp returns an
741 * error, so we have to rewrite the whole buffer from
745 strbuf_add(tmp
, filename
, dirlen
- 1);
746 if (mkdir(tmp
->buf
, 0777) && errno
!= EEXIST
)
748 if (adjust_shared_perm(the_repository
, tmp
->buf
))
752 strbuf_addstr(tmp
, "/tmp_obj_XXXXXX");
753 fd
= git_mkstemp_mode(tmp
->buf
, 0444);
759 * Common steps for loose object writers to start writing loose
762 * - Create tmpfile for the loose object.
763 * - Setup zlib stream for compression.
764 * - Start to feed header to zlib stream.
766 * Returns a "fd", which should later be provided to
767 * end_loose_object_common().
769 static int start_loose_object_common(struct strbuf
*tmp_file
,
770 const char *filename
, unsigned flags
,
772 unsigned char *buf
, size_t buflen
,
773 struct git_hash_ctx
*c
, struct git_hash_ctx
*compat_c
,
774 char *hdr
, int hdrlen
)
776 struct repository
*repo
= the_repository
;
777 const struct git_hash_algo
*algo
= repo
->hash_algo
;
778 const struct git_hash_algo
*compat
= repo
->compat_hash_algo
;
781 fd
= create_tmpfile(tmp_file
, filename
);
783 if (flags
& WRITE_OBJECT_FILE_SILENT
)
785 else if (errno
== EACCES
)
786 return error(_("insufficient permission for adding "
787 "an object to repository database %s"),
788 repo_get_object_directory(the_repository
));
791 _("unable to create temporary file"));
794 /* Setup zlib stream for compression */
795 git_deflate_init(stream
, zlib_compression_level
);
796 stream
->next_out
= buf
;
797 stream
->avail_out
= buflen
;
799 if (compat
&& compat_c
)
800 compat
->init_fn(compat_c
);
802 /* Start to feed header to zlib stream */
803 stream
->next_in
= (unsigned char *)hdr
;
804 stream
->avail_in
= hdrlen
;
805 while (git_deflate(stream
, 0) == Z_OK
)
807 git_hash_update(c
, hdr
, hdrlen
);
808 if (compat
&& compat_c
)
809 git_hash_update(compat_c
, hdr
, hdrlen
);
815 * Common steps for the inner git_deflate() loop for writing loose
816 * objects. Returns what git_deflate() returns.
818 static int write_loose_object_common(struct git_hash_ctx
*c
, struct git_hash_ctx
*compat_c
,
819 git_zstream
*stream
, const int flush
,
820 unsigned char *in0
, const int fd
,
821 unsigned char *compressed
,
822 const size_t compressed_len
)
824 struct repository
*repo
= the_repository
;
825 const struct git_hash_algo
*compat
= repo
->compat_hash_algo
;
828 ret
= git_deflate(stream
, flush
? Z_FINISH
: 0);
829 git_hash_update(c
, in0
, stream
->next_in
- in0
);
830 if (compat
&& compat_c
)
831 git_hash_update(compat_c
, in0
, stream
->next_in
- in0
);
832 if (write_in_full(fd
, compressed
, stream
->next_out
- compressed
) < 0)
833 die_errno(_("unable to write loose object file"));
834 stream
->next_out
= compressed
;
835 stream
->avail_out
= compressed_len
;
841 * Common steps for loose object writers to end writing loose objects:
843 * - End the compression of zlib stream.
844 * - Get the calculated oid to "oid".
846 static int end_loose_object_common(struct git_hash_ctx
*c
, struct git_hash_ctx
*compat_c
,
847 git_zstream
*stream
, struct object_id
*oid
,
848 struct object_id
*compat_oid
)
850 struct repository
*repo
= the_repository
;
851 const struct git_hash_algo
*compat
= repo
->compat_hash_algo
;
854 ret
= git_deflate_end_gently(stream
);
857 git_hash_final_oid(oid
, c
);
858 if (compat
&& compat_c
)
859 git_hash_final_oid(compat_oid
, compat_c
);
864 static int write_loose_object(const struct object_id
*oid
, char *hdr
,
865 int hdrlen
, const void *buf
, unsigned long len
,
866 time_t mtime
, unsigned flags
)
869 unsigned char compressed
[4096];
871 struct git_hash_ctx c
;
872 struct object_id parano_oid
;
873 static struct strbuf tmp_file
= STRBUF_INIT
;
874 static struct strbuf filename
= STRBUF_INIT
;
876 if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT
))
877 prepare_loose_object_bulk_checkin();
879 odb_loose_path(the_repository
->objects
->sources
, &filename
, oid
);
881 fd
= start_loose_object_common(&tmp_file
, filename
.buf
, flags
,
882 &stream
, compressed
, sizeof(compressed
),
883 &c
, NULL
, hdr
, hdrlen
);
887 /* Then the data itself.. */
888 stream
.next_in
= (void *)buf
;
889 stream
.avail_in
= len
;
891 unsigned char *in0
= stream
.next_in
;
893 ret
= write_loose_object_common(&c
, NULL
, &stream
, 1, in0
, fd
,
894 compressed
, sizeof(compressed
));
895 } while (ret
== Z_OK
);
897 if (ret
!= Z_STREAM_END
)
898 die(_("unable to deflate new object %s (%d)"), oid_to_hex(oid
),
900 ret
= end_loose_object_common(&c
, NULL
, &stream
, ¶no_oid
, NULL
);
902 die(_("deflateEnd on object %s failed (%d)"), oid_to_hex(oid
),
904 if (!oideq(oid
, ¶no_oid
))
905 die(_("confused by unstable object source data for %s"),
908 close_loose_object(fd
, tmp_file
.buf
);
914 if (utime(tmp_file
.buf
, &utb
) < 0 &&
915 !(flags
& WRITE_OBJECT_FILE_SILENT
))
916 warning_errno(_("failed utime() on %s"), tmp_file
.buf
);
919 return finalize_object_file_flags(tmp_file
.buf
, filename
.buf
,
920 FOF_SKIP_COLLISION_CHECK
);
/*
 * Check for "oid" as a loose object in any object source and freshen its
 * file. Returns 1 if the object was found and freshened, 0 otherwise.
 */
static int freshen_loose_object(const struct object_id *oid)
{
	const int freshen = 1;

	return check_and_freshen(oid, freshen);
}
/*
 * Look up "oid" in the repository's packs with find_pack_entry() and
 * freshen the containing pack file via freshen_file(e.p->pack_name).
 * NOTE(review): the declaration of "e", any intermediate checks and all
 * return statements are not visible in this excerpt; callers use the result
 * as a boolean "object is packed and fresh" -- confirm against the full
 * source.
 */
928 static int freshen_packed_object(const struct object_id
*oid
)
931 if (!find_pack_entry(the_repository
, oid
, &e
))
937 if (!freshen_file(e
.p
->pack_name
))
943 int stream_loose_object(struct input_stream
*in_stream
, size_t len
,
944 struct object_id
*oid
)
946 const struct git_hash_algo
*compat
= the_repository
->compat_hash_algo
;
947 struct object_id compat_oid
;
948 int fd
, ret
, err
= 0, flush
= 0;
949 unsigned char compressed
[4096];
951 struct git_hash_ctx c
, compat_c
;
952 struct strbuf tmp_file
= STRBUF_INIT
;
953 struct strbuf filename
= STRBUF_INIT
;
955 char hdr
[MAX_HEADER_LEN
];
958 if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT
))
959 prepare_loose_object_bulk_checkin();
961 /* Since oid is not determined, save tmp file to odb path. */
962 strbuf_addf(&filename
, "%s/", repo_get_object_directory(the_repository
));
963 hdrlen
= format_object_header(hdr
, sizeof(hdr
), OBJ_BLOB
, len
);
966 * Common steps for write_loose_object and stream_loose_object to
967 * start writing loose objects:
969 * - Create tmpfile for the loose object.
970 * - Setup zlib stream for compression.
971 * - Start to feed header to zlib stream.
973 fd
= start_loose_object_common(&tmp_file
, filename
.buf
, 0,
974 &stream
, compressed
, sizeof(compressed
),
975 &c
, &compat_c
, hdr
, hdrlen
);
981 /* Then the data itself.. */
983 unsigned char *in0
= stream
.next_in
;
985 if (!stream
.avail_in
&& !in_stream
->is_finished
) {
986 const void *in
= in_stream
->read(in_stream
, &stream
.avail_in
);
987 stream
.next_in
= (void *)in
;
988 in0
= (unsigned char *)in
;
989 /* All data has been read. */
990 if (in_stream
->is_finished
)
993 ret
= write_loose_object_common(&c
, &compat_c
, &stream
, flush
, in0
, fd
,
994 compressed
, sizeof(compressed
));
996 * Unlike write_loose_object(), we do not have the entire
997 * buffer. If we get Z_BUF_ERROR due to too few input bytes,
998 * then we'll replenish them in the next input_stream->read()
1001 } while (ret
== Z_OK
|| ret
== Z_BUF_ERROR
);
1003 if (stream
.total_in
!= len
+ hdrlen
)
1004 die(_("write stream object %ld != %"PRIuMAX
), stream
.total_in
,
1005 (uintmax_t)len
+ hdrlen
);
1008 * Common steps for write_loose_object and stream_loose_object to
1009 * end writing loose object:
1011 * - End the compression of zlib stream.
1012 * - Get the calculated oid.
1014 if (ret
!= Z_STREAM_END
)
1015 die(_("unable to stream deflate new object (%d)"), ret
);
1016 ret
= end_loose_object_common(&c
, &compat_c
, &stream
, oid
, &compat_oid
);
1018 die(_("deflateEnd on stream object failed (%d)"), ret
);
1019 close_loose_object(fd
, tmp_file
.buf
);
1021 if (freshen_packed_object(oid
) || freshen_loose_object(oid
)) {
1022 unlink_or_warn(tmp_file
.buf
);
1026 odb_loose_path(the_repository
->objects
->sources
, &filename
, oid
);
1028 /* We finally know the object path, and create the missing dir. */
1029 dirlen
= directory_size(filename
.buf
);
1031 struct strbuf dir
= STRBUF_INIT
;
1032 strbuf_add(&dir
, filename
.buf
, dirlen
);
1034 if (safe_create_dir_in_gitdir(the_repository
, dir
.buf
) &&
1036 err
= error_errno(_("unable to create directory %s"), dir
.buf
);
1037 strbuf_release(&dir
);
1040 strbuf_release(&dir
);
1043 err
= finalize_object_file_flags(tmp_file
.buf
, filename
.buf
,
1044 FOF_SKIP_COLLISION_CHECK
);
1046 err
= repo_add_loose_object_map(the_repository
, oid
, &compat_oid
);
1048 strbuf_release(&tmp_file
);
1049 strbuf_release(&filename
);
1053 int write_object_file_flags(const void *buf
, unsigned long len
,
1054 enum object_type type
, struct object_id
*oid
,
1055 struct object_id
*compat_oid_in
, unsigned flags
)
1057 struct repository
*repo
= the_repository
;
1058 const struct git_hash_algo
*algo
= repo
->hash_algo
;
1059 const struct git_hash_algo
*compat
= repo
->compat_hash_algo
;
1060 struct object_id compat_oid
;
1061 char hdr
[MAX_HEADER_LEN
];
1062 int hdrlen
= sizeof(hdr
);
1064 /* Generate compat_oid */
1067 oidcpy(&compat_oid
, compat_oid_in
);
1068 else if (type
== OBJ_BLOB
)
1069 hash_object_file(compat
, buf
, len
, type
, &compat_oid
);
1071 struct strbuf converted
= STRBUF_INIT
;
1072 convert_object_file(the_repository
, &converted
, algo
, compat
,
1074 hash_object_file(compat
, converted
.buf
, converted
.len
,
1076 strbuf_release(&converted
);
1080 /* Normally if we have it in the pack then we do not bother writing
1081 * it out into .git/objects/??/?{38} file.
1083 write_object_file_prepare(algo
, buf
, len
, type
, oid
, hdr
, &hdrlen
);
1084 if (freshen_packed_object(oid
) || freshen_loose_object(oid
))
1086 if (write_loose_object(oid
, hdr
, hdrlen
, buf
, len
, 0, flags
))
1089 return repo_add_loose_object_map(repo
, oid
, &compat_oid
);
1093 int force_object_loose(const struct object_id
*oid
, time_t mtime
)
1095 struct repository
*repo
= the_repository
;
1096 const struct git_hash_algo
*compat
= repo
->compat_hash_algo
;
1099 struct object_info oi
= OBJECT_INFO_INIT
;
1100 struct object_id compat_oid
;
1101 enum object_type type
;
1102 char hdr
[MAX_HEADER_LEN
];
1106 if (has_loose_object(oid
))
1111 if (odb_read_object_info_extended(the_repository
->objects
, oid
, &oi
, 0))
1112 return error(_("cannot read object for %s"), oid_to_hex(oid
));
1114 if (repo_oid_to_algop(repo
, oid
, compat
, &compat_oid
))
1115 return error(_("cannot map object %s to %s"),
1116 oid_to_hex(oid
), compat
->name
);
1118 hdrlen
= format_object_header(hdr
, sizeof(hdr
), type
, len
);
1119 ret
= write_loose_object(oid
, hdr
, hdrlen
, buf
, len
, mtime
, 0);
1121 ret
= repo_add_loose_object_map(the_repository
, oid
, &compat_oid
);
1128 * We can't use the normal fsck_error_function() for index_mem(),
1129 * because we don't yet have a valid oid for it to report. Instead,
1130 * report the minimal fsck error here, and rely on the caller to
1131 * give more context.
1133 static int hash_format_check_report(struct fsck_options
*opts UNUSED
,
1134 void *fsck_report UNUSED
,
1135 enum fsck_msg_type msg_type UNUSED
,
1136 enum fsck_msg_id msg_id UNUSED
,
1137 const char *message
)
1139 error(_("object fails fsck: %s"), message
);
1143 static int index_mem(struct index_state
*istate
,
1144 struct object_id
*oid
,
1145 const void *buf
, size_t size
,
1146 enum object_type type
,
1147 const char *path
, unsigned flags
)
1149 struct strbuf nbuf
= STRBUF_INIT
;
1151 int write_object
= flags
& INDEX_WRITE_OBJECT
;
1157 * Convert blobs to git internal format
1159 if ((type
== OBJ_BLOB
) && path
) {
1160 if (convert_to_git(istate
, path
, buf
, size
, &nbuf
,
1161 get_conv_flags(flags
))) {
1166 if (flags
& INDEX_FORMAT_CHECK
) {
1167 struct fsck_options opts
= FSCK_OPTIONS_DEFAULT
;
1170 opts
.error_func
= hash_format_check_report
;
1171 if (fsck_buffer(null_oid(the_hash_algo
), type
, buf
, size
, &opts
))
1172 die(_("refusing to create malformed object"));
1177 ret
= write_object_file(buf
, size
, type
, oid
);
1179 hash_object_file(the_hash_algo
, buf
, size
, type
, oid
);
1181 strbuf_release(&nbuf
);
1185 static int index_stream_convert_blob(struct index_state
*istate
,
1186 struct object_id
*oid
,
1192 const int write_object
= flags
& INDEX_WRITE_OBJECT
;
1193 struct strbuf sbuf
= STRBUF_INIT
;
1196 ASSERT(would_convert_to_git_filter_fd(istate
, path
));
1198 convert_to_git_filter_fd(istate
, path
, fd
, &sbuf
,
1199 get_conv_flags(flags
));
1202 ret
= write_object_file(sbuf
.buf
, sbuf
.len
, OBJ_BLOB
,
1205 hash_object_file(the_hash_algo
, sbuf
.buf
, sbuf
.len
, OBJ_BLOB
,
1207 strbuf_release(&sbuf
);
1211 static int index_pipe(struct index_state
*istate
, struct object_id
*oid
,
1212 int fd
, enum object_type type
,
1213 const char *path
, unsigned flags
)
1215 struct strbuf sbuf
= STRBUF_INIT
;
1218 if (strbuf_read(&sbuf
, fd
, 4096) >= 0)
1219 ret
= index_mem(istate
, oid
, sbuf
.buf
, sbuf
.len
, type
, path
, flags
);
1222 strbuf_release(&sbuf
);
1226 #define SMALL_FILE_SIZE (32*1024)
1228 static int index_core(struct index_state
*istate
,
1229 struct object_id
*oid
, int fd
, size_t size
,
1230 enum object_type type
, const char *path
,
1236 ret
= index_mem(istate
, oid
, "", size
, type
, path
, flags
);
1237 } else if (size
<= SMALL_FILE_SIZE
) {
1238 char *buf
= xmalloc(size
);
1239 ssize_t read_result
= read_in_full(fd
, buf
, size
);
1240 if (read_result
< 0)
1241 ret
= error_errno(_("read error while indexing %s"),
1242 path
? path
: "<unknown>");
1243 else if (read_result
!= size
)
1244 ret
= error(_("short read while indexing %s"),
1245 path
? path
: "<unknown>");
1247 ret
= index_mem(istate
, oid
, buf
, size
, type
, path
, flags
);
1250 void *buf
= xmmap(NULL
, size
, PROT_READ
, MAP_PRIVATE
, fd
, 0);
1251 ret
= index_mem(istate
, oid
, buf
, size
, type
, path
, flags
);
/*
 * Dispatch the indexing of the content behind "fd" to one of four helpers:
 *
 * - index_stream_convert_blob() for a blob with a path for which
 *   would_convert_to_git_filter_fd() is true;
 * - index_pipe() when "st" does not describe a regular file;
 * - index_core() when the size is at most the repository's big file
 *   threshold, or when the path would be converted by would_convert_to_git();
 * - index_blob_bulk_checkin() otherwise.
 *
 * NOTE(review): one operand of the third condition, the trailing arguments
 * of the last two calls, and the function's tail (cleanup and return) are
 * not visible in this excerpt.
 */
1257 int index_fd(struct index_state
*istate
, struct object_id
*oid
,
1258 int fd
, struct stat
*st
,
1259 enum object_type type
, const char *path
, unsigned flags
)
1264 * Call xsize_t() only when needed to avoid potentially unnecessary
1265 * die() for large files.
1267 if (type
== OBJ_BLOB
&& path
&& would_convert_to_git_filter_fd(istate
, path
))
1268 ret
= index_stream_convert_blob(istate
, oid
, fd
, path
, flags
);
1269 else if (!S_ISREG(st
->st_mode
))
1270 ret
= index_pipe(istate
, oid
, fd
, type
, path
, flags
);
1271 else if (st
->st_size
<= repo_settings_get_big_file_threshold(the_repository
) ||
1273 (path
&& would_convert_to_git(istate
, path
)))
1274 ret
= index_core(istate
, oid
, fd
, xsize_t(st
->st_size
),
1277 ret
= index_blob_bulk_checkin(oid
, fd
, xsize_t(st
->st_size
), path
,
1283 int index_path(struct index_state
*istate
, struct object_id
*oid
,
1284 const char *path
, struct stat
*st
, unsigned flags
)
1287 struct strbuf sb
= STRBUF_INIT
;
1290 switch (st
->st_mode
& S_IFMT
) {
1292 fd
= open(path
, O_RDONLY
);
1294 return error_errno("open(\"%s\")", path
);
1295 if (index_fd(istate
, oid
, fd
, st
, OBJ_BLOB
, path
, flags
) < 0)
1296 return error(_("%s: failed to insert into database"),
1300 if (strbuf_readlink(&sb
, path
, st
->st_size
))
1301 return error_errno("readlink(\"%s\")", path
);
1302 if (!(flags
& INDEX_WRITE_OBJECT
))
1303 hash_object_file(the_hash_algo
, sb
.buf
, sb
.len
,
1305 else if (write_object_file(sb
.buf
, sb
.len
, OBJ_BLOB
, oid
))
1306 rc
= error(_("%s: failed to insert into database"), path
);
1307 strbuf_release(&sb
);
1310 return repo_resolve_gitlink_ref(the_repository
, path
, "HEAD", oid
);
1312 return error(_("%s: unsupported file type"), path
);
1317 int read_pack_header(int fd
, struct pack_header
*header
)
1319 if (read_in_full(fd
, header
, sizeof(*header
)) != sizeof(*header
))
1320 /* "eof before pack header was fully read" */
1321 return PH_ERROR_EOF
;
1323 if (header
->hdr_signature
!= htonl(PACK_SIGNATURE
))
1324 /* "protocol error (pack signature mismatch detected)" */
1325 return PH_ERROR_PACK_SIGNATURE
;
1326 if (!pack_version_ok(header
->hdr_version
))
1327 /* "protocol error (pack version unsupported)" */
1328 return PH_ERROR_PROTOCOL
;
1332 int for_each_file_in_obj_subdir(unsigned int subdir_nr
,
1333 struct strbuf
*path
,
1334 each_loose_object_fn obj_cb
,
1335 each_loose_cruft_fn cruft_cb
,
1336 each_loose_subdir_fn subdir_cb
,
1339 size_t origlen
, baselen
;
1343 struct object_id oid
;
1345 if (subdir_nr
> 0xff)
1346 BUG("invalid loose object subdirectory: %x", subdir_nr
);
1348 origlen
= path
->len
;
1349 strbuf_complete(path
, '/');
1350 strbuf_addf(path
, "%02x", subdir_nr
);
1352 dir
= opendir(path
->buf
);
1354 if (errno
!= ENOENT
)
1355 r
= error_errno(_("unable to open %s"), path
->buf
);
1356 strbuf_setlen(path
, origlen
);
1360 oid
.hash
[0] = subdir_nr
;
1361 strbuf_addch(path
, '/');
1362 baselen
= path
->len
;
1364 while ((de
= readdir_skip_dot_and_dotdot(dir
))) {
1367 namelen
= strlen(de
->d_name
);
1368 strbuf_setlen(path
, baselen
);
1369 strbuf_add(path
, de
->d_name
, namelen
);
1370 if (namelen
== the_hash_algo
->hexsz
- 2 &&
1371 !hex_to_bytes(oid
.hash
+ 1, de
->d_name
,
1372 the_hash_algo
->rawsz
- 1)) {
1373 oid_set_algo(&oid
, the_hash_algo
);
1374 memset(oid
.hash
+ the_hash_algo
->rawsz
, 0,
1375 GIT_MAX_RAWSZ
- the_hash_algo
->rawsz
);
1377 r
= obj_cb(&oid
, path
->buf
, data
);
1385 r
= cruft_cb(de
->d_name
, path
->buf
, data
);
1392 strbuf_setlen(path
, baselen
- 1);
1393 if (!r
&& subdir_cb
)
1394 r
= subdir_cb(subdir_nr
, path
->buf
, data
);
1396 strbuf_setlen(path
, origlen
);
1401 int for_each_loose_file_in_objdir_buf(struct strbuf
*path
,
1402 each_loose_object_fn obj_cb
,
1403 each_loose_cruft_fn cruft_cb
,
1404 each_loose_subdir_fn subdir_cb
,
1410 for (i
= 0; i
< 256; i
++) {
1411 r
= for_each_file_in_obj_subdir(i
, path
, obj_cb
, cruft_cb
,
1420 int for_each_loose_file_in_objdir(const char *path
,
1421 each_loose_object_fn obj_cb
,
1422 each_loose_cruft_fn cruft_cb
,
1423 each_loose_subdir_fn subdir_cb
,
1426 struct strbuf buf
= STRBUF_INIT
;
1429 strbuf_addstr(&buf
, path
);
1430 r
= for_each_loose_file_in_objdir_buf(&buf
, obj_cb
, cruft_cb
,
1432 strbuf_release(&buf
);
1437 int for_each_loose_object(each_loose_object_fn cb
, void *data
,
1438 enum for_each_object_flags flags
)
1440 struct odb_source
*source
;
1442 odb_prepare_alternates(the_repository
->objects
);
1443 for (source
= the_repository
->objects
->sources
; source
; source
= source
->next
) {
1444 int r
= for_each_loose_file_in_objdir(source
->path
, cb
, NULL
,
1449 if (flags
& FOR_EACH_OBJECT_LOCAL_ONLY
)
1456 static int append_loose_object(const struct object_id
*oid
,
1457 const char *path UNUSED
,
1460 oidtree_insert(data
, oid
);
1464 struct oidtree
*odb_loose_cache(struct odb_source
*source
,
1465 const struct object_id
*oid
)
1467 int subdir_nr
= oid
->hash
[0];
1468 struct strbuf buf
= STRBUF_INIT
;
1469 size_t word_bits
= bitsizeof(source
->loose_objects_subdir_seen
[0]);
1470 size_t word_index
= subdir_nr
/ word_bits
;
1471 size_t mask
= (size_t)1u << (subdir_nr
% word_bits
);
1474 if (subdir_nr
< 0 ||
1475 subdir_nr
>= bitsizeof(source
->loose_objects_subdir_seen
))
1476 BUG("subdir_nr out of range");
1478 bitmap
= &source
->loose_objects_subdir_seen
[word_index
];
1480 return source
->loose_objects_cache
;
1481 if (!source
->loose_objects_cache
) {
1482 ALLOC_ARRAY(source
->loose_objects_cache
, 1);
1483 oidtree_init(source
->loose_objects_cache
);
1485 strbuf_addstr(&buf
, source
->path
);
1486 for_each_file_in_obj_subdir(subdir_nr
, &buf
,
1487 append_loose_object
,
1489 source
->loose_objects_cache
);
1491 strbuf_release(&buf
);
1492 return source
->loose_objects_cache
;
1495 void odb_clear_loose_cache(struct odb_source
*source
)
1497 oidtree_clear(source
->loose_objects_cache
);
1498 FREE_AND_NULL(source
->loose_objects_cache
);
1499 memset(&source
->loose_objects_subdir_seen
, 0,
1500 sizeof(source
->loose_objects_subdir_seen
));
1503 static int check_stream_oid(git_zstream
*stream
,
1507 const struct object_id
*expected_oid
)
1509 struct git_hash_ctx c
;
1510 struct object_id real_oid
;
1511 unsigned char buf
[4096];
1512 unsigned long total_read
;
1515 the_hash_algo
->init_fn(&c
);
1516 git_hash_update(&c
, hdr
, stream
->total_out
);
1519 * We already read some bytes into hdr, but the ones up to the NUL
1520 * do not count against the object's content size.
1522 total_read
= stream
->total_out
- strlen(hdr
) - 1;
1525 * This size comparison must be "<=" to read the final zlib packets;
1526 * see the comment in unpack_loose_rest for details.
1528 while (total_read
<= size
&&
1530 (status
== Z_BUF_ERROR
&& !stream
->avail_out
))) {
1531 stream
->next_out
= buf
;
1532 stream
->avail_out
= sizeof(buf
);
1533 if (size
- total_read
< stream
->avail_out
)
1534 stream
->avail_out
= size
- total_read
;
1535 status
= git_inflate(stream
, Z_FINISH
);
1536 git_hash_update(&c
, buf
, stream
->next_out
- buf
);
1537 total_read
+= stream
->next_out
- buf
;
1540 if (status
!= Z_STREAM_END
) {
1541 error(_("corrupt loose object '%s'"), oid_to_hex(expected_oid
));
1544 if (stream
->avail_in
) {
1545 error(_("garbage at end of loose object '%s'"),
1546 oid_to_hex(expected_oid
));
1550 git_hash_final_oid(&real_oid
, &c
);
1551 if (!oideq(expected_oid
, &real_oid
)) {
1552 error(_("hash mismatch for %s (expected %s)"), path
,
1553 oid_to_hex(expected_oid
));
1560 int read_loose_object(const char *path
,
1561 const struct object_id
*expected_oid
,
1562 struct object_id
*real_oid
,
1564 struct object_info
*oi
)
1569 unsigned long mapsize
;
1571 char hdr
[MAX_HEADER_LEN
];
1572 unsigned long *size
= oi
->sizep
;
1574 fd
= git_open(path
);
1576 map
= map_fd(fd
, path
, &mapsize
);
1578 error_errno(_("unable to mmap %s"), path
);
1582 if (unpack_loose_header(&stream
, map
, mapsize
, hdr
, sizeof(hdr
)) != ULHR_OK
) {
1583 error(_("unable to unpack header of %s"), path
);
1587 if (parse_loose_header(hdr
, oi
) < 0) {
1588 error(_("unable to parse header of %s"), path
);
1592 if (*oi
->typep
< 0) {
1593 error(_("unable to parse type from header '%s' of %s"),
1598 if (*oi
->typep
== OBJ_BLOB
&&
1599 *size
> repo_settings_get_big_file_threshold(the_repository
)) {
1600 if (check_stream_oid(&stream
, hdr
, *size
, path
, expected_oid
) < 0)
1603 *contents
= unpack_loose_rest(&stream
, hdr
, *size
, expected_oid
);
1605 error(_("unable to unpack contents of %s"), path
);
1608 hash_object_file(the_repository
->hash_algo
,
1610 *oi
->typep
, real_oid
);
1611 if (!oideq(expected_oid
, real_oid
))
1615 ret
= 0; /* everything checks out */
1618 git_inflate_end(&stream
);
1621 munmap(map
, mapsize
);