object-file.c

   1 /*
   2  * GIT - The information manager from hell
   3  *
   4  * Copyright (C) Linus Torvalds, 2005
   5  *
   6  * This handles basic git object files - packing, unpacking,
   7  * creation etc.
   8  */
   9
  10 #define USE_THE_REPOSITORY_VARIABLE
  11 #define DISABLE_SIGN_COMPARE_WARNINGS
  12
  13 #include "git-compat-util.h"
  14 #include "bulk-checkin.h"
  15 #include "convert.h"
  16 #include "dir.h"
  17 #include "environment.h"
  18 #include "fsck.h"
  19 #include "gettext.h"
  20 #include "hex.h"
  21 #include "loose.h"
  22 #include "object-file-convert.h"
  23 #include "object-file.h"
  24 #include "odb.h"
  25 #include "oidtree.h"
  26 #include "pack.h"
  27 #include "packfile.h"
  28 #include "path.h"
  29 #include "setup.h"
  30 #include "streaming.h"
  31
  32 /* The maximum size for an object header. */
  33 #define MAX_HEADER_LEN 32
  34
  35 static int get_conv_flags(unsigned flags)
  36 {
  37         if (flags & INDEX_RENORMALIZE)
  38                 return CONV_EOL_RENORMALIZE;
  39         else if (flags & INDEX_WRITE_OBJECT)
  40                 return global_conv_flags_eol | CONV_WRITE_OBJECT;
  41         else
  42                 return 0;
  43 }
  44
  45 static void fill_loose_path(struct strbuf *buf, const struct object_id *oid)
  46 {
  47         int i;
  48         for (i = 0; i < the_hash_algo->rawsz; i++) {
  49                 static char hex[] = "0123456789abcdef";
  50                 unsigned int val = oid->hash[i];
  51                 strbuf_addch(buf, hex[val >> 4]);
  52                 strbuf_addch(buf, hex[val & 0xf]);
  53                 if (!i)
  54                         strbuf_addch(buf, '/');
  55         }
  56 }
  57
  58 const char *odb_loose_path(struct odb_source *source,
  59                            struct strbuf *buf,
  60                            const struct object_id *oid)
  61 {
  62         strbuf_reset(buf);
  63         strbuf_addstr(buf, source->path);
  64         strbuf_addch(buf, '/');
  65         fill_loose_path(buf, oid);
  66         return buf->buf;
  67 }
  68
  69 /* Returns 1 if we have successfully freshened the file, 0 otherwise. */
  70 static int freshen_file(const char *fn)
  71 {
  72         return !utime(fn, NULL);
  73 }
  74
  75 /*
  76  * All of the check_and_freshen functions return 1 if the file exists and was
  77  * freshened (if freshening was requested), 0 otherwise. If they return
  78  * 0, you should not assume that it is safe to skip a write of the object (it
  79  * either does not exist on disk, or has a stale mtime and may be subject to
  80  * pruning).
  81  */
  82 int check_and_freshen_file(const char *fn, int freshen)
  83 {
  84         if (access(fn, F_OK))
  85                 return 0;
  86         if (freshen && !freshen_file(fn))
  87                 return 0;
  88         return 1;
  89 }
  90
  91 static int check_and_freshen_odb(struct odb_source *source,
  92                                  const struct object_id *oid,
  93                                  int freshen)
  94 {
  95         static struct strbuf path = STRBUF_INIT;
  96         odb_loose_path(source, &path, oid);
  97         return check_and_freshen_file(path.buf, freshen);
  98 }
  99
 100 static int check_and_freshen_local(const struct object_id *oid, int freshen)
 101 {
 102         return check_and_freshen_odb(the_repository->objects->sources, oid, freshen);
 103 }
 104
 105 static int check_and_freshen_nonlocal(const struct object_id *oid, int freshen)
 106 {
 107         struct odb_source *source;
 108
 109         odb_prepare_alternates(the_repository->objects);
 110         for (source = the_repository->objects->sources->next; source; source = source->next) {
 111                 if (check_and_freshen_odb(source, oid, freshen))
 112                         return 1;
 113         }
 114         return 0;
 115 }
 116
 117 static int check_and_freshen(const struct object_id *oid, int freshen)
 118 {
 119         return check_and_freshen_local(oid, freshen) ||
 120                check_and_freshen_nonlocal(oid, freshen);
 121 }
 122
 123 int has_loose_object_nonlocal(const struct object_id *oid)
 124 {
 125         return check_and_freshen_nonlocal(oid, 0);
 126 }
 127
 128 int has_loose_object(const struct object_id *oid)
 129 {
 130         return check_and_freshen(oid, 0);
 131 }
 132
 133 int format_object_header(char *str, size_t size, enum object_type type,
 134                          size_t objsize)
 135 {
 136         const char *name = type_name(type);
 137
 138         if (!name)
 139                 BUG("could not get a type name for 'enum object_type' value %d", type);
 140
 141         return xsnprintf(str, size, "%s %"PRIuMAX, name, (uintmax_t)objsize) + 1;
 142 }
 143
 144 int check_object_signature(struct repository *r, const struct object_id *oid,
 145                            void *buf, unsigned long size,
 146                            enum object_type type)
 147 {
 148         const struct git_hash_algo *algo =
 149                 oid->algo ? &hash_algos[oid->algo] : r->hash_algo;
 150         struct object_id real_oid;
 151
 152         hash_object_file(algo, buf, size, type, &real_oid);
 153
 154         return !oideq(oid, &real_oid) ? -1 : 0;
 155 }
 156
 157 int stream_object_signature(struct repository *r, const struct object_id *oid)
 158 {
 159         struct object_id real_oid;
 160         unsigned long size;
 161         enum object_type obj_type;
 162         struct git_istream *st;
 163         struct git_hash_ctx c;
 164         char hdr[MAX_HEADER_LEN];
 165         int hdrlen;
 166
 167         st = open_istream(r, oid, &obj_type, &size, NULL);
 168         if (!st)
 169                 return -1;
 170
 171         /* Generate the header */
 172         hdrlen = format_object_header(hdr, sizeof(hdr), obj_type, size);
 173
 174         /* Sha1.. */
 175         r->hash_algo->init_fn(&c);
 176         git_hash_update(&c, hdr, hdrlen);
 177         for (;;) {
 178                 char buf[1024 * 16];
 179                 ssize_t readlen = read_istream(st, buf, sizeof(buf));
 180
 181                 if (readlen < 0) {
 182                         close_istream(st);
 183                         return -1;
 184                 }
 185                 if (!readlen)
 186                         break;
 187                 git_hash_update(&c, buf, readlen);
 188         }
 189         git_hash_final_oid(&real_oid, &c);
 190         close_istream(st);
 191         return !oideq(oid, &real_oid) ? -1 : 0;
 192 }
 193
 194 /*
 195  * Find "oid" as a loose object in the local repository or in an alternate.
 196  * Returns 0 on success, negative on failure.
 197  *
 198  * The "path" out-parameter will give the path of the object we found (if any).
 199  * Note that it may point to static storage and is only valid until another
 200  * call to stat_loose_object().
 201  */
 202 static int stat_loose_object(struct repository *r, const struct object_id *oid,
 203                              struct stat *st, const char **path)
 204 {
 205         struct odb_source *source;
 206         static struct strbuf buf = STRBUF_INIT;
 207
 208         odb_prepare_alternates(r->objects);
 209         for (source = r->objects->sources; source; source = source->next) {
 210                 *path = odb_loose_path(source, &buf, oid);
 211                 if (!lstat(*path, st))
 212                         return 0;
 213         }
 214
 215         return -1;
 216 }
 217
 218 /*
 219  * Like stat_loose_object(), but actually open the object and return the
 220  * descriptor. See the caveats on the "path" parameter above.
 221  */
 222 static int open_loose_object(struct repository *r,
 223                              const struct object_id *oid, const char **path)
 224 {
 225         int fd;
 226         struct odb_source *source;
 227         int most_interesting_errno = ENOENT;
 228         static struct strbuf buf = STRBUF_INIT;
 229
 230         odb_prepare_alternates(r->objects);
 231         for (source = r->objects->sources; source; source = source->next) {
 232                 *path = odb_loose_path(source, &buf, oid);
 233                 fd = git_open(*path);
 234                 if (fd >= 0)
 235                         return fd;
 236
 237                 if (most_interesting_errno == ENOENT)
 238                         most_interesting_errno = errno;
 239         }
 240         errno = most_interesting_errno;
 241         return -1;
 242 }
 243
 244 static int quick_has_loose(struct repository *r,
 245                            const struct object_id *oid)
 246 {
 247         struct odb_source *source;
 248
 249         odb_prepare_alternates(r->objects);
 250         for (source = r->objects->sources; source; source = source->next) {
 251                 if (oidtree_contains(odb_loose_cache(source, oid), oid))
 252                         return 1;
 253         }
 254         return 0;
 255 }
 256
 257 /*
 258  * Map and close the given loose object fd. The path argument is used for
 259  * error reporting.
 260  */
 261 static void *map_fd(int fd, const char *path, unsigned long *size)
 262 {
 263         void *map = NULL;
 264         struct stat st;
 265
 266         if (!fstat(fd, &st)) {
 267                 *size = xsize_t(st.st_size);
 268                 if (!*size) {
 269                         /* mmap() is forbidden on empty files */
 270                         error(_("object file %s is empty"), path);
 271                         close(fd);
 272                         return NULL;
 273                 }
 274                 map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0);
 275         }
 276         close(fd);
 277         return map;
 278 }
 279
 280 void *map_loose_object(struct repository *r,
 281                        const struct object_id *oid,
 282                        unsigned long *size)
 283 {
 284         const char *p;
 285         int fd = open_loose_object(r, oid, &p);
 286
 287         if (fd < 0)
 288                 return NULL;
 289         return map_fd(fd, p, size);
 290 }
 291
 292 enum unpack_loose_header_result unpack_loose_header(git_zstream *stream,
 293                                                     unsigned char *map,
 294                                                     unsigned long mapsize,
 295                                                     void *buffer,
 296                                                     unsigned long bufsiz)
 297 {
 298         int status;
 299
 300         /* Get the data stream */
 301         memset(stream, 0, sizeof(*stream));
 302         stream->next_in = map;
 303         stream->avail_in = mapsize;
 304         stream->next_out = buffer;
 305         stream->avail_out = bufsiz;
 306
 307         git_inflate_init(stream);
 308         obj_read_unlock();
 309         status = git_inflate(stream, 0);
 310         obj_read_lock();
 311         if (status != Z_OK && status != Z_STREAM_END)
 312                 return ULHR_BAD;
 313
 314         /*
 315          * Check if entire header is unpacked in the first iteration.
 316          */
 317         if (memchr(buffer, '\0', stream->next_out - (unsigned char *)buffer))
 318                 return ULHR_OK;
 319
 320         /*
 321          * We have a header longer than MAX_HEADER_LEN.
 322          */
 323         return ULHR_TOO_LONG;
 324 }
 325
 326 static void *unpack_loose_rest(git_zstream *stream,
 327                                void *buffer, unsigned long size,
 328                                const struct object_id *oid)
 329 {
 330         int bytes = strlen(buffer) + 1;
 331         unsigned char *buf = xmallocz(size);
 332         unsigned long n;
 333         int status = Z_OK;
 334
 335         n = stream->total_out - bytes;
 336         if (n > size)
 337                 n = size;
 338         memcpy(buf, (char *) buffer + bytes, n);
 339         bytes = n;
 340         if (bytes <= size) {
 341                 /*
 342                  * The above condition must be (bytes <= size), not
 343                  * (bytes < size).  In other words, even though we
 344                  * expect no more output and set avail_out to zero,
 345                  * the input zlib stream may have bytes that express
 346                  * "this concludes the stream", and we *do* want to
 347                  * eat that input.
 348                  *
 349                  * Otherwise we would not be able to test that we
 350                  * consumed all the input to reach the expected size;
 351                  * we also want to check that zlib tells us that all
 352                  * went well with status == Z_STREAM_END at the end.
 353                  */
 354                 stream->next_out = buf + bytes;
 355                 stream->avail_out = size - bytes;
 356                 while (status == Z_OK) {
 357                         obj_read_unlock();
 358                         status = git_inflate(stream, Z_FINISH);
 359                         obj_read_lock();
 360                 }
 361         }
 362
 363         if (status != Z_STREAM_END) {
 364                 error(_("corrupt loose object '%s'"), oid_to_hex(oid));
 365                 FREE_AND_NULL(buf);
 366         } else if (stream->avail_in) {
 367                 error(_("garbage at end of loose object '%s'"),
 368                       oid_to_hex(oid));
 369                 FREE_AND_NULL(buf);
 370         }
 371
 372         return buf;
 373 }
 374
 375 /*
 376  * We used to just use "sscanf()", but that's actually way
 377  * too permissive for what we want to check. So do an anal
 378  * object header parse by hand.
 379  */
 380 int parse_loose_header(const char *hdr, struct object_info *oi)
 381 {
 382         const char *type_buf = hdr;
 383         size_t size;
 384         int type, type_len = 0;
 385
 386         /*
 387          * The type can be of any size but is followed by
 388          * a space.
 389          */
 390         for (;;) {
 391                 char c = *hdr++;
 392                 if (!c)
 393                         return -1;
 394                 if (c == ' ')
 395                         break;
 396                 type_len++;
 397         }
 398
 399         type = type_from_string_gently(type_buf, type_len, 1);
 400         if (oi->typep)
 401                 *oi->typep = type;
 402
 403         /*
 404          * The length must follow immediately, and be in canonical
 405          * decimal format (ie "010" is not valid).
 406          */
 407         size = *hdr++ - '0';
 408         if (size > 9)
 409                 return -1;
 410         if (size) {
 411                 for (;;) {
 412                         unsigned long c = *hdr - '0';
 413                         if (c > 9)
 414                                 break;
 415                         hdr++;
 416                         size = st_add(st_mult(size, 10), c);
 417                 }
 418         }
 419
 420         if (oi->sizep)
 421                 *oi->sizep = cast_size_t_to_ulong(size);
 422
 423         /*
 424          * The length must be followed by a zero byte
 425          */
 426         if (*hdr)
 427                 return -1;
 428
 429         /*
 430          * The format is valid, but the type may still be bogus. The
 431          * Caller needs to check its oi->typep.
 432          */
 433         return 0;
 434 }
 435
 436 int loose_object_info(struct repository *r,
 437                       const struct object_id *oid,
 438                       struct object_info *oi, int flags)
 439 {
 440         int status = 0;
 441         int fd;
 442         unsigned long mapsize;
 443         const char *path;
 444         void *map;
 445         git_zstream stream;
 446         char hdr[MAX_HEADER_LEN];
 447         unsigned long size_scratch;
 448         enum object_type type_scratch;
 449
 450         if (oi->delta_base_oid)
 451                 oidclr(oi->delta_base_oid, the_repository->hash_algo);
 452
 453         /*
 454          * If we don't care about type or size, then we don't
 455          * need to look inside the object at all. Note that we
 456          * do not optimize out the stat call, even if the
 457          * caller doesn't care about the disk-size, since our
 458          * return value implicitly indicates whether the
 459          * object even exists.
 460          */
 461         if (!oi->typep && !oi->sizep && !oi->contentp) {
 462                 struct stat st;
 463                 if (!oi->disk_sizep && (flags & OBJECT_INFO_QUICK))
 464                         return quick_has_loose(r, oid) ? 0 : -1;
 465                 if (stat_loose_object(r, oid, &st, &path) < 0)
 466                         return -1;
 467                 if (oi->disk_sizep)
 468                         *oi->disk_sizep = st.st_size;
 469                 return 0;
 470         }
 471
 472         fd = open_loose_object(r, oid, &path);
 473         if (fd < 0) {
 474                 if (errno != ENOENT)
 475                         error_errno(_("unable to open loose object %s"), oid_to_hex(oid));
 476                 return -1;
 477         }
 478         map = map_fd(fd, path, &mapsize);
 479         if (!map)
 480                 return -1;
 481
 482         if (!oi->sizep)
 483                 oi->sizep = &size_scratch;
 484         if (!oi->typep)
 485                 oi->typep = &type_scratch;
 486
 487         if (oi->disk_sizep)
 488                 *oi->disk_sizep = mapsize;
 489
 490         switch (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr))) {
 491         case ULHR_OK:
 492                 if (parse_loose_header(hdr, oi) < 0)
 493                         status = error(_("unable to parse %s header"), oid_to_hex(oid));
 494                 else if (*oi->typep < 0)
 495                         die(_("invalid object type"));
 496
 497                 if (!oi->contentp)
 498                         break;
 499                 *oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid);
 500                 if (*oi->contentp)
 501                         goto cleanup;
 502
 503                 status = -1;
 504                 break;
 505         case ULHR_BAD:
 506                 status = error(_("unable to unpack %s header"),
 507                                oid_to_hex(oid));
 508                 break;
 509         case ULHR_TOO_LONG:
 510                 status = error(_("header for %s too long, exceeds %d bytes"),
 511                                oid_to_hex(oid), MAX_HEADER_LEN);
 512                 break;
 513         }
 514
 515         if (status && (flags & OBJECT_INFO_DIE_IF_CORRUPT))
 516                 die(_("loose object %s (stored in %s) is corrupt"),
 517                     oid_to_hex(oid), path);
 518
 519 cleanup:
 520         git_inflate_end(&stream);
 521         munmap(map, mapsize);
 522         if (oi->sizep == &size_scratch)
 523                 oi->sizep = NULL;
 524         if (oi->typep == &type_scratch)
 525                 oi->typep = NULL;
 526         oi->whence = OI_LOOSE;
 527         return status;
 528 }
 529
 530 static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c,
 531                              const void *buf, unsigned long len,
 532                              struct object_id *oid,
 533                              char *hdr, int *hdrlen)
 534 {
 535         algo->init_fn(c);
 536         git_hash_update(c, hdr, *hdrlen);
 537         git_hash_update(c, buf, len);
 538         git_hash_final_oid(oid, c);
 539 }
 540
 541 static void write_object_file_prepare(const struct git_hash_algo *algo,
 542                                       const void *buf, unsigned long len,
 543                                       enum object_type type, struct object_id *oid,
 544                                       char *hdr, int *hdrlen)
 545 {
 546         struct git_hash_ctx c;
 547
 548         /* Generate the header */
 549         *hdrlen = format_object_header(hdr, *hdrlen, type, len);
 550
 551         /* Sha1.. */
 552         hash_object_body(algo, &c, buf, len, oid, hdr, hdrlen);
 553 }
 554
 555 #define CHECK_COLLISION_DEST_VANISHED -2
 556
 557 static int check_collision(const char *source, const char *dest)
 558 {
 559         char buf_source[4096], buf_dest[4096];
 560         int fd_source = -1, fd_dest = -1;
 561         int ret = 0;
 562
 563         fd_source = open(source, O_RDONLY);
 564         if (fd_source < 0) {
 565                 ret = error_errno(_("unable to open %s"), source);
 566                 goto out;
 567         }
 568
 569         fd_dest = open(dest, O_RDONLY);
 570         if (fd_dest < 0) {
 571                 if (errno != ENOENT)
 572                         ret = error_errno(_("unable to open %s"), dest);
 573                 else
 574                         ret = CHECK_COLLISION_DEST_VANISHED;
 575                 goto out;
 576         }
 577
 578         while (1) {
 579                 ssize_t sz_a, sz_b;
 580
 581                 sz_a = read_in_full(fd_source, buf_source, sizeof(buf_source));
 582                 if (sz_a < 0) {
 583                         ret = error_errno(_("unable to read %s"), source);
 584                         goto out;
 585                 }
 586
 587                 sz_b = read_in_full(fd_dest, buf_dest, sizeof(buf_dest));
 588                 if (sz_b < 0) {
 589                         ret = error_errno(_("unable to read %s"), dest);
 590                         goto out;
 591                 }
 592
 593                 if (sz_a != sz_b || memcmp(buf_source, buf_dest, sz_a)) {
 594                         ret = error(_("files '%s' and '%s' differ in contents"),
 595                                     source, dest);
 596                         goto out;
 597                 }
 598
 599                 if (sz_a < sizeof(buf_source))
 600                         break;
 601         }
 602
 603 out:
 604         if (fd_source > -1)
 605                 close(fd_source);
 606         if (fd_dest > -1)
 607                 close(fd_dest);
 608         return ret;
 609 }
 610
 611 /*
 612  * Move the just written object into its final resting place.
 613  */
 614 int finalize_object_file(const char *tmpfile, const char *filename)
 615 {
 616         return finalize_object_file_flags(tmpfile, filename, 0);
 617 }
 618
 619 int finalize_object_file_flags(const char *tmpfile, const char *filename,
 620                                enum finalize_object_file_flags flags)
 621 {
 622         unsigned retries = 0;
 623         int ret;
 624
 625 retry:
 626         ret = 0;
 627
 628         if (object_creation_mode == OBJECT_CREATION_USES_RENAMES)
 629                 goto try_rename;
 630         else if (link(tmpfile, filename))
 631                 ret = errno;
 632         else
 633                 unlink_or_warn(tmpfile);
 634
 635         /*
 636          * Coda hack - coda doesn't like cross-directory links,
 637          * so we fall back to a rename, which will mean that it
 638          * won't be able to check collisions, but that's not a
 639          * big deal.
 640          *
 641          * The same holds for FAT formatted media.
 642          *
 643          * When this succeeds, we just return.  We have nothing
 644          * left to unlink.
 645          */
 646         if (ret && ret != EEXIST) {
 647                 struct stat st;
 648
 649         try_rename:
 650                 if (!stat(filename, &st))
 651                         ret = EEXIST;
 652                 else if (!rename(tmpfile, filename))
 653                         goto out;
 654                 else
 655                         ret = errno;
 656         }
 657         if (ret) {
 658                 if (ret != EEXIST) {
 659                         int saved_errno = errno;
 660                         unlink_or_warn(tmpfile);
 661                         errno = saved_errno;
 662                         return error_errno(_("unable to write file %s"), filename);
 663                 }
 664                 if (!(flags & FOF_SKIP_COLLISION_CHECK)) {
 665                         ret = check_collision(tmpfile, filename);
 666                         if (ret == CHECK_COLLISION_DEST_VANISHED) {
 667                                 if (retries++ > 5)
 668                                         return error(_("unable to write repeatedly vanishing file %s"),
 669                                                      filename);
 670                                 goto retry;
 671                         }
 672                         else if (ret)
 673                                 return -1;
 674                 }
 675                 unlink_or_warn(tmpfile);
 676         }
 677
 678 out:
 679         if (adjust_shared_perm(the_repository, filename))
 680                 return error(_("unable to set permission to '%s'"), filename);
 681         return 0;
 682 }
 683
 684 void hash_object_file(const struct git_hash_algo *algo, const void *buf,
 685                       unsigned long len, enum object_type type,
 686                       struct object_id *oid)
 687 {
 688         char hdr[MAX_HEADER_LEN];
 689         int hdrlen = sizeof(hdr);
 690
 691         write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);
 692 }
 693
 694 /* Finalize a file on disk, and close it. */
 695 static void close_loose_object(int fd, const char *filename)
 696 {
 697         if (the_repository->objects->sources->will_destroy)
 698                 goto out;
 699
 700         if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
 701                 fsync_loose_object_bulk_checkin(fd, filename);
 702         else if (fsync_object_files > 0)
 703                 fsync_or_die(fd, filename);
 704         else
 705                 fsync_component_or_die(FSYNC_COMPONENT_LOOSE_OBJECT, fd,
 706                                        filename);
 707
 708 out:
 709         if (close(fd) != 0)
 710                 die_errno(_("error when closing loose object file"));
 711 }
 712
 713 /* Size of directory component, including the ending '/' */
 714 static inline int directory_size(const char *filename)
 715 {
 716         const char *s = strrchr(filename, '/');
 717         if (!s)
 718                 return 0;
 719         return s - filename + 1;
 720 }
 721
 722 /*
 723  * This creates a temporary file in the same directory as the final
 724  * 'filename'
 725  *
 726  * We want to avoid cross-directory filename renames, because those
 727  * can have problems on various filesystems (FAT, NFS, Coda).
 728  */
 729 static int create_tmpfile(struct strbuf *tmp, const char *filename)
 730 {
 731         int fd, dirlen = directory_size(filename);
 732
 733         strbuf_reset(tmp);
 734         strbuf_add(tmp, filename, dirlen);
 735         strbuf_addstr(tmp, "tmp_obj_XXXXXX");
 736         fd = git_mkstemp_mode(tmp->buf, 0444);
 737         if (fd < 0 && dirlen && errno == ENOENT) {
 738                 /*
 739                  * Make sure the directory exists; note that the contents
 740                  * of the buffer are undefined after mkstemp returns an
 741                  * error, so we have to rewrite the whole buffer from
 742                  * scratch.
 743                  */
 744                 strbuf_reset(tmp);
 745                 strbuf_add(tmp, filename, dirlen - 1);
 746                 if (mkdir(tmp->buf, 0777) && errno != EEXIST)
 747                         return -1;
 748                 if (adjust_shared_perm(the_repository, tmp->buf))
 749                         return -1;
 750
 751                 /* Try again */
 752                 strbuf_addstr(tmp, "/tmp_obj_XXXXXX");
 753                 fd = git_mkstemp_mode(tmp->buf, 0444);
 754         }
 755         return fd;
 756 }
 757
 758 /**
 759  * Common steps for loose object writers to start writing loose
 760  * objects:
 761  *
 762  * - Create tmpfile for the loose object.
 763  * - Setup zlib stream for compression.
 764  * - Start to feed header to zlib stream.
 765  *
 766  * Returns a "fd", which should later be provided to
 767  * end_loose_object_common().
 768  */
 769 static int start_loose_object_common(struct strbuf *tmp_file,
 770                                      const char *filename, unsigned flags,
 771                                      git_zstream *stream,
 772                                      unsigned char *buf, size_t buflen,
 773                                      struct git_hash_ctx *c, struct git_hash_ctx *compat_c,
 774                                      char *hdr, int hdrlen)
 775 {
 776         struct repository *repo = the_repository;
 777         const struct git_hash_algo *algo = repo->hash_algo;
 778         const struct git_hash_algo *compat = repo->compat_hash_algo;
 779         int fd;
 780
 781         fd = create_tmpfile(tmp_file, filename);
 782         if (fd < 0) {
 783                 if (flags & WRITE_OBJECT_FILE_SILENT)
 784                         return -1;
 785                 else if (errno == EACCES)
 786                         return error(_("insufficient permission for adding "
 787                                        "an object to repository database %s"),
 788                                      repo_get_object_directory(the_repository));
 789                 else
 790                         return error_errno(
 791                                 _("unable to create temporary file"));
 792         }
 793
 794         /*  Setup zlib stream for compression */
 795         git_deflate_init(stream, zlib_compression_level);
 796         stream->next_out = buf;
 797         stream->avail_out = buflen;
 798         algo->init_fn(c);
 799         if (compat && compat_c)
 800                 compat->init_fn(compat_c);
 801
 802         /*  Start to feed header to zlib stream */
 803         stream->next_in = (unsigned char *)hdr;
 804         stream->avail_in = hdrlen;
 805         while (git_deflate(stream, 0) == Z_OK)
 806                 ; /* nothing */
 807         git_hash_update(c, hdr, hdrlen);
 808         if (compat && compat_c)
 809                 git_hash_update(compat_c, hdr, hdrlen);
 810
 811         return fd;
 812 }
 813
 814 /**
 815  * Common steps for the inner git_deflate() loop for writing loose
 816  * objects. Returns what git_deflate() returns.
 817  */
 818 static int write_loose_object_common(struct git_hash_ctx *c, struct git_hash_ctx *compat_c,
 819                                      git_zstream *stream, const int flush,
 820                                      unsigned char *in0, const int fd,
 821                                      unsigned char *compressed,
 822                                      const size_t compressed_len)
 823 {
 824         struct repository *repo = the_repository;
 825         const struct git_hash_algo *compat = repo->compat_hash_algo;
 826         int ret;
 827
 828         ret = git_deflate(stream, flush ? Z_FINISH : 0);
 829         git_hash_update(c, in0, stream->next_in - in0);
 830         if (compat && compat_c)
 831                 git_hash_update(compat_c, in0, stream->next_in - in0);
 832         if (write_in_full(fd, compressed, stream->next_out - compressed) < 0)
 833                 die_errno(_("unable to write loose object file"));
 834         stream->next_out = compressed;
 835         stream->avail_out = compressed_len;
 836
 837         return ret;
 838 }
 839
 840 /**
 841  * Common steps for loose object writers to end writing loose objects:
 842  *
 843  * - End the compression of zlib stream.
 844  * - Get the calculated oid to "oid".
 845  */
 846 static int end_loose_object_common(struct git_hash_ctx *c, struct git_hash_ctx *compat_c,
 847                                    git_zstream *stream, struct object_id *oid,
 848                                    struct object_id *compat_oid)
 849 {
 850         struct repository *repo = the_repository;
 851         const struct git_hash_algo *compat = repo->compat_hash_algo;
 852         int ret;
 853
 854         ret = git_deflate_end_gently(stream);
 855         if (ret != Z_OK)
 856                 return ret;
 857         git_hash_final_oid(oid, c);
 858         if (compat && compat_c)
 859                 git_hash_final_oid(compat_oid, compat_c);
 860
 861         return Z_OK;
 862 }
 863
 864 static int write_loose_object(const struct object_id *oid, char *hdr,
 865                               int hdrlen, const void *buf, unsigned long len,
 866                               time_t mtime, unsigned flags)
 867 {
 868         int fd, ret;
 869         unsigned char compressed[4096];
 870         git_zstream stream;
 871         struct git_hash_ctx c;
 872         struct object_id parano_oid;
 873         static struct strbuf tmp_file = STRBUF_INIT;
 874         static struct strbuf filename = STRBUF_INIT;
 875
 876         if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
 877                 prepare_loose_object_bulk_checkin();
 878
 879         odb_loose_path(the_repository->objects->sources, &filename, oid);
 880
 881         fd = start_loose_object_common(&tmp_file, filename.buf, flags,
 882                                        &stream, compressed, sizeof(compressed),
 883                                        &c, NULL, hdr, hdrlen);
 884         if (fd < 0)
 885                 return -1;
 886
 887         /* Then the data itself.. */
 888         stream.next_in = (void *)buf;
 889         stream.avail_in = len;
 890         do {
 891                 unsigned char *in0 = stream.next_in;
 892
 893                 ret = write_loose_object_common(&c, NULL, &stream, 1, in0, fd,
 894                                                 compressed, sizeof(compressed));
 895         } while (ret == Z_OK);
 896
 897         if (ret != Z_STREAM_END)
 898                 die(_("unable to deflate new object %s (%d)"), oid_to_hex(oid),
 899                     ret);
 900         ret = end_loose_object_common(&c, NULL, &stream, &parano_oid, NULL);
 901         if (ret != Z_OK)
 902                 die(_("deflateEnd on object %s failed (%d)"), oid_to_hex(oid),
 903                     ret);
 904         if (!oideq(oid, &parano_oid))
 905                 die(_("confused by unstable object source data for %s"),
 906                     oid_to_hex(oid));
 907
 908         close_loose_object(fd, tmp_file.buf);
 909
 910         if (mtime) {
 911                 struct utimbuf utb;
 912                 utb.actime = mtime;
 913                 utb.modtime = mtime;
 914                 if (utime(tmp_file.buf, &utb) < 0 &&
 915                     !(flags & WRITE_OBJECT_FILE_SILENT))
 916                         warning_errno(_("failed utime() on %s"), tmp_file.buf);
 917         }
 918
 919         return finalize_object_file_flags(tmp_file.buf, filename.buf,
 920                                           FOF_SKIP_COLLISION_CHECK);
 921 }
 922
 /*
  * Thin wrapper: forwards "oid" to check_and_freshen() with the second
  * argument set to 1 and returns its result.
  */
 static int freshen_loose_object(const struct object_id *oid)
 {
         const int freshen = 1;

         return check_and_freshen(oid, freshen);
 }
 927
 928 static int freshen_packed_object(const struct object_id *oid)
 929 {
 930         struct pack_entry e;
 931         if (!find_pack_entry(the_repository, oid, &e))
 932                 return 0;
 933         if (e.p->is_cruft)
 934                 return 0;
 935         if (e.p->freshened)
 936                 return 1;
 937         if (!freshen_file(e.p->pack_name))
 938                 return 0;
 939         e.p->freshened = 1;
 940         return 1;
 941 }
 942
 943 int stream_loose_object(struct input_stream *in_stream, size_t len,
 944                         struct object_id *oid)
 945 {
 946         const struct git_hash_algo *compat = the_repository->compat_hash_algo;
 947         struct object_id compat_oid;
 948         int fd, ret, err = 0, flush = 0;
 949         unsigned char compressed[4096];
 950         git_zstream stream;
 951         struct git_hash_ctx c, compat_c;
 952         struct strbuf tmp_file = STRBUF_INIT;
 953         struct strbuf filename = STRBUF_INIT;
 954         int dirlen;
 955         char hdr[MAX_HEADER_LEN];
 956         int hdrlen;
 957
 958         if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT))
 959                 prepare_loose_object_bulk_checkin();
 960
 961         /* Since oid is not determined, save tmp file to odb path. */
 962         strbuf_addf(&filename, "%s/", repo_get_object_directory(the_repository));
 963         hdrlen = format_object_header(hdr, sizeof(hdr), OBJ_BLOB, len);
 964
 965         /*
 966          * Common steps for write_loose_object and stream_loose_object to
 967          * start writing loose objects:
 968          *
 969          *  - Create tmpfile for the loose object.
 970          *  - Setup zlib stream for compression.
 971          *  - Start to feed header to zlib stream.
 972          */
 973         fd = start_loose_object_common(&tmp_file, filename.buf, 0,
 974                                        &stream, compressed, sizeof(compressed),
 975                                        &c, &compat_c, hdr, hdrlen);
 976         if (fd < 0) {
 977                 err = -1;
 978                 goto cleanup;
 979         }
 980
 981         /* Then the data itself.. */
 982         do {
 983                 unsigned char *in0 = stream.next_in;
 984
 985                 if (!stream.avail_in && !in_stream->is_finished) {
 986                         const void *in = in_stream->read(in_stream, &stream.avail_in);
 987                         stream.next_in = (void *)in;
 988                         in0 = (unsigned char *)in;
 989                         /* All data has been read. */
 990                         if (in_stream->is_finished)
 991                                 flush = 1;
 992                 }
 993                 ret = write_loose_object_common(&c, &compat_c, &stream, flush, in0, fd,
 994                                                 compressed, sizeof(compressed));
 995                 /*
 996                  * Unlike write_loose_object(), we do not have the entire
 997                  * buffer. If we get Z_BUF_ERROR due to too few input bytes,
 998                  * then we'll replenish them in the next input_stream->read()
 999                  * call when we loop.
1000                  */
1001         } while (ret == Z_OK || ret == Z_BUF_ERROR);
1002
1003         if (stream.total_in != len + hdrlen)
1004                 die(_("write stream object %ld != %"PRIuMAX), stream.total_in,
1005                     (uintmax_t)len + hdrlen);
1006
1007         /*
1008          * Common steps for write_loose_object and stream_loose_object to
1009          * end writing loose object:
1010          *
1011          *  - End the compression of zlib stream.
1012          *  - Get the calculated oid.
1013          */
1014         if (ret != Z_STREAM_END)
1015                 die(_("unable to stream deflate new object (%d)"), ret);
1016         ret = end_loose_object_common(&c, &compat_c, &stream, oid, &compat_oid);
1017         if (ret != Z_OK)
1018                 die(_("deflateEnd on stream object failed (%d)"), ret);
1019         close_loose_object(fd, tmp_file.buf);
1020
1021         if (freshen_packed_object(oid) || freshen_loose_object(oid)) {
1022                 unlink_or_warn(tmp_file.buf);
1023                 goto cleanup;
1024         }
1025
1026         odb_loose_path(the_repository->objects->sources, &filename, oid);
1027
1028         /* We finally know the object path, and create the missing dir. */
1029         dirlen = directory_size(filename.buf);
1030         if (dirlen) {
1031                 struct strbuf dir = STRBUF_INIT;
1032                 strbuf_add(&dir, filename.buf, dirlen);
1033
1034                 if (safe_create_dir_in_gitdir(the_repository, dir.buf) &&
1035                     errno != EEXIST) {
1036                         err = error_errno(_("unable to create directory %s"), dir.buf);
1037                         strbuf_release(&dir);
1038                         goto cleanup;
1039                 }
1040                 strbuf_release(&dir);
1041         }
1042
1043         err = finalize_object_file_flags(tmp_file.buf, filename.buf,
1044                                          FOF_SKIP_COLLISION_CHECK);
1045         if (!err && compat)
1046                 err = repo_add_loose_object_map(the_repository, oid, &compat_oid);
1047 cleanup:
1048         strbuf_release(&tmp_file);
1049         strbuf_release(&filename);
1050         return err;
1051 }
1052
1053 int write_object_file_flags(const void *buf, unsigned long len,
1054                             enum object_type type, struct object_id *oid,
1055                             struct object_id *compat_oid_in, unsigned flags)
1056 {
1057         struct repository *repo = the_repository;
1058         const struct git_hash_algo *algo = repo->hash_algo;
1059         const struct git_hash_algo *compat = repo->compat_hash_algo;
1060         struct object_id compat_oid;
1061         char hdr[MAX_HEADER_LEN];
1062         int hdrlen = sizeof(hdr);
1063
1064         /* Generate compat_oid */
1065         if (compat) {
1066                 if (compat_oid_in)
1067                         oidcpy(&compat_oid, compat_oid_in);
1068                 else if (type == OBJ_BLOB)
1069                         hash_object_file(compat, buf, len, type, &compat_oid);
1070                 else {
1071                         struct strbuf converted = STRBUF_INIT;
1072                         convert_object_file(the_repository, &converted, algo, compat,
1073                                             buf, len, type, 0);
1074                         hash_object_file(compat, converted.buf, converted.len,
1075                                          type, &compat_oid);
1076                         strbuf_release(&converted);
1077                 }
1078         }
1079
1080         /* Normally if we have it in the pack then we do not bother writing
1081          * it out into .git/objects/??/?{38} file.
1082          */
1083         write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen);
1084         if (freshen_packed_object(oid) || freshen_loose_object(oid))
1085                 return 0;
1086         if (write_loose_object(oid, hdr, hdrlen, buf, len, 0, flags))
1087                 return -1;
1088         if (compat)
1089                 return repo_add_loose_object_map(repo, oid, &compat_oid);
1090         return 0;
1091 }
1092
1093 int force_object_loose(const struct object_id *oid, time_t mtime)
1094 {
1095         struct repository *repo = the_repository;
1096         const struct git_hash_algo *compat = repo->compat_hash_algo;
1097         void *buf;
1098         unsigned long len;
1099         struct object_info oi = OBJECT_INFO_INIT;
1100         struct object_id compat_oid;
1101         enum object_type type;
1102         char hdr[MAX_HEADER_LEN];
1103         int hdrlen;
1104         int ret;
1105
1106         if (has_loose_object(oid))
1107                 return 0;
1108         oi.typep = &type;
1109         oi.sizep = &len;
1110         oi.contentp = &buf;
1111         if (odb_read_object_info_extended(the_repository->objects, oid, &oi, 0))
1112                 return error(_("cannot read object for %s"), oid_to_hex(oid));
1113         if (compat) {
1114                 if (repo_oid_to_algop(repo, oid, compat, &compat_oid))
1115                         return error(_("cannot map object %s to %s"),
1116                                      oid_to_hex(oid), compat->name);
1117         }
1118         hdrlen = format_object_header(hdr, sizeof(hdr), type, len);
1119         ret = write_loose_object(oid, hdr, hdrlen, buf, len, mtime, 0);
1120         if (!ret && compat)
1121                 ret = repo_add_loose_object_map(the_repository, oid, &compat_oid);
1122         free(buf);
1123
1124         return ret;
1125 }
1126
1127 /*
1128  * We can't use the normal fsck_error_function() for index_mem(),
1129  * because we don't yet have a valid oid for it to report. Instead,
1130  * report the minimal fsck error here, and rely on the caller to
1131  * give more context.
1132  */
1133 static int hash_format_check_report(struct fsck_options *opts UNUSED,
1134                                     void *fsck_report UNUSED,
1135                                     enum fsck_msg_type msg_type UNUSED,
1136                                     enum fsck_msg_id msg_id UNUSED,
1137                                     const char *message)
1138 {
1139         error(_("object fails fsck: %s"), message);
1140         return 1;
1141 }
1142
1143 static int index_mem(struct index_state *istate,
1144                      struct object_id *oid,
1145                      const void *buf, size_t size,
1146                      enum object_type type,
1147                      const char *path, unsigned flags)
1148 {
1149         struct strbuf nbuf = STRBUF_INIT;
1150         int ret = 0;
1151         int write_object = flags & INDEX_WRITE_OBJECT;
1152
1153         if (!type)
1154                 type = OBJ_BLOB;
1155
1156         /*
1157          * Convert blobs to git internal format
1158          */
1159         if ((type == OBJ_BLOB) && path) {
1160                 if (convert_to_git(istate, path, buf, size, &nbuf,
1161                                    get_conv_flags(flags))) {
1162                         buf = nbuf.buf;
1163                         size = nbuf.len;
1164                 }
1165         }
1166         if (flags & INDEX_FORMAT_CHECK) {
1167                 struct fsck_options opts = FSCK_OPTIONS_DEFAULT;
1168
1169                 opts.strict = 1;
1170                 opts.error_func = hash_format_check_report;
1171                 if (fsck_buffer(null_oid(the_hash_algo), type, buf, size, &opts))
1172                         die(_("refusing to create malformed object"));
1173                 fsck_finish(&opts);
1174         }
1175
1176         if (write_object)
1177                 ret = write_object_file(buf, size, type, oid);
1178         else
1179                 hash_object_file(the_hash_algo, buf, size, type, oid);
1180
1181         strbuf_release(&nbuf);
1182         return ret;
1183 }
1184
1185 static int index_stream_convert_blob(struct index_state *istate,
1186                                      struct object_id *oid,
1187                                      int fd,
1188                                      const char *path,
1189                                      unsigned flags)
1190 {
1191         int ret = 0;
1192         const int write_object = flags & INDEX_WRITE_OBJECT;
1193         struct strbuf sbuf = STRBUF_INIT;
1194
1195         assert(path);
1196         ASSERT(would_convert_to_git_filter_fd(istate, path));
1197
1198         convert_to_git_filter_fd(istate, path, fd, &sbuf,
1199                                  get_conv_flags(flags));
1200
1201         if (write_object)
1202                 ret = write_object_file(sbuf.buf, sbuf.len, OBJ_BLOB,
1203                                         oid);
1204         else
1205                 hash_object_file(the_hash_algo, sbuf.buf, sbuf.len, OBJ_BLOB,
1206                                  oid);
1207         strbuf_release(&sbuf);
1208         return ret;
1209 }
1210
1211 static int index_pipe(struct index_state *istate, struct object_id *oid,
1212                       int fd, enum object_type type,
1213                       const char *path, unsigned flags)
1214 {
1215         struct strbuf sbuf = STRBUF_INIT;
1216         int ret;
1217
1218         if (strbuf_read(&sbuf, fd, 4096) >= 0)
1219                 ret = index_mem(istate, oid, sbuf.buf, sbuf.len, type, path, flags);
1220         else
1221                 ret = -1;
1222         strbuf_release(&sbuf);
1223         return ret;
1224 }
1225
1226 #define SMALL_FILE_SIZE (32*1024)
1227
1228 static int index_core(struct index_state *istate,
1229                       struct object_id *oid, int fd, size_t size,
1230                       enum object_type type, const char *path,
1231                       unsigned flags)
1232 {
1233         int ret;
1234
1235         if (!size) {
1236                 ret = index_mem(istate, oid, "", size, type, path, flags);
1237         } else if (size <= SMALL_FILE_SIZE) {
1238                 char *buf = xmalloc(size);
1239                 ssize_t read_result = read_in_full(fd, buf, size);
1240                 if (read_result < 0)
1241                         ret = error_errno(_("read error while indexing %s"),
1242                                           path ? path : "<unknown>");
1243                 else if (read_result != size)
1244                         ret = error(_("short read while indexing %s"),
1245                                     path ? path : "<unknown>");
1246                 else
1247                         ret = index_mem(istate, oid, buf, size, type, path, flags);
1248                 free(buf);
1249         } else {
1250                 void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
1251                 ret = index_mem(istate, oid, buf, size, type, path, flags);
1252                 munmap(buf, size);
1253         }
1254         return ret;
1255 }
1256
1257 int index_fd(struct index_state *istate, struct object_id *oid,
1258              int fd, struct stat *st,
1259              enum object_type type, const char *path, unsigned flags)
1260 {
1261         int ret;
1262
1263         /*
1264          * Call xsize_t() only when needed to avoid potentially unnecessary
1265          * die() for large files.
1266          */
1267         if (type == OBJ_BLOB && path && would_convert_to_git_filter_fd(istate, path))
1268                 ret = index_stream_convert_blob(istate, oid, fd, path, flags);
1269         else if (!S_ISREG(st->st_mode))
1270                 ret = index_pipe(istate, oid, fd, type, path, flags);
1271         else if (st->st_size <= repo_settings_get_big_file_threshold(the_repository) ||
1272                  type != OBJ_BLOB ||
1273                  (path && would_convert_to_git(istate, path)))
1274                 ret = index_core(istate, oid, fd, xsize_t(st->st_size),
1275                                  type, path, flags);
1276         else
1277                 ret = index_blob_bulk_checkin(oid, fd, xsize_t(st->st_size), path,
1278                                              flags);
1279         close(fd);
1280         return ret;
1281 }
1282
1283 int index_path(struct index_state *istate, struct object_id *oid,
1284                const char *path, struct stat *st, unsigned flags)
1285 {
1286         int fd;
1287         struct strbuf sb = STRBUF_INIT;
1288         int rc = 0;
1289
1290         switch (st->st_mode & S_IFMT) {
1291         case S_IFREG:
1292                 fd = open(path, O_RDONLY);
1293                 if (fd < 0)
1294                         return error_errno("open(\"%s\")", path);
1295                 if (index_fd(istate, oid, fd, st, OBJ_BLOB, path, flags) < 0)
1296                         return error(_("%s: failed to insert into database"),
1297                                      path);
1298                 break;
1299         case S_IFLNK:
1300                 if (strbuf_readlink(&sb, path, st->st_size))
1301                         return error_errno("readlink(\"%s\")", path);
1302                 if (!(flags & INDEX_WRITE_OBJECT))
1303                         hash_object_file(the_hash_algo, sb.buf, sb.len,
1304                                          OBJ_BLOB, oid);
1305                 else if (write_object_file(sb.buf, sb.len, OBJ_BLOB, oid))
1306                         rc = error(_("%s: failed to insert into database"), path);
1307                 strbuf_release(&sb);
1308                 break;
1309         case S_IFDIR:
1310                 return repo_resolve_gitlink_ref(the_repository, path, "HEAD", oid);
1311         default:
1312                 return error(_("%s: unsupported file type"), path);
1313         }
1314         return rc;
1315 }
1316
1317 int read_pack_header(int fd, struct pack_header *header)
1318 {
1319         if (read_in_full(fd, header, sizeof(*header)) != sizeof(*header))
1320                 /* "eof before pack header was fully read" */
1321                 return PH_ERROR_EOF;
1322
1323         if (header->hdr_signature != htonl(PACK_SIGNATURE))
1324                 /* "protocol error (pack signature mismatch detected)" */
1325                 return PH_ERROR_PACK_SIGNATURE;
1326         if (!pack_version_ok(header->hdr_version))
1327                 /* "protocol error (pack version unsupported)" */
1328                 return PH_ERROR_PROTOCOL;
1329         return 0;
1330 }
1331
1332 int for_each_file_in_obj_subdir(unsigned int subdir_nr,
1333                                 struct strbuf *path,
1334                                 each_loose_object_fn obj_cb,
1335                                 each_loose_cruft_fn cruft_cb,
1336                                 each_loose_subdir_fn subdir_cb,
1337                                 void *data)
1338 {
1339         size_t origlen, baselen;
1340         DIR *dir;
1341         struct dirent *de;
1342         int r = 0;
1343         struct object_id oid;
1344
1345         if (subdir_nr > 0xff)
1346                 BUG("invalid loose object subdirectory: %x", subdir_nr);
1347
1348         origlen = path->len;
1349         strbuf_complete(path, '/');
1350         strbuf_addf(path, "%02x", subdir_nr);
1351
1352         dir = opendir(path->buf);
1353         if (!dir) {
1354                 if (errno != ENOENT)
1355                         r = error_errno(_("unable to open %s"), path->buf);
1356                 strbuf_setlen(path, origlen);
1357                 return r;
1358         }
1359
1360         oid.hash[0] = subdir_nr;
1361         strbuf_addch(path, '/');
1362         baselen = path->len;
1363
1364         while ((de = readdir_skip_dot_and_dotdot(dir))) {
1365                 size_t namelen;
1366
1367                 namelen = strlen(de->d_name);
1368                 strbuf_setlen(path, baselen);
1369                 strbuf_add(path, de->d_name, namelen);
1370                 if (namelen == the_hash_algo->hexsz - 2 &&
1371                     !hex_to_bytes(oid.hash + 1, de->d_name,
1372                                   the_hash_algo->rawsz - 1)) {
1373                         oid_set_algo(&oid, the_hash_algo);
1374                         memset(oid.hash + the_hash_algo->rawsz, 0,
1375                                GIT_MAX_RAWSZ - the_hash_algo->rawsz);
1376                         if (obj_cb) {
1377                                 r = obj_cb(&oid, path->buf, data);
1378                                 if (r)
1379                                         break;
1380                         }
1381                         continue;
1382                 }
1383
1384                 if (cruft_cb) {
1385                         r = cruft_cb(de->d_name, path->buf, data);
1386                         if (r)
1387                                 break;
1388                 }
1389         }
1390         closedir(dir);
1391
1392         strbuf_setlen(path, baselen - 1);
1393         if (!r && subdir_cb)
1394                 r = subdir_cb(subdir_nr, path->buf, data);
1395
1396         strbuf_setlen(path, origlen);
1397
1398         return r;
1399 }
1400
1401 int for_each_loose_file_in_objdir_buf(struct strbuf *path,
1402                             each_loose_object_fn obj_cb,
1403                             each_loose_cruft_fn cruft_cb,
1404                             each_loose_subdir_fn subdir_cb,
1405                             void *data)
1406 {
1407         int r = 0;
1408         int i;
1409
1410         for (i = 0; i < 256; i++) {
1411                 r = for_each_file_in_obj_subdir(i, path, obj_cb, cruft_cb,
1412                                                 subdir_cb, data);
1413                 if (r)
1414                         break;
1415         }
1416
1417         return r;
1418 }
1419
1420 int for_each_loose_file_in_objdir(const char *path,
1421                                   each_loose_object_fn obj_cb,
1422                                   each_loose_cruft_fn cruft_cb,
1423                                   each_loose_subdir_fn subdir_cb,
1424                                   void *data)
1425 {
1426         struct strbuf buf = STRBUF_INIT;
1427         int r;
1428
1429         strbuf_addstr(&buf, path);
1430         r = for_each_loose_file_in_objdir_buf(&buf, obj_cb, cruft_cb,
1431                                               subdir_cb, data);
1432         strbuf_release(&buf);
1433
1434         return r;
1435 }
1436
1437 int for_each_loose_object(each_loose_object_fn cb, void *data,
1438                           enum for_each_object_flags flags)
1439 {
1440         struct odb_source *source;
1441
1442         odb_prepare_alternates(the_repository->objects);
1443         for (source = the_repository->objects->sources; source; source = source->next) {
1444                 int r = for_each_loose_file_in_objdir(source->path, cb, NULL,
1445                                                       NULL, data);
1446                 if (r)
1447                         return r;
1448
1449                 if (flags & FOR_EACH_OBJECT_LOCAL_ONLY)
1450                         break;
1451         }
1452
1453         return 0;
1454 }
1455
1456 static int append_loose_object(const struct object_id *oid,
1457                                const char *path UNUSED,
1458                                void *data)
1459 {
1460         oidtree_insert(data, oid);
1461         return 0;
1462 }
1463
1464 struct oidtree *odb_loose_cache(struct odb_source *source,
1465                                 const struct object_id *oid)
1466 {
1467         int subdir_nr = oid->hash[0];
1468         struct strbuf buf = STRBUF_INIT;
1469         size_t word_bits = bitsizeof(source->loose_objects_subdir_seen[0]);
1470         size_t word_index = subdir_nr / word_bits;
1471         size_t mask = (size_t)1u << (subdir_nr % word_bits);
1472         uint32_t *bitmap;
1473
1474         if (subdir_nr < 0 ||
1475             subdir_nr >= bitsizeof(source->loose_objects_subdir_seen))
1476                 BUG("subdir_nr out of range");
1477
1478         bitmap = &source->loose_objects_subdir_seen[word_index];
1479         if (*bitmap & mask)
1480                 return source->loose_objects_cache;
1481         if (!source->loose_objects_cache) {
1482                 ALLOC_ARRAY(source->loose_objects_cache, 1);
1483                 oidtree_init(source->loose_objects_cache);
1484         }
1485         strbuf_addstr(&buf, source->path);
1486         for_each_file_in_obj_subdir(subdir_nr, &buf,
1487                                     append_loose_object,
1488                                     NULL, NULL,
1489                                     source->loose_objects_cache);
1490         *bitmap |= mask;
1491         strbuf_release(&buf);
1492         return source->loose_objects_cache;
1493 }
1494
1495 void odb_clear_loose_cache(struct odb_source *source)
1496 {
1497         oidtree_clear(source->loose_objects_cache);
1498         FREE_AND_NULL(source->loose_objects_cache);
1499         memset(&source->loose_objects_subdir_seen, 0,
1500                sizeof(source->loose_objects_subdir_seen));
1501 }
1502
1503 static int check_stream_oid(git_zstream *stream,
1504                             const char *hdr,
1505                             unsigned long size,
1506                             const char *path,
1507                             const struct object_id *expected_oid)
1508 {
1509         struct git_hash_ctx c;
1510         struct object_id real_oid;
1511         unsigned char buf[4096];
1512         unsigned long total_read;
1513         int status = Z_OK;
1514
1515         the_hash_algo->init_fn(&c);
1516         git_hash_update(&c, hdr, stream->total_out);
1517
1518         /*
1519          * We already read some bytes into hdr, but the ones up to the NUL
1520          * do not count against the object's content size.
1521          */
1522         total_read = stream->total_out - strlen(hdr) - 1;
1523
1524         /*
1525          * This size comparison must be "<=" to read the final zlib packets;
1526          * see the comment in unpack_loose_rest for details.
1527          */
1528         while (total_read <= size &&
1529                (status == Z_OK ||
1530                 (status == Z_BUF_ERROR && !stream->avail_out))) {
1531                 stream->next_out = buf;
1532                 stream->avail_out = sizeof(buf);
1533                 if (size - total_read < stream->avail_out)
1534                         stream->avail_out = size - total_read;
1535                 status = git_inflate(stream, Z_FINISH);
1536                 git_hash_update(&c, buf, stream->next_out - buf);
1537                 total_read += stream->next_out - buf;
1538         }
1539
1540         if (status != Z_STREAM_END) {
1541                 error(_("corrupt loose object '%s'"), oid_to_hex(expected_oid));
1542                 return -1;
1543         }
1544         if (stream->avail_in) {
1545                 error(_("garbage at end of loose object '%s'"),
1546                       oid_to_hex(expected_oid));
1547                 return -1;
1548         }
1549
1550         git_hash_final_oid(&real_oid, &c);
1551         if (!oideq(expected_oid, &real_oid)) {
1552                 error(_("hash mismatch for %s (expected %s)"), path,
1553                       oid_to_hex(expected_oid));
1554                 return -1;
1555         }
1556
1557         return 0;
1558 }
1559
1560 int read_loose_object(const char *path,
1561                       const struct object_id *expected_oid,
1562                       struct object_id *real_oid,
1563                       void **contents,
1564                       struct object_info *oi)
1565 {
1566         int ret = -1;
1567         int fd;
1568         void *map = NULL;
1569         unsigned long mapsize;
1570         git_zstream stream;
1571         char hdr[MAX_HEADER_LEN];
1572         unsigned long *size = oi->sizep;
1573
1574         fd = git_open(path);
1575         if (fd >= 0)
1576                 map = map_fd(fd, path, &mapsize);
1577         if (!map) {
1578                 error_errno(_("unable to mmap %s"), path);
1579                 goto out;
1580         }
1581
1582         if (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr)) != ULHR_OK) {
1583                 error(_("unable to unpack header of %s"), path);
1584                 goto out_inflate;
1585         }
1586
1587         if (parse_loose_header(hdr, oi) < 0) {
1588                 error(_("unable to parse header of %s"), path);
1589                 goto out_inflate;
1590         }
1591
1592         if (*oi->typep < 0) {
1593                 error(_("unable to parse type from header '%s' of %s"),
1594                       hdr, path);
1595                 goto out_inflate;
1596         }
1597
1598         if (*oi->typep == OBJ_BLOB &&
1599             *size > repo_settings_get_big_file_threshold(the_repository)) {
1600                 if (check_stream_oid(&stream, hdr, *size, path, expected_oid) < 0)
1601                         goto out_inflate;
1602         } else {
1603                 *contents = unpack_loose_rest(&stream, hdr, *size, expected_oid);
1604                 if (!*contents) {
1605                         error(_("unable to unpack contents of %s"), path);
1606                         goto out_inflate;
1607                 }
1608                 hash_object_file(the_repository->hash_algo,
1609                                  *contents, *size,
1610                                  *oi->typep, real_oid);
1611                 if (!oideq(expected_oid, real_oid))
1612                         goto out_inflate;
1613         }
1614
1615         ret = 0; /* everything checks out */
1616
1617 out_inflate:
1618         git_inflate_end(&stream);
1619 out:
1620         if (map)
1621                 munmap(map, mapsize);
1622         return ret;
1623 }