The sixteenth batch
[git/gitster.git] / mailinfo.c
blobb4e815b2d8b03c4d228b1c4178cf1a3fdc3d88e5
1 #define DISABLE_SIGN_COMPARE_WARNINGS
3 #include "git-compat-util.h"
4 #include "config.h"
5 #include "gettext.h"
6 #include "hex-ll.h"
7 #include "utf8.h"
8 #include "strbuf.h"
9 #include "mailinfo.h"
11 static void cleanup_space(struct strbuf *sb)
13 size_t pos, cnt;
14 for (pos = 0; pos < sb->len; pos++) {
15 if (isspace(sb->buf[pos])) {
16 sb->buf[pos] = ' ';
17 for (cnt = 0; isspace(sb->buf[pos + cnt + 1]); cnt++);
18 strbuf_remove(sb, pos + 1, cnt);
23 static void get_sane_name(struct strbuf *out, struct strbuf *name, struct strbuf *email)
25 struct strbuf *src = name;
26 if (!name->len || 60 < name->len || strpbrk(name->buf, "@<>"))
27 src = email;
28 else if (name == out)
29 return;
30 strbuf_reset(out);
31 strbuf_addbuf(out, src);
34 static void parse_bogus_from(struct mailinfo *mi, const struct strbuf *line)
36 /* John Doe <johndoe> */
38 char *bra, *ket;
39 /* This is fallback, so do not bother if we already have an
40 * e-mail address.
42 if (mi->email.len)
43 return;
45 bra = strchr(line->buf, '<');
46 if (!bra)
47 return;
48 ket = strchr(bra, '>');
49 if (!ket)
50 return;
52 strbuf_reset(&mi->email);
53 strbuf_add(&mi->email, bra + 1, ket - bra - 1);
55 strbuf_reset(&mi->name);
56 strbuf_add(&mi->name, line->buf, bra - line->buf);
57 strbuf_trim(&mi->name);
58 get_sane_name(&mi->name, &mi->name, &mi->email);
/*
 * Copy a parenthesized comment into "outbuf", dropping the backslash of
 * each backslash-escaped character.
 *
 * "in" points just past the opening '('; that '(' is emitted here.
 * Nested parentheses are copied and tracked.  Returns the position just
 * past the ')' that closes the outermost level, or the end of the input
 * when the comment is unterminated.
 */
static const char *unquote_comment(struct strbuf *outbuf, const char *in)
{
	int escaped = 0;
	int nesting = 1;

	strbuf_addch(outbuf, '(');

	while (*in) {
		int ch = *in++;

		if (escaped) {
			/* Previous character was a backslash: copy verbatim. */
			escaped = 0;
			strbuf_addch(outbuf, ch);
		} else if (ch == '\\') {
			escaped = 1;
		} else if (ch == '(') {
			strbuf_addch(outbuf, '(');
			nesting++;
		} else if (ch == ')') {
			strbuf_addch(outbuf, ')');
			if (--nesting == 0)
				return in;
		} else {
			strbuf_addch(outbuf, ch);
		}
	}

	return in;
}
/*
 * Copy the contents of a double-quoted string into "outbuf" without the
 * surrounding quotes, dropping the backslash of each escaped character.
 *
 * "in" points just past the opening '"'.  Returns the position just past
 * the closing '"', or the end of the input when the string is
 * unterminated.
 */
static const char *unquote_quoted_string(struct strbuf *outbuf, const char *in)
{
	int escaped = 0;

	while (*in) {
		int ch = *in++;

		if (escaped) {
			/* Previous character was a backslash: copy verbatim. */
			escaped = 0;
			strbuf_addch(outbuf, ch);
		} else if (ch == '\\') {
			escaped = 1;
		} else if (ch == '"') {
			return in;
		} else {
			strbuf_addch(outbuf, ch);
		}
	}

	return in;
}
119 static void unquote_quoted_pair(struct strbuf *line)
121 struct strbuf outbuf;
122 const char *in = line->buf;
123 int c;
125 strbuf_init(&outbuf, line->len);
127 while ((c = *in++) != 0) {
128 switch (c) {
129 case '"':
130 in = unquote_quoted_string(&outbuf, in);
131 continue;
132 case '(':
133 in = unquote_comment(&outbuf, in);
134 continue;
137 strbuf_addch(&outbuf, c);
140 strbuf_swap(&outbuf, line);
141 strbuf_release(&outbuf);
145 static void handle_from(struct mailinfo *mi, const struct strbuf *from)
147 char *at;
148 size_t el;
149 struct strbuf f;
151 strbuf_init(&f, from->len);
152 strbuf_addbuf(&f, from);
154 unquote_quoted_pair(&f);
156 at = strchr(f.buf, '@');
157 if (!at) {
158 parse_bogus_from(mi, from);
159 goto out;
163 * If we already have one email, don't take any confusing lines
165 if (mi->email.len && strchr(at + 1, '@'))
166 goto out;
168 /* Pick up the string around '@', possibly delimited with <>
169 * pair; that is the email part.
171 while (at > f.buf) {
172 char c = at[-1];
173 if (isspace(c))
174 break;
175 if (c == '<') {
176 at[-1] = ' ';
177 break;
179 at--;
181 el = strcspn(at, " \n\t\r\v\f>");
182 strbuf_reset(&mi->email);
183 strbuf_add(&mi->email, at, el);
184 strbuf_remove(&f, at - f.buf, el + (at[el] ? 1 : 0));
186 /* The remainder is name. It could be
188 * - "John Doe <john.doe@xz>" (a), or
189 * - "john.doe@xz (John Doe)" (b), or
190 * - "John (zzz) Doe <john.doe@xz> (Comment)" (c)
192 * but we have removed the email part, so
194 * - remove extra spaces which could stay after email (case 'c'), and
195 * - trim from both ends, possibly removing the () pair at the end
196 * (cases 'a' and 'b').
198 cleanup_space(&f);
199 strbuf_trim(&f);
200 if (f.buf[0] == '(' && f.len && f.buf[f.len - 1] == ')') {
201 strbuf_remove(&f, 0, 1);
202 strbuf_setlen(&f, f.len - 1);
205 get_sane_name(&mi->name, &f, &mi->email);
206 out:
207 strbuf_release(&f);
210 static void handle_header(struct strbuf **out, const struct strbuf *line)
212 if (!*out) {
213 *out = xmalloc(sizeof(struct strbuf));
214 strbuf_init(*out, line->len);
215 } else
216 strbuf_reset(*out);
218 strbuf_addbuf(*out, line);
/*
 * NOTE NOTE NOTE.  We do not claim we do full MIME.  We just attempt
 * to have enough heuristics to grok MIME encoded patches often found
 * on our mailing lists.  For example, we do not even treat header lines
 * case insensitively.
 */
/*
 * Extract the value of attribute "name" (e.g. "charset=") from "line"
 * into "attr".  The attribute name is matched case-insensitively.
 *
 * A value that starts with '"' runs up to the next '"'; an unquoted value
 * runs up to the next ';', space or tab.  "attr" is always emptied first.
 * Returns 1 when the attribute was found, 0 otherwise.
 */
static int slurp_attr(const char *line, const char *name, struct strbuf *attr)
{
	const char *value = strcasestr(line, name);
	const char *terminators = "; \t";

	strbuf_setlen(attr, 0);
	if (!value)
		return 0;

	value += strlen(name);
	if (*value == '"') {
		value++;
		terminators = "\"";
	}

	strbuf_add(attr, value, strcspn(value, terminators));
	return 1;
}
247 static int has_attr_value(const char *line, const char *name, const char *value)
249 struct strbuf sb = STRBUF_INIT;
250 int rc = slurp_attr(line, name, &sb) && !strcasecmp(sb.buf, value);
251 strbuf_release(&sb);
252 return rc;
255 static void handle_content_type(struct mailinfo *mi, struct strbuf *line)
257 struct strbuf *boundary = xmalloc(sizeof(struct strbuf));
258 strbuf_init(boundary, line->len);
260 mi->format_flowed = has_attr_value(line->buf, "format=", "flowed");
261 mi->delsp = has_attr_value(line->buf, "delsp=", "yes");
263 if (slurp_attr(line->buf, "boundary=", boundary)) {
264 strbuf_insertstr(boundary, 0, "--");
265 if (++mi->content_top >= &mi->content[MAX_BOUNDARIES]) {
266 error("Too many boundaries to handle");
267 mi->input_error = -1;
268 mi->content_top = &mi->content[MAX_BOUNDARIES] - 1;
269 strbuf_release(boundary);
270 free(boundary);
271 return;
273 *(mi->content_top) = boundary;
274 boundary = NULL;
276 slurp_attr(line->buf, "charset=", &mi->charset);
278 if (boundary) {
279 strbuf_release(boundary);
280 free(boundary);
284 static void handle_content_transfer_encoding(struct mailinfo *mi,
285 const struct strbuf *line)
287 if (strcasestr(line->buf, "base64"))
288 mi->transfer_encoding = TE_BASE64;
289 else if (strcasestr(line->buf, "quoted-printable"))
290 mi->transfer_encoding = TE_QP;
291 else
292 mi->transfer_encoding = TE_DONTCARE;
295 static int is_multipart_boundary(struct mailinfo *mi, const struct strbuf *line)
297 struct strbuf *content_top = *(mi->content_top);
299 return ((content_top->len <= line->len) &&
300 !memcmp(line->buf, content_top->buf, content_top->len));
303 static void cleanup_subject(struct mailinfo *mi, struct strbuf *subject)
305 size_t at = 0;
307 while (at < subject->len) {
308 char *pos;
309 size_t remove;
311 switch (subject->buf[at]) {
312 case 'r': case 'R':
313 if (subject->len <= at + 3)
314 break;
315 if ((subject->buf[at + 1] == 'e' ||
316 subject->buf[at + 1] == 'E') &&
317 subject->buf[at + 2] == ':') {
318 strbuf_remove(subject, at, 3);
319 continue;
321 at++;
322 break;
323 case ' ': case '\t': case ':':
324 strbuf_remove(subject, at, 1);
325 continue;
326 case '[':
327 pos = strchr(subject->buf + at, ']');
328 if (!pos)
329 break;
330 remove = pos - (subject->buf + at) + 1;
331 if (!mi->keep_non_patch_brackets_in_subject ||
332 (7 <= remove &&
333 memmem(subject->buf + at, remove, "PATCH", 5)))
334 strbuf_remove(subject, at, remove);
335 else {
336 at += remove;
338 * If the input had a space after the ], keep
339 * it. We don't bother with finding the end of
340 * the space, since we later normalize it
341 * anyway.
343 if (isspace(subject->buf[at]))
344 at += 1;
346 continue;
348 break;
350 strbuf_trim(subject);
/*
 * Names of the header fields whose values are collected; the index into
 * this table is also the index into the p_hdr_data / s_hdr_data arrays.
 */
static const char * const header[] = {
	"From",
	"Subject",
	"Date",
};
357 static inline int skip_header(const struct strbuf *line, const char *hdr,
358 const char **outval)
360 const char *val;
361 if (!skip_iprefix(line->buf, hdr, &val) ||
362 *val++ != ':')
363 return 0;
364 while (isspace(*val))
365 val++;
366 *outval = val;
367 return 1;
/*
 * Returns 1 when "line" (of length "len") looks exactly like the mbox
 * "From <40 lowercase hex digits> Mon Sep 17 00:00:00 2001\n" line that
 * starts format-patch output, 0 otherwise.  Only the hex digits may
 * differ from the sample.
 */
static int is_format_patch_separator(const char *line, int len)
{
	static const char SAMPLE[] =
		"From e6807f3efca28b30decfecb1732a56c7db1137ee Mon Sep 17 00:00:00 2001\n";
	const size_t sample_len = sizeof(SAMPLE) - 1;
	const size_t hash_at = 5; /* length of "From " */
	size_t tail_at;

	if ((size_t)len != sample_len)
		return 0;
	if (strncmp(line, "From ", hash_at))
		return 0;
	if (strspn(line + hash_at, "0123456789abcdef") != 40)
		return 0;

	/* Everything after the object name must match the sample. */
	tail_at = hash_at + 40;
	return !memcmp(SAMPLE + tail_at, line + tail_at, sample_len - tail_at);
}
386 static int decode_q_segment(struct strbuf *out, const struct strbuf *q_seg,
387 int rfc2047)
389 const char *in = q_seg->buf;
390 int c;
391 strbuf_grow(out, q_seg->len);
393 while ((c = *in++) != 0) {
394 if (c == '=') {
395 int ch, d = *in;
396 if (d == '\n' || !d)
397 break; /* drop trailing newline */
398 ch = hex2chr(in);
399 if (ch >= 0) {
400 strbuf_addch(out, ch);
401 in += 2;
402 continue;
404 /* garbage -- fall through */
406 if (rfc2047 && c == '_') /* rfc2047 4.2 (2) */
407 c = 0x20;
408 strbuf_addch(out, c);
410 return 0;
413 static int decode_b_segment(struct strbuf *out, const struct strbuf *b_seg)
415 /* Decode in..ep, possibly in-place to ot */
416 int c, pos = 0, acc = 0;
417 const char *in = b_seg->buf;
418 strbuf_grow(out, b_seg->len);
420 while ((c = *in++) != 0) {
421 if (c == '+')
422 c = 62;
423 else if (c == '/')
424 c = 63;
425 else if ('A' <= c && c <= 'Z')
426 c -= 'A';
427 else if ('a' <= c && c <= 'z')
428 c -= 'a' - 26;
429 else if ('0' <= c && c <= '9')
430 c -= '0' - 52;
431 else
432 continue; /* garbage */
433 switch (pos++) {
434 case 0:
435 acc = (c << 2);
436 break;
437 case 1:
438 strbuf_addch(out, (acc | (c >> 4)));
439 acc = (c & 15) << 4;
440 break;
441 case 2:
442 strbuf_addch(out, (acc | (c >> 2)));
443 acc = (c & 3) << 6;
444 break;
445 case 3:
446 strbuf_addch(out, (acc | c));
447 acc = pos = 0;
448 break;
451 return 0;
454 static int convert_to_utf8(struct mailinfo *mi,
455 struct strbuf *line, const char *charset)
457 char *out;
458 size_t out_len;
460 if (!mi->metainfo_charset || !charset || !*charset)
461 return 0;
463 if (same_encoding(mi->metainfo_charset, charset))
464 return 0;
465 out = reencode_string_len(line->buf, line->len,
466 mi->metainfo_charset, charset, &out_len);
467 if (!out) {
468 mi->input_error = -1;
469 return error("cannot convert from %s to %s",
470 charset, mi->metainfo_charset);
472 strbuf_attach(line, out, out_len, out_len);
473 return 0;
476 static void decode_header(struct mailinfo *mi, struct strbuf *it)
478 char *in, *ep, *cp;
479 struct strbuf outbuf = STRBUF_INIT, dec = STRBUF_INIT;
480 struct strbuf charset_q = STRBUF_INIT, piecebuf = STRBUF_INIT;
481 int found_error = 1; /* pessimism */
483 in = it->buf;
484 while (in - it->buf <= it->len && (ep = strstr(in, "=?")) != NULL) {
485 int encoding;
486 strbuf_reset(&charset_q);
487 strbuf_reset(&piecebuf);
489 if (in != ep) {
491 * We are about to process an encoded-word
492 * that begins at ep, but there is something
493 * before the encoded word.
495 char *scan;
496 for (scan = in; scan < ep; scan++)
497 if (!isspace(*scan))
498 break;
500 if (scan != ep || in == it->buf) {
502 * We should not lose that "something",
503 * unless we have just processed an
504 * encoded-word, and there is only LWS
505 * before the one we are about to process.
507 strbuf_add(&outbuf, in, ep - in);
510 /* E.g.
511 * ep : "=?iso-2022-jp?B?GyR...?= foo"
512 * ep : "=?ISO-8859-1?Q?Foo=FCbar?= baz"
514 ep += 2;
516 if (ep - it->buf >= it->len || !(cp = strchr(ep, '?')))
517 goto release_return;
519 if (cp + 3 - it->buf > it->len)
520 goto release_return;
521 strbuf_add(&charset_q, ep, cp - ep);
523 encoding = cp[1];
524 if (!encoding || cp[2] != '?')
525 goto release_return;
526 ep = strstr(cp + 3, "?=");
527 if (!ep)
528 goto release_return;
529 strbuf_add(&piecebuf, cp + 3, ep - cp - 3);
530 switch (tolower(encoding)) {
531 default:
532 goto release_return;
533 case 'b':
534 if ((found_error = decode_b_segment(&dec, &piecebuf)))
535 goto release_return;
536 break;
537 case 'q':
538 if ((found_error = decode_q_segment(&dec, &piecebuf, 1)))
539 goto release_return;
540 break;
542 if (convert_to_utf8(mi, &dec, charset_q.buf))
543 goto release_return;
545 strbuf_addbuf(&outbuf, &dec);
546 strbuf_release(&dec);
547 in = ep + 2;
549 strbuf_addstr(&outbuf, in);
550 strbuf_reset(it);
551 strbuf_addbuf(it, &outbuf);
552 found_error = 0;
553 release_return:
554 strbuf_release(&outbuf);
555 strbuf_release(&charset_q);
556 strbuf_release(&piecebuf);
557 strbuf_release(&dec);
559 if (found_error)
560 mi->input_error = -1;
/*
 * Returns true if "line" contains a header matching "hdr", in which case
 * the value of the header is appended to "val" and then passed through
 * decode_header(), which unwraps any RFC2047 B and Q encoding and
 * optionally normalizes the meta information to utf8.
 */
static int parse_header(const struct strbuf *line,
			const char *hdr,
			struct mailinfo *mi,
			struct strbuf *val)
{
	const char *raw_value;

	if (skip_header(line, hdr, &raw_value)) {
		strbuf_addstr(val, raw_value);
		decode_header(mi, val);
		return 1;
	}
	return 0;
}
582 static int check_header(struct mailinfo *mi,
583 const struct strbuf *line,
584 struct strbuf *hdr_data[], int overwrite)
586 int i, ret = 0;
587 struct strbuf sb = STRBUF_INIT;
589 /* search for the interesting parts */
590 for (i = 0; i < ARRAY_SIZE(header); i++) {
591 if ((!hdr_data[i] || overwrite) &&
592 parse_header(line, header[i], mi, &sb)) {
593 handle_header(&hdr_data[i], &sb);
594 ret = 1;
595 goto check_header_out;
599 /* Content stuff */
600 if (parse_header(line, "Content-Type", mi, &sb)) {
601 handle_content_type(mi, &sb);
602 ret = 1;
603 goto check_header_out;
605 if (parse_header(line, "Content-Transfer-Encoding", mi, &sb)) {
606 handle_content_transfer_encoding(mi, &sb);
607 ret = 1;
608 goto check_header_out;
610 if (parse_header(line, "Message-ID", mi, &sb)) {
611 if (mi->add_message_id)
612 mi->message_id = strbuf_detach(&sb, NULL);
613 ret = 1;
614 goto check_header_out;
617 check_header_out:
618 strbuf_release(&sb);
619 return ret;
623 * Returns 1 if the given line or any line beginning with the given line is an
624 * in-body header (that is, check_header will succeed when passed
625 * mi->s_hdr_data).
627 static int is_inbody_header(const struct mailinfo *mi,
628 const struct strbuf *line)
630 int i;
631 const char *val;
632 for (i = 0; i < ARRAY_SIZE(header); i++)
633 if (!mi->s_hdr_data[i] && skip_header(line, header[i], &val))
634 return 1;
635 return 0;
638 static void decode_transfer_encoding(struct mailinfo *mi, struct strbuf *line)
640 struct strbuf ret = STRBUF_INIT;
642 switch (mi->transfer_encoding) {
643 case TE_QP:
644 decode_q_segment(&ret, line, 0);
645 break;
646 case TE_BASE64:
647 decode_b_segment(&ret, line);
648 break;
649 case TE_DONTCARE:
650 default:
651 return;
653 strbuf_reset(line);
654 strbuf_addbuf(line, &ret);
655 strbuf_release(&ret);
658 static inline int patchbreak(const struct strbuf *line)
660 size_t i;
662 /* Beginning of a "diff -" header? */
663 if (starts_with(line->buf, "diff -"))
664 return 1;
666 /* CVS "Index: " line? */
667 if (starts_with(line->buf, "Index: "))
668 return 1;
671 * "--- <filename>" starts patches without headers
672 * "---<sp>*" is a manual separator
674 if (line->len < 4)
675 return 0;
677 if (starts_with(line->buf, "---")) {
678 /* space followed by a filename? */
679 if (line->buf[3] == ' ' && !isspace(line->buf[4]))
680 return 1;
681 /* Just whitespace? */
682 for (i = 3; i < line->len; i++) {
683 unsigned char c = line->buf[i];
684 if (c == '\n')
685 return 1;
686 if (!isspace(c))
687 break;
689 return 0;
691 return 0;
/*
 * Returns non-zero when "line" is a scissors line such as "-- >8 --".
 *
 * A perforation is made of dashes, the scissors marks ">8", "8<", ">%"
 * and "%<", and the whitespace that follows any of them.
 */
static int is_scissors_line(const char *line)
{
	const char *first = NULL, *last = NULL;
	const char *p;
	int marks = 0, blanks = 0, perforation = 0;
	int inside = 0;
	int visible;

	for (p = line; *p; p++) {
		if (isspace(*p)) {
			/* Whitespace counts only while inside a perforation. */
			if (inside) {
				perforation++;
				blanks++;
			}
			continue;
		}

		last = p;
		if (!first)
			first = p;

		if (*p == '-') {
			inside = 1;
			perforation++;
		} else if ((p[0] == '>' && (p[1] == '8' || p[1] == '%')) ||
			   ((p[0] == '8' || p[0] == '%') && p[1] == '<')) {
			inside = 1;
			perforation += 2;
			marks += 2;
			p++; /* the mark is two characters wide */
		} else {
			inside = 0;
		}
	}

	/*
	 * The mark must be at least 8 bytes long (e.g. "-- >8 --").
	 * Even though there can be arbitrary cruft on the same line
	 * (e.g. "cut here"), in order to avoid misidentification, the
	 * perforation must occupy more than a third of the visible
	 * width of the line, and dashes and scissors must occupy more
	 * than half of the perforation.
	 */
	visible = (first && last) ? (int)(last - first + 1) : 0;

	return (marks && 8 <= visible &&
		visible < perforation * 3 &&
		blanks * 2 < perforation);
}
746 static void flush_inbody_header_accum(struct mailinfo *mi)
748 if (!mi->inbody_header_accum.len)
749 return;
750 if (!check_header(mi, &mi->inbody_header_accum, mi->s_hdr_data, 0))
751 BUG("inbody_header_accum, if not empty, must always contain a valid in-body header");
752 strbuf_reset(&mi->inbody_header_accum);
755 static int check_inbody_header(struct mailinfo *mi, const struct strbuf *line)
757 if (mi->inbody_header_accum.len &&
758 (line->buf[0] == ' ' || line->buf[0] == '\t')) {
759 if (mi->use_scissors && is_scissors_line(line->buf)) {
761 * This is a scissors line; do not consider this line
762 * as a header continuation line.
764 flush_inbody_header_accum(mi);
765 return 0;
767 strbuf_strip_suffix(&mi->inbody_header_accum, "\n");
768 strbuf_addbuf(&mi->inbody_header_accum, line);
769 return 1;
772 flush_inbody_header_accum(mi);
774 if (starts_with(line->buf, ">From") && isspace(line->buf[5]))
775 return is_format_patch_separator(line->buf + 1, line->len - 1);
776 if (starts_with(line->buf, "[PATCH]") && isspace(line->buf[7])) {
777 int i;
778 for (i = 0; i < ARRAY_SIZE(header); i++)
779 if (!strcmp("Subject", header[i])) {
780 handle_header(&mi->s_hdr_data[i], line);
781 return 1;
783 return 0;
785 if (is_inbody_header(mi, line)) {
786 strbuf_addbuf(&mi->inbody_header_accum, line);
787 return 1;
789 return 0;
792 static int handle_commit_msg(struct mailinfo *mi, struct strbuf *line)
794 assert(!mi->filter_stage);
796 if (mi->header_stage) {
797 if (!line->len || (line->len == 1 && line->buf[0] == '\n')) {
798 if (mi->inbody_header_accum.len) {
799 flush_inbody_header_accum(mi);
800 mi->header_stage = 0;
802 return 0;
806 if (mi->use_inbody_headers && mi->header_stage) {
807 mi->header_stage = check_inbody_header(mi, line);
808 if (mi->header_stage)
809 return 0;
810 } else
811 /* Only trim the first (blank) line of the commit message
812 * when ignoring in-body headers.
814 mi->header_stage = 0;
816 /* normalize the log message to UTF-8. */
817 if (convert_to_utf8(mi, line, mi->charset.buf))
818 return 0; /* mi->input_error already set */
820 if (mi->use_scissors && is_scissors_line(line->buf)) {
821 int i;
823 strbuf_setlen(&mi->log_message, 0);
824 mi->header_stage = 1;
827 * We may have already read "secondary headers"; purge
828 * them to give ourselves a clean restart.
830 for (i = 0; i < ARRAY_SIZE(header); i++) {
831 if (mi->s_hdr_data[i])
832 strbuf_release(mi->s_hdr_data[i]);
833 FREE_AND_NULL(mi->s_hdr_data[i]);
835 return 0;
838 if (patchbreak(line)) {
839 if (mi->message_id)
840 strbuf_addf(&mi->log_message,
841 "Message-ID: %s\n", mi->message_id);
842 return 1;
845 strbuf_addbuf(&mi->log_message, line);
846 return 0;
849 static void handle_patch(struct mailinfo *mi, const struct strbuf *line)
851 fwrite(line->buf, 1, line->len, mi->patchfile);
852 mi->patch_lines++;
855 static void handle_filter(struct mailinfo *mi, struct strbuf *line)
857 switch (mi->filter_stage) {
858 case 0:
859 if (!handle_commit_msg(mi, line))
860 break;
861 mi->filter_stage++;
862 /* fallthrough */
863 case 1:
864 handle_patch(mi, line);
865 break;
869 static int is_rfc2822_header(const struct strbuf *line)
872 * The section that defines the loosest possible
873 * field name is "3.6.8 Optional fields".
875 * optional-field = field-name ":" unstructured CRLF
876 * field-name = 1*ftext
877 * ftext = %d33-57 / %59-126
879 int ch;
880 char *cp = line->buf;
882 /* Count mbox From headers as headers */
883 if (starts_with(cp, "From ") || starts_with(cp, ">From "))
884 return 1;
886 while ((ch = *cp++)) {
887 if (ch == ':')
888 return 1;
889 if ((33 <= ch && ch <= 57) ||
890 (59 <= ch && ch <= 126))
891 continue;
892 break;
894 return 0;
897 static int read_one_header_line(struct strbuf *line, FILE *in)
899 struct strbuf continuation = STRBUF_INIT;
901 /* Get the first part of the line. */
902 if (strbuf_getline_lf(line, in))
903 return 0;
906 * Is it an empty line or not a valid rfc2822 header?
907 * If so, stop here, and return false ("not a header")
909 strbuf_rtrim(line);
910 if (!line->len || !is_rfc2822_header(line)) {
911 /* Re-add the newline */
912 strbuf_addch(line, '\n');
913 return 0;
917 * Now we need to eat all the continuation lines..
918 * Yuck, 2822 header "folding"
920 for (;;) {
921 int peek;
923 peek = fgetc(in);
924 if (peek == EOF)
925 break;
926 ungetc(peek, in);
927 if (peek != ' ' && peek != '\t')
928 break;
929 if (strbuf_getline_lf(&continuation, in))
930 break;
931 continuation.buf[0] = ' ';
932 strbuf_rtrim(&continuation);
933 strbuf_addbuf(line, &continuation);
935 strbuf_release(&continuation);
937 return 1;
940 static int find_boundary(struct mailinfo *mi, struct strbuf *line)
942 while (!strbuf_getline_lf(line, mi->input)) {
943 if (*(mi->content_top) && is_multipart_boundary(mi, line))
944 return 1;
946 return 0;
949 static int handle_boundary(struct mailinfo *mi, struct strbuf *line)
951 struct strbuf newline = STRBUF_INIT;
953 strbuf_addch(&newline, '\n');
954 again:
955 if (line->len >= (*(mi->content_top))->len + 2 &&
956 !memcmp(line->buf + (*(mi->content_top))->len, "--", 2)) {
957 /* we hit an end boundary */
958 /* pop the current boundary off the stack */
959 strbuf_release(*(mi->content_top));
960 FREE_AND_NULL(*(mi->content_top));
962 /* technically won't happen as is_multipart_boundary()
963 will fail first. But just in case..
965 if (--mi->content_top < mi->content) {
966 error("Detected mismatched boundaries, can't recover");
967 mi->input_error = -1;
968 mi->content_top = mi->content;
969 strbuf_release(&newline);
970 return 0;
972 handle_filter(mi, &newline);
973 strbuf_release(&newline);
974 if (mi->input_error)
975 return 0;
977 /* skip to the next boundary */
978 if (!find_boundary(mi, line))
979 return 0;
980 goto again;
983 /* set some defaults */
984 mi->transfer_encoding = TE_DONTCARE;
985 strbuf_reset(&mi->charset);
987 /* slurp in this section's info */
988 while (read_one_header_line(line, mi->input))
989 check_header(mi, line, mi->p_hdr_data, 0);
991 strbuf_release(&newline);
992 /* replenish line */
993 if (strbuf_getline_lf(line, mi->input))
994 return 0;
995 strbuf_addch(line, '\n');
996 return 1;
999 static void handle_filter_flowed(struct mailinfo *mi, struct strbuf *line,
1000 struct strbuf *prev)
1002 size_t len = line->len;
1003 const char *rest;
1005 if (!mi->format_flowed) {
1006 if (len >= 2 &&
1007 line->buf[len - 2] == '\r' &&
1008 line->buf[len - 1] == '\n') {
1009 mi->have_quoted_cr = 1;
1010 if (mi->quoted_cr == quoted_cr_strip) {
1011 strbuf_setlen(line, len - 2);
1012 strbuf_addch(line, '\n');
1013 len--;
1016 handle_filter(mi, line);
1017 return;
1020 if (line->buf[len - 1] == '\n') {
1021 len--;
1022 if (len && line->buf[len - 1] == '\r')
1023 len--;
1026 /* Keep signature separator as-is. */
1027 if (skip_prefix(line->buf, "-- ", &rest) && rest - line->buf == len) {
1028 if (prev->len) {
1029 handle_filter(mi, prev);
1030 strbuf_reset(prev);
1032 handle_filter(mi, line);
1033 return;
1036 /* Unstuff space-stuffed line. */
1037 if (len && line->buf[0] == ' ') {
1038 strbuf_remove(line, 0, 1);
1039 len--;
1042 /* Save flowed line for later, but without the soft line break. */
1043 if (len && line->buf[len - 1] == ' ') {
1044 strbuf_add(prev, line->buf, len - !!mi->delsp);
1045 return;
1048 /* Prepend any previous partial lines */
1049 strbuf_insert(line, 0, prev->buf, prev->len);
1050 strbuf_reset(prev);
1052 handle_filter(mi, line);
1055 static void summarize_quoted_cr(struct mailinfo *mi)
1057 if (mi->have_quoted_cr &&
1058 mi->quoted_cr == quoted_cr_warn)
1059 warning(_("quoted CRLF detected"));
1062 static void handle_body(struct mailinfo *mi, struct strbuf *line)
1064 struct strbuf prev = STRBUF_INIT;
1066 /* Skip up to the first boundary */
1067 if (*(mi->content_top)) {
1068 if (!find_boundary(mi, line))
1069 goto handle_body_out;
1072 do {
1073 /* process any boundary lines */
1074 if (*(mi->content_top) && is_multipart_boundary(mi, line)) {
1075 /* flush any leftover */
1076 if (prev.len) {
1077 handle_filter(mi, &prev);
1078 strbuf_reset(&prev);
1080 summarize_quoted_cr(mi);
1081 mi->have_quoted_cr = 0;
1082 if (!handle_boundary(mi, line))
1083 goto handle_body_out;
1086 /* Unwrap transfer encoding */
1087 decode_transfer_encoding(mi, line);
1089 switch (mi->transfer_encoding) {
1090 case TE_BASE64:
1091 case TE_QP:
1093 struct strbuf **lines, **it, *sb;
1095 /* Prepend any previous partial lines */
1096 strbuf_insert(line, 0, prev.buf, prev.len);
1097 strbuf_reset(&prev);
1100 * This is a decoded line that may contain
1101 * multiple new lines. Pass only one chunk
1102 * at a time to handle_filter()
1104 lines = strbuf_split(line, '\n');
1105 for (it = lines; (sb = *it); it++) {
1106 if (!*(it + 1)) /* The last line */
1107 if (sb->buf[sb->len - 1] != '\n') {
1108 /* Partial line, save it for later. */
1109 strbuf_addbuf(&prev, sb);
1110 break;
1112 handle_filter_flowed(mi, sb, &prev);
1115 * The partial chunk is saved in "prev" and will be
1116 * appended by the next iteration of read_line_with_nul().
1118 strbuf_list_free(lines);
1119 break;
1121 default:
1122 handle_filter_flowed(mi, line, &prev);
1125 if (mi->input_error)
1126 break;
1127 } while (!strbuf_getwholeline(line, mi->input, '\n'));
1129 if (prev.len)
1130 handle_filter(mi, &prev);
1131 summarize_quoted_cr(mi);
1133 flush_inbody_header_accum(mi);
1135 handle_body_out:
1136 strbuf_release(&prev);
1139 static void output_header_lines(FILE *fout, const char *hdr, const struct strbuf *data)
1141 const char *sp = data->buf;
1142 while (1) {
1143 char *ep = strchr(sp, '\n');
1144 int len;
1145 if (!ep)
1146 len = strlen(sp);
1147 else
1148 len = ep - sp;
1149 fprintf(fout, "%s: %.*s\n", hdr, len, sp);
1150 if (!ep)
1151 break;
1152 sp = ep + 1;
1156 static void handle_info(struct mailinfo *mi)
1158 struct strbuf *hdr;
1159 int i;
1161 for (i = 0; i < ARRAY_SIZE(header); i++) {
1162 /* only print inbody headers if we output a patch file */
1163 if (mi->patch_lines && mi->s_hdr_data[i])
1164 hdr = mi->s_hdr_data[i];
1165 else if (mi->p_hdr_data[i])
1166 hdr = mi->p_hdr_data[i];
1167 else
1168 continue;
1170 if (memchr(hdr->buf, '\0', hdr->len)) {
1171 error("a NUL byte in '%s' is not allowed.", header[i]);
1172 mi->input_error = -1;
1175 if (!strcmp(header[i], "Subject")) {
1176 if (!mi->keep_subject) {
1177 cleanup_subject(mi, hdr);
1178 cleanup_space(hdr);
1180 output_header_lines(mi->output, "Subject", hdr);
1181 } else if (!strcmp(header[i], "From")) {
1182 cleanup_space(hdr);
1183 handle_from(mi, hdr);
1184 fprintf(mi->output, "Author: %s\n", mi->name.buf);
1185 fprintf(mi->output, "Email: %s\n", mi->email.buf);
1186 } else {
1187 cleanup_space(hdr);
1188 fprintf(mi->output, "%s: %s\n", header[i], hdr->buf);
1191 fprintf(mi->output, "\n");
1194 int mailinfo(struct mailinfo *mi, const char *msg, const char *patch)
1196 FILE *cmitmsg;
1197 int peek;
1198 struct strbuf line = STRBUF_INIT;
1200 cmitmsg = fopen(msg, "w");
1201 if (!cmitmsg) {
1202 perror(msg);
1203 return -1;
1205 mi->patchfile = fopen(patch, "w");
1206 if (!mi->patchfile) {
1207 perror(patch);
1208 fclose(cmitmsg);
1209 return -1;
1212 mi->p_hdr_data = xcalloc(ARRAY_SIZE(header), sizeof(*(mi->p_hdr_data)));
1213 mi->s_hdr_data = xcalloc(ARRAY_SIZE(header), sizeof(*(mi->s_hdr_data)));
1215 do {
1216 peek = fgetc(mi->input);
1217 if (peek == EOF) {
1218 fclose(cmitmsg);
1219 return error("empty patch: '%s'", patch);
1221 } while (isspace(peek));
1222 ungetc(peek, mi->input);
1224 /* process the email header */
1225 while (read_one_header_line(&line, mi->input))
1226 check_header(mi, &line, mi->p_hdr_data, 1);
1228 handle_body(mi, &line);
1229 fwrite(mi->log_message.buf, 1, mi->log_message.len, cmitmsg);
1230 fclose(cmitmsg);
1231 fclose(mi->patchfile);
1233 handle_info(mi);
1234 strbuf_release(&line);
1235 return mi->input_error;
1238 int mailinfo_parse_quoted_cr_action(const char *actionstr, int *action)
1240 if (!strcmp(actionstr, "nowarn"))
1241 *action = quoted_cr_nowarn;
1242 else if (!strcmp(actionstr, "warn"))
1243 *action = quoted_cr_warn;
1244 else if (!strcmp(actionstr, "strip"))
1245 *action = quoted_cr_strip;
1246 else
1247 return -1;
1248 return 0;
1251 static int git_mailinfo_config(const char *var, const char *value,
1252 const struct config_context *ctx, void *mi_)
1254 struct mailinfo *mi = mi_;
1256 if (!starts_with(var, "mailinfo."))
1257 return git_default_config(var, value, ctx, NULL);
1258 if (!strcmp(var, "mailinfo.scissors")) {
1259 mi->use_scissors = git_config_bool(var, value);
1260 return 0;
1262 if (!strcmp(var, "mailinfo.quotedcr")) {
1263 if (!value)
1264 return config_error_nonbool(var);
1265 if (mailinfo_parse_quoted_cr_action(value, &mi->quoted_cr) != 0)
1266 return error(_("bad action '%s' for '%s'"), value, var);
1267 return 0;
1269 /* perhaps others here */
1270 return 0;
1273 void setup_mailinfo(struct repository *r, struct mailinfo *mi)
1275 memset(mi, 0, sizeof(*mi));
1276 strbuf_init(&mi->name, 0);
1277 strbuf_init(&mi->email, 0);
1278 strbuf_init(&mi->charset, 0);
1279 strbuf_init(&mi->log_message, 0);
1280 strbuf_init(&mi->inbody_header_accum, 0);
1281 mi->quoted_cr = quoted_cr_warn;
1282 mi->header_stage = 1;
1283 mi->use_inbody_headers = 1;
1284 mi->content_top = mi->content;
1285 repo_config(r, git_mailinfo_config, mi);
1288 void clear_mailinfo(struct mailinfo *mi)
1290 strbuf_release(&mi->name);
1291 strbuf_release(&mi->email);
1292 strbuf_release(&mi->charset);
1293 strbuf_release(&mi->inbody_header_accum);
1294 free(mi->message_id);
1296 for (size_t i = 0; i < ARRAY_SIZE(header); i++) {
1297 if (!mi->p_hdr_data[i])
1298 continue;
1299 strbuf_release(mi->p_hdr_data[i]);
1300 free(mi->p_hdr_data[i]);
1302 free(mi->p_hdr_data);
1304 for (size_t i = 0; i < ARRAY_SIZE(header); i++) {
1305 if (!mi->s_hdr_data[i])
1306 continue;
1307 strbuf_release(mi->s_hdr_data[i]);
1308 free(mi->s_hdr_data[i]);
1310 free(mi->s_hdr_data);
1312 while (mi->content < mi->content_top) {
1313 free(*(mi->content_top));
1314 mi->content_top--;
1317 strbuf_release(&mi->log_message);