2 ** Copyright 2000-2003 Double Precision, Inc.
3 ** See COPYING for distribution information.
19 static void swapmsgdata(struct imap_refmsg
*a
, struct imap_refmsg
*b
)
26 #define swap(a,b,tmp) (tmp)=(a); (a)=(b); (b)=(tmp);
28 swap(a
->msgid
, b
->msgid
, cp
);
29 swap(a
->isdummy
, b
->isdummy
, c
);
30 swap(a
->flag2
, b
->flag2
, c
);
32 swap(a
->timestamp
, b
->timestamp
, t
);
33 swap(a
->seqnum
, b
->seqnum
, ul
);
38 struct imap_refmsgtable
*rfc822_threadalloc()
40 struct imap_refmsgtable
*p
;
42 p
=(struct imap_refmsgtable
*)malloc(sizeof(struct imap_refmsgtable
));
44 memset(p
, 0, sizeof(*p
));
48 void rfc822_threadfree(struct imap_refmsgtable
*p
)
51 struct imap_refmsghash
*h
;
52 struct imap_subjlookup
*s
;
53 struct imap_refmsg
*m
;
55 for (i
=0; i
<sizeof(p
->hashtable
)/sizeof(p
->hashtable
[0]); i
++)
56 while ((h
=p
->hashtable
[i
]) != 0)
58 p
->hashtable
[i
]=h
->nexthash
;
62 for (i
=0; i
<sizeof(p
->subjtable
)/sizeof(p
->subjtable
[0]); i
++)
63 while ((s
=p
->subjtable
[i
]) != 0)
65 p
->subjtable
[i
]=s
->nextsubj
;
70 while ((m
=p
->firstmsg
) != 0)
80 static int hashmsgid(const char *msgid
)
82 unsigned long hashno
=0;
86 unsigned long n
= (hashno
<< 1);
88 #define HMIDS (((struct imap_refmsgtable *)0)->hashtable)
89 #define HHMIDSS ( sizeof(HMIDS) / sizeof( HMIDS[0] ))
91 if (hashno
& HHMIDSS
)
94 hashno
= n
^ (unsigned char)*msgid
++;
97 return (hashno
% HHMIDSS
);
100 struct imap_refmsg
*rfc822_threadallocmsg(struct imap_refmsgtable
*mt
,
103 int n
=hashmsgid(msgid
);
104 struct imap_refmsg
*msgp
= (struct imap_refmsg
*)
105 malloc(sizeof(struct imap_refmsg
)+1+strlen(msgid
));
106 struct imap_refmsghash
*h
, **hp
;
108 if (!msgp
) return (0);
109 memset(msgp
, 0, sizeof(*msgp
));
110 strcpy ((msgp
->msgid
=(char *)(msgp
+1)), msgid
);
112 h
=(struct imap_refmsghash
*)malloc(sizeof(struct imap_refmsghash
));
119 for (hp
= &mt
->hashtable
[n
]; *hp
; hp
= & (*hp
)->nexthash
)
121 if (strcmp( (*hp
)->msg
->msgid
, msgp
->msgid
) > 0)
129 msgp
->last
=mt
->lastmsg
;
132 mt
->lastmsg
->next
=msgp
;
140 struct imap_refmsg
*rfc822_threadsearchmsg(struct imap_refmsgtable
*mt
,
143 int n
=hashmsgid(msgid
);
144 struct imap_refmsghash
*h
;
146 for (h
= mt
->hashtable
[n
]; h
; h
= h
->nexthash
)
148 int rc
=strcmp(h
->msg
->msgid
, msgid
);
150 if (rc
== 0) return (h
->msg
);
157 static int findsubj(struct imap_refmsgtable
*mt
, const char *s
, int *isrefwd
,
158 int create
, struct imap_subjlookup
**ptr
)
160 char *ss
=rfc822_coresubj(s
, isrefwd
);
162 struct imap_subjlookup
**h
;
163 struct imap_subjlookup
*newsubj
;
165 if (!ss
) return (-1);
168 for (h
= &mt
->subjtable
[n
]; *h
; h
= &(*h
)->nextsubj
)
170 int rc
=strcmp((*h
)->subj
, ss
);
188 newsubj
=malloc(sizeof(struct imap_subjlookup
));
194 memset(newsubj
, 0, sizeof(*newsubj
));
196 newsubj
->nextsubj
= *h
;
197 newsubj
->msgisrefwd
= *isrefwd
;
203 static void linkparent(struct imap_refmsg
*msg
, struct imap_refmsg
*lastmsg
)
206 msg
->prevsib
=lastmsg
->lastchild
;
208 msg
->prevsib
->nextsib
=msg
;
210 lastmsg
->firstchild
=msg
;
212 lastmsg
->lastchild
=msg
;
217 static void breakparent(struct imap_refmsg
*m
)
219 if (!m
->parent
) return;
221 if (m
->prevsib
) m
->prevsib
->nextsib
=m
->nextsib
;
222 else m
->parent
->firstchild
=m
->nextsib
;
224 if (m
->nextsib
) m
->nextsib
->prevsib
=m
->prevsib
;
225 else m
->parent
->lastchild
=m
->prevsib
;
229 static struct imap_refmsg
*dorefcreate(struct imap_refmsgtable
*mt
,
230 const char *newmsgid
,
232 /* a - references header */
234 struct imap_refmsg
*lastmsg
=0, *m
;
235 struct imap_refmsg
*msg
;
239 (A) Using the Message-IDs in the message's references, link
240 the corresponding messages together as parent/child. Make
241 the first reference the parent of the second (and the second
242 a child of the first), the second the parent of the third
243 (and the third a child of the second), etc. The following
244 rules govern the creation of these links:
246 If no reference message can be found with a given
247 Message-ID, create a dummy message with this ID. Use
248 this dummy message for all subsequent references to this
252 for (n
=0; n
<a
->naddrs
; n
++)
254 char *msgid
=a
->addrs
[n
].tokens
? rfc822_getaddr(a
, n
):NULL
;
256 msg
=msgid
? rfc822_threadsearchmsg(mt
, msgid
):0;
259 msg
=rfc822_threadallocmsg(mt
, msgid
? msgid
:"");
274 If a reference message already has a parent, don't change
278 if (lastmsg
== 0 || msg
->parent
)
285 Do not create a parent/child link if creating that link
286 would introduce a loop. For example, before making
287 message A the parent of B, make sure that A is not a
292 for (m
=lastmsg
; m
; m
=m
->parent
)
293 if (strcmp(m
->msgid
, msg
->msgid
) == 0)
301 linkparent(msg
, lastmsg
);
307 (B) Create a parent/child link between the last reference
308 (or NIL if there are no references) and the current message.
309 If the current message has a parent already, break the
310 current parent/child link before creating the new one. Note
311 that if this message has no references, that it will now
314 NOTE: The parent/child links MUST be kept consistent with
315 one another at ALL times.
319 msg
=*newmsgid
? rfc822_threadsearchmsg(mt
, newmsgid
):0;
322 If a message does not contain a Message-ID header line,
323 or the Message-ID header line does not contain a valid
324 Message ID, then assign a unique Message ID to this
327 Implementation note: empty msgid, plus dupe check below,
331 if (msg
&& msg
->isdummy
)
341 ** If two or more messages have the same Message ID, assign
342 ** a unique Message ID to each of the duplicates.
344 ** Implementation note: just unlink the existing message from
345 ** it's parents/children.
349 while (msg
->firstchild
)
350 breakparent(msg
->firstchild
);
354 /* Create new entry with an empty msgid, if any more
355 ** msgids come, they'll hit the dupe check again.
360 msg
=rfc822_threadallocmsg(mt
, newmsgid
);
361 if (!msg
) return (0);
366 for (m
=lastmsg
; m
; m
=m
->parent
)
367 if (strcmp(m
->msgid
, msg
->msgid
) == 0)
370 linkparent(msg
, lastmsg
);
375 static struct imap_refmsg
*threadmsg_common(struct imap_refmsg
*m
,
376 const char *subjheader
,
377 const char *dateheader
,
378 time_t dateheader_tm
,
379 unsigned long seqnum
);
381 static struct imap_refmsg
*rfc822_threadmsgaref(struct imap_refmsgtable
*mt
,
382 const char *msgidhdr
,
383 struct rfc822a
*refhdr
,
384 const char *subjheader
,
385 const char *dateheader
,
386 time_t dateheader_tm
,
387 unsigned long seqnum
);
389 struct imap_refmsg
*rfc822_threadmsg(struct imap_refmsgtable
*mt
,
390 const char *msgidhdr
,
392 const char *subjheader
,
393 const char *dateheader
,
394 time_t dateheader_tm
,
395 unsigned long seqnum
)
399 struct imap_refmsg
*m
;
401 t
=rfc822t_alloc_new(refhdr
? refhdr
:"", NULL
, NULL
);
414 m
=rfc822_threadmsgaref(mt
, msgidhdr
, a
, subjheader
, dateheader
,
415 dateheader_tm
, seqnum
);
423 struct imap_refmsg
*rfc822_threadmsgrefs(struct imap_refmsgtable
*mt
,
425 const char * const * msgidList
,
426 const char *subjheader
,
427 const char *dateheader
,
428 time_t dateheader_tm
,
429 unsigned long seqnum
)
431 struct imap_refmsg
*m
;
432 struct rfc822token
*tArray
;
433 struct rfc822addr
*aArray
;
438 for (n
=0; msgidList
[n
]; n
++)
441 if ((tArray
=malloc((n
+1) * sizeof(*tArray
))) == NULL
)
444 if ((aArray
=malloc((n
+1) * sizeof(*aArray
))) == NULL
)
454 tArray
[i
].ptr
=msgidList
[i
];
455 tArray
[i
].len
=strlen(msgidList
[i
]);
458 aArray
[i
].tokens
=&tArray
[i
];
464 m
=rfc822_threadmsgaref(mt
, msgid_s
, &a
, subjheader
, dateheader
,
465 dateheader_tm
, seqnum
);
472 static struct imap_refmsg
*rfc822_threadmsgaref(struct imap_refmsgtable
*mt
,
473 const char *msgidhdr
,
474 struct rfc822a
*refhdr
,
475 const char *subjheader
,
476 const char *dateheader
,
477 time_t dateheader_tm
,
478 unsigned long seqnum
)
482 struct imap_refmsg
*m
;
486 t
=rfc822t_alloc_new(msgidhdr
? msgidhdr
:"", NULL
, NULL
);
496 msgid_s
=a
->naddrs
> 0 ? rfc822_getaddr(a
, 0):strdup("");
504 m
=dorefcreate(mt
, msgid_s
, refhdr
);
512 return threadmsg_common(m
, subjheader
, dateheader
,
513 dateheader_tm
, seqnum
);
516 static struct imap_refmsg
*threadmsg_common(struct imap_refmsg
*m
,
517 const char *subjheader
,
518 const char *dateheader
,
519 time_t dateheader_tm
,
520 unsigned long seqnum
)
522 if (subjheader
&& (m
->subj
=strdup(subjheader
)) == 0)
523 return (0); /* Cleanup in rfc822_threadfree() */
526 dateheader_tm
=rfc822_parsedt(dateheader
);
528 m
->timestamp
=dateheader_tm
;
536 (2) Gather together all of the messages that have no parents
537 and make them all children (siblings of one another) of a dummy
538 parent (the "root"). These messages constitute first messages
539 of the threads created thus far.
543 struct imap_refmsg
*rfc822_threadgetroot(struct imap_refmsgtable
*mt
)
545 struct imap_refmsg
*root
, *m
;
548 return (mt
->rootptr
);
550 root
=rfc822_threadallocmsg(mt
, "(root)");
552 if (!root
) return (0);
554 root
->parent
=root
; /* Temporary */
557 for (m
=mt
->firstmsg
; m
; m
=m
->next
)
560 if (m
->isdummy
&& m
->firstchild
== 0)
561 continue; /* Can happen in reference creation */
566 return (mt
->rootptr
=root
);
571 ** (3) Prune dummy messages from the thread tree. Traverse each
572 ** thread under the root, and for each message:
575 void rfc822_threadprune(struct imap_refmsgtable
*mt
)
577 struct imap_refmsg
*msg
;
579 for (msg
=mt
->firstmsg
; msg
; msg
=msg
->next
)
581 struct imap_refmsg
*saveparent
, *m
;
584 continue; /* The root, need it later. */
591 ** If it is a dummy message with NO children, delete it.
594 if (msg
->firstchild
== 0)
598 ** Don't free the node, it'll be done on msgtable
605 ** If it is a dummy message with children, delete it, but
606 ** promote its children to the current level. In other words,
607 ** splice them in with the dummy's siblings.
609 ** Do not promote the children if doing so would make them
610 ** children of the root, unless there is only one child.
613 if (msg
->firstchild
->nextsib
&&
617 saveparent
=msg
->parent
;
620 while ((m
=msg
->firstchild
) != 0)
623 linkparent(m
, saveparent
);
628 static int cmp_msgs(const void *, const void *);
630 int rfc822_threadsortsubj(struct imap_refmsg
*root
)
632 struct imap_refmsg
*toproot
;
635 ** (4) Sort the messages under the root (top-level siblings only)
636 ** by sent date. In the case of an exact match on sent date or if
637 ** either of the Date: headers used in a comparison can not be
638 ** parsed, use the order in which the messages appear in the
639 ** mailbox (that is, by sequence number) to determine the order.
640 ** In the case of a dummy message, sort its children by sent date
641 ** and then use the first child for the top-level sort.
644 struct imap_refmsg
**sortarray
;
646 for (cnt
=0, toproot
=root
->firstchild
; toproot
;
647 toproot
=toproot
->nextsib
)
649 if (toproot
->isdummy
)
650 rfc822_threadsortsubj(toproot
);
654 if ((sortarray
=malloc(sizeof(struct imap_refmsg
*)*(cnt
+1))) == 0)
657 for (cnt
=0; (toproot
=root
->firstchild
) != NULL
; ++cnt
)
659 sortarray
[cnt
]=toproot
;
660 breakparent(toproot
);
663 qsort(sortarray
, cnt
, sizeof(*sortarray
), cmp_msgs
);
665 for (i
=0; i
<cnt
; i
++)
666 linkparent(sortarray
[i
], root
);
671 int rfc822_threadgathersubj(struct imap_refmsgtable
*mt
,
672 struct imap_refmsg
*root
)
674 struct imap_refmsg
*toproot
, *p
;
677 ** (5) Gather together messages under the root that have the same
678 ** extracted subject text.
680 ** (A) Create a table for associating extracted subjects with
683 ** (B) Populate the subject table with one message per
684 ** extracted subject. For each message under the root:
687 for (toproot
=root
->firstchild
; toproot
; toproot
=toproot
->nextsib
)
690 struct imap_subjlookup
*subjtop
;
694 ** (i) Find the subject of this thread by extracting the
695 ** base subject from the current message, or its first child
696 ** if the current message is a dummy.
703 subj
=p
->subj
? p
->subj
:"";
707 ** (ii) If the extracted subject is empty, skip this
715 ** (iii) Lookup the message associated with this extracted
716 ** subject in the table.
719 if (findsubj(mt
, subj
, &isrefwd
, 1, &subjtop
))
724 ** (iv) If there is no message in the table with this
725 ** subject, add the current message and the extracted
726 ** subject to the subject table.
729 if (subjtop
->msg
== 0)
731 subjtop
->msg
=toproot
;
732 subjtop
->msgisrefwd
=isrefwd
;
737 ** Otherwise, replace the message in the table with the
738 ** current message if the message in the table is not a
739 ** dummy AND either of the following criteria are true:
742 if (!subjtop
->msg
->isdummy
)
745 ** The current message is a dummy
749 if (toproot
->isdummy
)
751 subjtop
->msg
=toproot
;
752 subjtop
->msgisrefwd
=isrefwd
;
757 ** The message in the table is a reply or forward (its
758 ** original subject contains a subj-refwd part and/or a
759 ** "(fwd)" subj-trailer) and the current message is
763 if (subjtop
->msgisrefwd
&& !isrefwd
)
765 subjtop
->msg
=toproot
;
766 subjtop
->msgisrefwd
=isrefwd
;
774 ** (C) Merge threads with the same subject. For each message
778 int rfc822_threadmergesubj(struct imap_refmsgtable
*mt
,
779 struct imap_refmsg
*root
)
781 struct imap_refmsg
*toproot
, *p
, *q
, *nextroot
;
784 for (toproot
=root
->firstchild
; toproot
; toproot
=nextroot
)
787 struct imap_subjlookup
*subjtop
;
790 nextroot
=toproot
->nextsib
;
793 ** (i) Find the subject of this thread as in step 4.B.i
801 subj
=p
->subj
? p
->subj
:"";
804 ** (ii) If the extracted subject is empty, skip this
812 ** (iii) Lookup the message associated with this extracted
813 ** subject in the table.
816 if (findsubj(mt
, subj
, &isrefwd
, 0, &subjtop
) || subjtop
== 0)
820 ** (iv) If the message in the table is the current message,
824 /* NOTE - ptr comparison IS NOT LEGAL */
826 subjtop
->msg
->flag2
=1;
832 subjtop
->msg
->flag2
=0;
835 ** Otherwise, merge the current message with the one in the
836 ** table using the following rules:
838 ** If both messages are dummies, append the current
839 ** message's children to the children of the message in
840 ** the table (the children of both messages become
841 ** siblings), and then delete the current message.
844 if (subjtop
->msg
->isdummy
&& toproot
->isdummy
)
846 while ((p
=toproot
->firstchild
) != 0)
849 linkparent(p
, subjtop
->msg
);
851 breakparent(toproot
);
856 ** If the message in the table is a dummy and the current
857 ** message is not, make the current message a child of
858 ** the message in the table (a sibling of it's children).
861 if (subjtop
->msg
->isdummy
)
863 breakparent(toproot
);
864 linkparent(toproot
, subjtop
->msg
);
869 ** If the current message is a reply or forward and the
870 ** message in the table is not, make the current message
871 ** a child of the message in the table (a sibling of it's
881 subj
=p
->subj
? p
->subj
:"";
883 str
=rfc822_coresubj(subj
, &isrefwd
);
887 free(str
); /* Don't really care */
891 breakparent(toproot
);
892 linkparent(toproot
, subjtop
->msg
);
898 ** Otherwise, create a new dummy container and make both
899 ** messages children of the dummy, and replace the
900 ** message in the table with the dummy message.
903 /* What we do is create a new message, then move the
904 ** contents of subjtop->msg (including its children)
905 ** to the new message, then make the new message a child
906 ** of subjtop->msg, and mark subjtop->msg as a dummy msg.
909 q
=rfc822_threadallocmsg(mt
, "(dummy)");
915 swapmsgdata(q
, subjtop
->msg
);
917 while ((p
=subjtop
->msg
->firstchild
) != 0)
922 linkparent(q
, subjtop
->msg
);
924 breakparent(toproot
);
925 linkparent(toproot
, subjtop
->msg
);
931 ** (6) Traverse the messages under the root and sort each set of
932 ** siblings by sent date. Traverse the messages in such a way
933 ** that the "youngest" set of siblings are sorted first, and the
934 ** "oldest" set of siblings are sorted last (grandchildren are
935 ** sorted before children, etc). In the case of an exact match on
936 ** sent date or if either of the Date: headers used in a
937 ** comparison can not be parsed, use the order in which the
938 ** messages appear in the mailbox (that is, by sequence number) to
939 ** determine the order. In the case of a dummy message (which can
940 ** only occur with top-level siblings), use its first child for
944 static int cmp_msgs(const void *a
, const void *b
)
946 struct imap_refmsg
*ma
=*(struct imap_refmsg
**)a
;
947 struct imap_refmsg
*mb
=*(struct imap_refmsg
**)b
;
949 unsigned long na
, nb
;
951 while (ma
&& ma
->isdummy
)
954 while (mb
&& mb
->isdummy
)
970 return (ta
&& tb
&& ta
!= tb
? ta
< tb
? -1: 1:
971 na
< nb
? -1: na
> nb
? 1:0);
974 struct imap_threadsortinfo
{
975 struct imap_refmsgtable
*mt
;
976 struct imap_refmsg
**sort_table
;
977 size_t sort_table_cnt
;
980 static int dothreadsort(struct imap_threadsortinfo
*,
981 struct imap_refmsg
*);
983 int rfc822_threadsortbydate(struct imap_refmsgtable
*mt
)
985 struct imap_threadsortinfo itsi
;
990 itsi
.sort_table_cnt
=0;
992 rc
=dothreadsort(&itsi
, mt
->rootptr
);
995 free(itsi
.sort_table
);
999 static int dothreadsort(struct imap_threadsortinfo
*itsi
,
1000 struct imap_refmsg
*p
)
1002 struct imap_refmsg
*q
;
1005 for (q
=p
->firstchild
; q
; q
=q
->nextsib
)
1006 dothreadsort(itsi
, q
);
1009 for (q
=p
->firstchild
; q
; q
=q
->nextsib
)
1012 if (n
> itsi
->sort_table_cnt
)
1014 struct imap_refmsg
**new_array
=(struct imap_refmsg
**)
1016 realloc(itsi
->sort_table
,
1017 sizeof(struct imap_refmsg
*)*n
)
1018 :malloc(sizeof(struct imap_refmsg
*)*n
));
1023 itsi
->sort_table
=new_array
;
1024 itsi
->sort_table_cnt
=n
;
1028 while ((q
=p
->firstchild
) != 0)
1031 itsi
->sort_table
[n
++]=q
;
1034 qsort(itsi
->sort_table
, n
, sizeof(struct imap_refmsg
*), cmp_msgs
);
1037 linkparent(itsi
->sort_table
[i
], p
);
1041 struct imap_refmsg
*rfc822_thread(struct imap_refmsgtable
*mt
)
1045 rfc822_threadprune(mt
);
1046 if ((mt
->rootptr
=rfc822_threadgetroot(mt
)) == 0)
1048 if (rfc822_threadsortsubj(mt
->rootptr
) ||
1049 rfc822_threadgathersubj(mt
, mt
->rootptr
) ||
1050 rfc822_threadmergesubj(mt
, mt
->rootptr
) ||
1051 rfc822_threadsortbydate(mt
))
1058 return (mt
->rootptr
);