2 ** Copyright 2000-2003 Double Precision, Inc.
3 ** See COPYING for distribution information.
7 ** $Id: imaprefs.c,v 1.10 2003/07/09 21:33:20 mrsam Exp $
20 static void swapmsgdata(struct imap_refmsg
*a
, struct imap_refmsg
*b
)
27 #define swap(a,b,tmp) (tmp)=(a); (a)=(b); (b)=(tmp);
29 swap(a
->msgid
, b
->msgid
, cp
);
30 swap(a
->isdummy
, b
->isdummy
, c
);
31 swap(a
->flag2
, b
->flag2
, c
);
33 swap(a
->timestamp
, b
->timestamp
, t
);
34 swap(a
->seqnum
, b
->seqnum
, ul
);
39 struct imap_refmsgtable
*rfc822_threadalloc()
41 struct imap_refmsgtable
*p
;
43 p
=(struct imap_refmsgtable
*)malloc(sizeof(struct imap_refmsgtable
));
45 memset(p
, 0, sizeof(*p
));
49 void rfc822_threadfree(struct imap_refmsgtable
*p
)
52 struct imap_refmsghash
*h
;
53 struct imap_subjlookup
*s
;
54 struct imap_refmsg
*m
;
56 for (i
=0; i
<sizeof(p
->hashtable
)/sizeof(p
->hashtable
[0]); i
++)
57 while ((h
=p
->hashtable
[i
]) != 0)
59 p
->hashtable
[i
]=h
->nexthash
;
63 for (i
=0; i
<sizeof(p
->subjtable
)/sizeof(p
->subjtable
[0]); i
++)
64 while ((s
=p
->subjtable
[i
]) != 0)
66 p
->subjtable
[i
]=s
->nextsubj
;
71 while ((m
=p
->firstmsg
) != 0)
81 static int hashmsgid(const char *msgid
)
83 unsigned long hashno
=0;
87 unsigned long n
= (hashno
<< 1);
89 #define HMIDS (((struct imap_refmsgtable *)0)->hashtable)
90 #define HHMIDSS ( sizeof(HMIDS) / sizeof( HMIDS[0] ))
92 if (hashno
& HHMIDSS
)
95 hashno
= n
^ (unsigned char)*msgid
++;
98 return (hashno
% HHMIDSS
);
101 struct imap_refmsg
*rfc822_threadallocmsg(struct imap_refmsgtable
*mt
,
104 int n
=hashmsgid(msgid
);
105 struct imap_refmsg
*msgp
= (struct imap_refmsg
*)
106 malloc(sizeof(struct imap_refmsg
)+1+strlen(msgid
));
107 struct imap_refmsghash
*h
, **hp
;
109 if (!msgp
) return (0);
110 memset(msgp
, 0, sizeof(*msgp
));
111 strcpy ((msgp
->msgid
=(char *)(msgp
+1)), msgid
);
113 h
=(struct imap_refmsghash
*)malloc(sizeof(struct imap_refmsghash
));
120 for (hp
= &mt
->hashtable
[n
]; *hp
; hp
= & (*hp
)->nexthash
)
122 if (strcmp( (*hp
)->msg
->msgid
, msgp
->msgid
) > 0)
130 msgp
->last
=mt
->lastmsg
;
133 mt
->lastmsg
->next
=msgp
;
141 struct imap_refmsg
*rfc822_threadsearchmsg(struct imap_refmsgtable
*mt
,
144 int n
=hashmsgid(msgid
);
145 struct imap_refmsghash
*h
;
147 for (h
= mt
->hashtable
[n
]; h
; h
= h
->nexthash
)
149 int rc
=strcmp(h
->msg
->msgid
, msgid
);
151 if (rc
== 0) return (h
->msg
);
158 static int findsubj(struct imap_refmsgtable
*mt
, const char *s
, int *isrefwd
,
159 int create
, struct imap_subjlookup
**ptr
)
161 char *ss
=rfc822_coresubj(s
, isrefwd
);
163 struct imap_subjlookup
**h
;
164 struct imap_subjlookup
*newsubj
;
166 if (!ss
) return (-1);
169 for (h
= &mt
->subjtable
[n
]; *h
; h
= &(*h
)->nextsubj
)
171 int rc
=strcmp((*h
)->subj
, ss
);
189 newsubj
=malloc(sizeof(struct imap_subjlookup
));
195 memset(newsubj
, 0, sizeof(*newsubj
));
197 newsubj
->nextsubj
= *h
;
198 newsubj
->msgisrefwd
= *isrefwd
;
204 static void linkparent(struct imap_refmsg
*msg
, struct imap_refmsg
*lastmsg
)
207 msg
->prevsib
=lastmsg
->lastchild
;
209 msg
->prevsib
->nextsib
=msg
;
211 lastmsg
->firstchild
=msg
;
213 lastmsg
->lastchild
=msg
;
218 static void breakparent(struct imap_refmsg
*m
)
220 if (!m
->parent
) return;
222 if (m
->prevsib
) m
->prevsib
->nextsib
=m
->nextsib
;
223 else m
->parent
->firstchild
=m
->nextsib
;
225 if (m
->nextsib
) m
->nextsib
->prevsib
=m
->prevsib
;
226 else m
->parent
->lastchild
=m
->prevsib
;
230 static struct imap_refmsg
*dorefcreate(struct imap_refmsgtable
*mt
,
231 const char *newmsgid
,
233 /* a - references header */
235 struct imap_refmsg
*lastmsg
=0, *m
;
236 struct imap_refmsg
*msg
;
240 (A) Using the Message-IDs in the message's references, link
241 the corresponding messages together as parent/child. Make
242 the first reference the parent of the second (and the second
243 a child of the first), the second the parent of the third
244 (and the third a child of the second), etc. The following
245 rules govern the creation of these links:
247 If no reference message can be found with a given
248 Message-ID, create a dummy message with this ID. Use
249 this dummy message for all subsequent references to this
253 for (n
=0; n
<a
->naddrs
; n
++)
255 char *msgid
=rfc822_getaddr(a
, n
);
257 msg
=*msgid
? rfc822_threadsearchmsg(mt
, msgid
? msgid
:""):0;
260 msg
=rfc822_threadallocmsg(mt
, msgid
? msgid
:"");
275 If a reference message already has a parent, don't change
279 if (lastmsg
== 0 || msg
->parent
)
286 Do not create a parent/child link if creating that link
287 would introduce a loop. For example, before making
288 message A the parent of B, make sure that A is not a
293 for (m
=lastmsg
; m
; m
=m
->parent
)
294 if (strcmp(m
->msgid
, msg
->msgid
) == 0)
302 linkparent(msg
, lastmsg
);
308 (B) Create a parent/child link between the last reference
309 (or NIL if there are no references) and the current message.
310 If the current message has a parent already, break the
311 current parent/child link before creating the new one. Note
312 that if this message has no references, that it will now
315 NOTE: The parent/child links MUST be kept consistent with
316 one another at ALL times.
320 msg
=*newmsgid
? rfc822_threadsearchmsg(mt
, newmsgid
):0;
323 If a message does not contain a Message-ID header line,
324 or the Message-ID header line does not contain a valid
325 Message ID, then assign a unique Message ID to this
328 Implementation note: empty msgid, plus dupe check below,
332 if (msg
&& msg
->isdummy
)
342 ** If two or more messages have the same Message ID, assign
343 ** a unique Message ID to each of the duplicates.
345 ** Implementation note: just unlink the existing message from
346 ** it's parents/children.
350 while (msg
->firstchild
)
351 breakparent(msg
->firstchild
);
355 /* Create new entry with an empty msgid, if any more
356 ** msgids come, they'll hit the dupe check again.
361 msg
=rfc822_threadallocmsg(mt
, newmsgid
);
362 if (!msg
) return (0);
367 for (m
=lastmsg
; m
; m
=m
->parent
)
368 if (strcmp(m
->msgid
, msg
->msgid
) == 0)
371 linkparent(msg
, lastmsg
);
376 static struct imap_refmsg
*threadmsg_common(struct imap_refmsg
*m
,
377 const char *subjheader
,
378 const char *dateheader
,
379 time_t dateheader_tm
,
380 unsigned long seqnum
);
382 static struct imap_refmsg
*rfc822_threadmsgaref(struct imap_refmsgtable
*mt
,
383 const char *msgidhdr
,
384 struct rfc822a
*refhdr
,
385 const char *subjheader
,
386 const char *dateheader
,
387 time_t dateheader_tm
,
388 unsigned long seqnum
);
390 struct imap_refmsg
*rfc822_threadmsg(struct imap_refmsgtable
*mt
,
391 const char *msgidhdr
,
393 const char *subjheader
,
394 const char *dateheader
,
395 time_t dateheader_tm
,
396 unsigned long seqnum
)
400 struct imap_refmsg
*m
;
402 t
=rfc822t_alloc_new(refhdr
? refhdr
:"", NULL
, NULL
);
415 m
=rfc822_threadmsgaref(mt
, msgidhdr
, a
, subjheader
, dateheader
,
416 dateheader_tm
, seqnum
);
424 struct imap_refmsg
*rfc822_threadmsgrefs(struct imap_refmsgtable
*mt
,
426 const char * const * msgidList
,
427 const char *subjheader
,
428 const char *dateheader
,
429 time_t dateheader_tm
,
430 unsigned long seqnum
)
432 struct imap_refmsg
*m
;
433 struct rfc822token
*tArray
;
434 struct rfc822addr
*aArray
;
439 for (n
=0; msgidList
[n
]; n
++)
442 if ((tArray
=malloc((n
+1) * sizeof(*tArray
))) == NULL
)
445 if ((aArray
=malloc((n
+1) * sizeof(*aArray
))) == NULL
)
455 tArray
[i
].ptr
=msgidList
[i
];
456 tArray
[i
].len
=strlen(msgidList
[i
]);
459 aArray
[i
].tokens
=&tArray
[i
];
465 m
=rfc822_threadmsgaref(mt
, msgid_s
, &a
, subjheader
, dateheader
,
466 dateheader_tm
, seqnum
);
473 static struct imap_refmsg
*rfc822_threadmsgaref(struct imap_refmsgtable
*mt
,
474 const char *msgidhdr
,
475 struct rfc822a
*refhdr
,
476 const char *subjheader
,
477 const char *dateheader
,
478 time_t dateheader_tm
,
479 unsigned long seqnum
)
483 struct imap_refmsg
*m
;
487 t
=rfc822t_alloc_new(msgidhdr
? msgidhdr
:"", NULL
, NULL
);
497 msgid_s
=a
->naddrs
> 0 ? rfc822_getaddr(a
, 0):strdup("");
505 m
=dorefcreate(mt
, msgid_s
, refhdr
);
513 return threadmsg_common(m
, subjheader
, dateheader
,
514 dateheader_tm
, seqnum
);
517 static struct imap_refmsg
*threadmsg_common(struct imap_refmsg
*m
,
518 const char *subjheader
,
519 const char *dateheader
,
520 time_t dateheader_tm
,
521 unsigned long seqnum
)
523 if (subjheader
&& (m
->subj
=strdup(subjheader
)) == 0)
524 return (0); /* Cleanup in rfc822_threadfree() */
527 dateheader_tm
=rfc822_parsedt(dateheader
);
529 m
->timestamp
=dateheader_tm
;
537 (2) Gather together all of the messages that have no parents
538 and make them all children (siblings of one another) of a dummy
539 parent (the "root"). These messages constitute first messages
540 of the threads created thus far.
544 struct imap_refmsg
*rfc822_threadgetroot(struct imap_refmsgtable
*mt
)
546 struct imap_refmsg
*root
, *m
;
549 return (mt
->rootptr
);
551 root
=rfc822_threadallocmsg(mt
, "(root)");
553 if (!root
) return (0);
555 root
->parent
=root
; /* Temporary */
558 for (m
=mt
->firstmsg
; m
; m
=m
->next
)
561 if (m
->isdummy
&& m
->firstchild
== 0)
562 continue; /* Can happen in reference creation */
567 return (mt
->rootptr
=root
);
572 ** (3) Prune dummy messages from the thread tree. Traverse each
573 ** thread under the root, and for each message:
576 void rfc822_threadprune(struct imap_refmsgtable
*mt
)
578 struct imap_refmsg
*msg
;
580 for (msg
=mt
->firstmsg
; msg
; msg
=msg
->next
)
582 struct imap_refmsg
*saveparent
, *m
;
585 continue; /* The root, need it later. */
592 ** If it is a dummy message with NO children, delete it.
595 if (msg
->firstchild
== 0)
599 ** Don't free the node, it'll be done on msgtable
606 ** If it is a dummy message with children, delete it, but
607 ** promote its children to the current level. In other words,
608 ** splice them in with the dummy's siblings.
610 ** Do not promote the children if doing so would make them
611 ** children of the root, unless there is only one child.
614 if (msg
->firstchild
->nextsib
&&
618 saveparent
=msg
->parent
;
621 while ((m
=msg
->firstchild
) != 0)
624 linkparent(m
, saveparent
);
629 static int cmp_msgs(const void *, const void *);
631 int rfc822_threadsortsubj(struct imap_refmsg
*root
)
633 struct imap_refmsg
*toproot
;
636 ** (4) Sort the messages under the root (top-level siblings only)
637 ** by sent date. In the case of an exact match on sent date or if
638 ** either of the Date: headers used in a comparison can not be
639 ** parsed, use the order in which the messages appear in the
640 ** mailbox (that is, by sequence number) to determine the order.
641 ** In the case of a dummy message, sort its children by sent date
642 ** and then use the first child for the top-level sort.
645 struct imap_refmsg
**sortarray
;
647 for (cnt
=0, toproot
=root
->firstchild
; toproot
;
648 toproot
=toproot
->nextsib
)
650 if (toproot
->isdummy
)
651 rfc822_threadsortsubj(toproot
);
655 if ((sortarray
=malloc(sizeof(struct imap_refmsg
*)*(cnt
+1))) == 0)
658 for (cnt
=0; (toproot
=root
->firstchild
) != NULL
; ++cnt
)
660 sortarray
[cnt
]=toproot
;
661 breakparent(toproot
);
664 qsort(sortarray
, cnt
, sizeof(*sortarray
), cmp_msgs
);
666 for (i
=0; i
<cnt
; i
++)
667 linkparent(sortarray
[i
], root
);
672 int rfc822_threadgathersubj(struct imap_refmsgtable
*mt
,
673 struct imap_refmsg
*root
)
675 struct imap_refmsg
*toproot
, *p
;
678 ** (5) Gather together messages under the root that have the same
679 ** extracted subject text.
681 ** (A) Create a table for associating extracted subjects with
684 ** (B) Populate the subject table with one message per
685 ** extracted subject. For each message under the root:
688 for (toproot
=root
->firstchild
; toproot
; toproot
=toproot
->nextsib
)
691 struct imap_subjlookup
*subjtop
;
695 ** (i) Find the subject of this thread by extracting the
696 ** base subject from the current message, or its first child
697 ** if the current message is a dummy.
704 subj
=p
->subj
? p
->subj
:"";
708 ** (ii) If the extracted subject is empty, skip this
716 ** (iii) Lookup the message associated with this extracted
717 ** subject in the table.
720 if (findsubj(mt
, subj
, &isrefwd
, 1, &subjtop
))
725 ** (iv) If there is no message in the table with this
726 ** subject, add the current message and the extracted
727 ** subject to the subject table.
730 if (subjtop
->msg
== 0)
732 subjtop
->msg
=toproot
;
733 subjtop
->msgisrefwd
=isrefwd
;
738 ** Otherwise, replace the message in the table with the
739 ** current message if the message in the table is not a
740 ** dummy AND either of the following criteria are true:
743 if (!subjtop
->msg
->isdummy
)
746 ** The current message is a dummy
750 if (toproot
->isdummy
)
752 subjtop
->msg
=toproot
;
753 subjtop
->msgisrefwd
=isrefwd
;
758 ** The message in the table is a reply or forward (its
759 ** original subject contains a subj-refwd part and/or a
760 ** "(fwd)" subj-trailer) and the current message is
764 if (subjtop
->msgisrefwd
&& !isrefwd
)
766 subjtop
->msg
=toproot
;
767 subjtop
->msgisrefwd
=isrefwd
;
775 ** (C) Merge threads with the same subject. For each message
779 int rfc822_threadmergesubj(struct imap_refmsgtable
*mt
,
780 struct imap_refmsg
*root
)
782 struct imap_refmsg
*toproot
, *p
, *q
, *nextroot
;
785 for (toproot
=root
->firstchild
; toproot
; toproot
=nextroot
)
788 struct imap_subjlookup
*subjtop
;
791 nextroot
=toproot
->nextsib
;
794 ** (i) Find the subject of this thread as in step 4.B.i
802 subj
=p
->subj
? p
->subj
:"";
805 ** (ii) If the extracted subject is empty, skip this
813 ** (iii) Lookup the message associated with this extracted
814 ** subject in the table.
817 if (findsubj(mt
, subj
, &isrefwd
, 0, &subjtop
) || subjtop
== 0)
821 ** (iv) If the message in the table is the current message,
825 /* NOTE - ptr comparison IS NOT LEGAL */
827 subjtop
->msg
->flag2
=1;
833 subjtop
->msg
->flag2
=0;
836 ** Otherwise, merge the current message with the one in the
837 ** table using the following rules:
839 ** If both messages are dummies, append the current
840 ** message's children to the children of the message in
841 ** the table (the children of both messages become
842 ** siblings), and then delete the current message.
845 if (subjtop
->msg
->isdummy
&& toproot
->isdummy
)
847 while ((p
=toproot
->firstchild
) != 0)
850 linkparent(p
, subjtop
->msg
);
852 breakparent(toproot
);
857 ** If the message in the table is a dummy and the current
858 ** message is not, make the current message a child of
859 ** the message in the table (a sibling of it's children).
862 if (subjtop
->msg
->isdummy
)
864 breakparent(toproot
);
865 linkparent(toproot
, subjtop
->msg
);
870 ** If the current message is a reply or forward and the
871 ** message in the table is not, make the current message
872 ** a child of the message in the table (a sibling of it's
882 subj
=p
->subj
? p
->subj
:"";
884 str
=rfc822_coresubj(subj
, &isrefwd
);
888 free(str
); /* Don't really care */
892 breakparent(toproot
);
893 linkparent(toproot
, subjtop
->msg
);
899 ** Otherwise, create a new dummy container and make both
900 ** messages children of the dummy, and replace the
901 ** message in the table with the dummy message.
904 /* What we do is create a new message, then move the
905 ** contents of subjtop->msg (including its children)
906 ** to the new message, then make the new message a child
907 ** of subjtop->msg, and mark subjtop->msg as a dummy msg.
910 q
=rfc822_threadallocmsg(mt
, "(dummy)");
916 swapmsgdata(q
, subjtop
->msg
);
918 while ((p
=subjtop
->msg
->firstchild
) != 0)
923 linkparent(q
, subjtop
->msg
);
925 breakparent(toproot
);
926 linkparent(toproot
, subjtop
->msg
);
932 ** (6) Traverse the messages under the root and sort each set of
933 ** siblings by sent date. Traverse the messages in such a way
934 ** that the "youngest" set of siblings are sorted first, and the
935 ** "oldest" set of siblings are sorted last (grandchildren are
936 ** sorted before children, etc). In the case of an exact match on
937 ** sent date or if either of the Date: headers used in a
938 ** comparison can not be parsed, use the order in which the
939 ** messages appear in the mailbox (that is, by sequence number) to
940 ** determine the order. In the case of a dummy message (which can
941 ** only occur with top-level siblings), use its first child for
945 static int cmp_msgs(const void *a
, const void *b
)
947 struct imap_refmsg
*ma
=*(struct imap_refmsg
**)a
;
948 struct imap_refmsg
*mb
=*(struct imap_refmsg
**)b
;
950 unsigned long na
, nb
;
952 while (ma
&& ma
->isdummy
)
955 while (mb
&& mb
->isdummy
)
971 return (ta
&& tb
&& ta
!= tb
? ta
< tb
? -1: 1:
972 na
< nb
? -1: na
> nb
? 1:0);
975 struct imap_threadsortinfo
{
976 struct imap_refmsgtable
*mt
;
977 struct imap_refmsg
**sort_table
;
978 size_t sort_table_cnt
;
981 static int dothreadsort(struct imap_threadsortinfo
*,
982 struct imap_refmsg
*);
984 int rfc822_threadsortbydate(struct imap_refmsgtable
*mt
)
986 struct imap_threadsortinfo itsi
;
991 itsi
.sort_table_cnt
=0;
993 rc
=dothreadsort(&itsi
, mt
->rootptr
);
996 free(itsi
.sort_table
);
1000 static int dothreadsort(struct imap_threadsortinfo
*itsi
,
1001 struct imap_refmsg
*p
)
1003 struct imap_refmsg
*q
;
1006 for (q
=p
->firstchild
; q
; q
=q
->nextsib
)
1007 dothreadsort(itsi
, q
);
1010 for (q
=p
->firstchild
; q
; q
=q
->nextsib
)
1013 if (n
> itsi
->sort_table_cnt
)
1015 struct imap_refmsg
**new_array
=(struct imap_refmsg
**)
1017 realloc(itsi
->sort_table
,
1018 sizeof(struct imap_refmsg
*)*n
)
1019 :malloc(sizeof(struct imap_refmsg
*)*n
));
1024 itsi
->sort_table
=new_array
;
1025 itsi
->sort_table_cnt
=n
;
1029 while ((q
=p
->firstchild
) != 0)
1032 itsi
->sort_table
[n
++]=q
;
1035 qsort(itsi
->sort_table
, n
, sizeof(struct imap_refmsg
*), cmp_msgs
);
1038 linkparent(itsi
->sort_table
[i
], p
);
1042 struct imap_refmsg
*rfc822_thread(struct imap_refmsgtable
*mt
)
1046 rfc822_threadprune(mt
);
1047 if ((mt
->rootptr
=rfc822_threadgetroot(mt
)) == 0)
1049 if (rfc822_threadsortsubj(mt
->rootptr
) ||
1050 rfc822_threadgathersubj(mt
, mt
->rootptr
) ||
1051 rfc822_threadmergesubj(mt
, mt
->rootptr
) ||
1052 rfc822_threadsortbydate(mt
))
1059 return (mt
->rootptr
);