Commit | Line | Data |
---|---|---|
1938c6da | 1 | /* Copyright (C) 1999,2000,2001,2002, 2004 Free Software Foundation, Inc. |
73be1d9e MV |
2 | * This library is free software; you can redistribute it and/or |
3 | * modify it under the terms of the GNU Lesser General Public | |
4 | * License as published by the Free Software Foundation; either | |
5 | * version 2.1 of the License, or (at your option) any later version. | |
54e09076 | 6 | * |
73be1d9e MV |
7 | * This library is distributed in the hope that it will be useful, |
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
10 | * Lesser General Public License for more details. | |
54e09076 | 11 | * |
73be1d9e MV |
12 | * You should have received a copy of the GNU Lesser General Public |
13 | * License along with this library; if not, write to the Free Software | |
14 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
15 | */ | |
54e09076 | 16 | |
1bbd0b84 GB |
17 | |
18 | ||
54e09076 MD |
19 | /* Written in December 1998 by Roland Orre <orre@nada.kth.se> |
20 | * This implements the same sort interface as slib/sort.scm | |
21 | * for lists and vectors where slib defines: | |
22 | * sorted?, merge, merge!, sort, sort! | |
15d9c4e3 | 23 | * For scsh compatibility sort-list and sort-list! are also defined. |
54e09076 | 24 | * In cases where a stable-sort is required use stable-sort or |
15d9c4e3 | 25 | * stable-sort!. An additional feature is |
54e09076 | 26 | * (restricted-vector-sort! vector less? startpos endpos) |
15d9c4e3 | 27 | * which allows you to sort part of a vector. |
54e09076 MD |
28 | * Thanks to Aubrey Jaffer for the slib/sort.scm library. |
29 | * Thanks to Richard A. O'Keefe (based on Prolog code by D.H.D.Warren) | |
30 | * for the merge sort inspiration. | |
31 | * Thanks to Douglas C. Schmidt (schmidt@ics.uci.edu) for the | |
32 | * quicksort code. | |
33 | */ | |
34 | ||
a0599745 | 35 | #include "libguile/_scm.h" |
a0599745 MD |
36 | #include "libguile/eval.h" |
37 | #include "libguile/unif.h" | |
38 | #include "libguile/ramap.h" | |
a0599745 MD |
39 | #include "libguile/feature.h" |
40 | #include "libguile/vectors.h" | |
c96d76b8 | 41 | #include "libguile/lang.h" |
ee1ac75b | 42 | #include "libguile/async.h" |
54e09076 | 43 | |
a0599745 MD |
44 | #include "libguile/validate.h" |
45 | #include "libguile/sort.h" | |
54e09076 MD |
46 | |
47 | /* The routine quicksort was extracted from the GNU C Library qsort.c | |
48 | written by Douglas C. Schmidt (schmidt@ics.uci.edu) | |
49 | and adapted to guile by adding an extra pointer less | |
50 | to quicksort by Roland Orre <orre@nada.kth.se>. | |
51 | ||
52 | The reason to do this instead of using the library function qsort | |
53 | was to avoid dependency of the ANSI-C extensions for local functions | |
54 | and also to avoid obscure pool based solutions. | |
15d9c4e3 MD |
55 | |
56 | This sorting routine is not much more efficient than the stable | |
57 | version but doesn't consume extra memory. | |
54e09076 MD |
58 | */ |
59 | ||
d339981a | 60 | #define SWAP(a, b) do { const SCM _tmp = a; a = b; b = _tmp; } while (0) |
54e09076 MD |
61 | |
62 | ||
63 | /* Order size using quicksort. This implementation incorporates | |
64 | four optimizations discussed in Sedgewick: | |
65 | ||
d339981a DH |
66 | 1. Non-recursive, using an explicit stack of pointer that store the next |
67 | array partition to sort. To save time, this maximum amount of space | |
68 | required to store an array of MAX_SIZE_T is allocated on the stack. | |
69 | Assuming a bit width of 32 bits for size_t, this needs only | |
70 | 32 * sizeof (stack_node) == 128 bytes. Pretty cheap, actually. | |
54e09076 | 71 | |
d339981a DH |
72 | 2. Chose the pivot element using a median-of-three decision tree. This |
73 | reduces the probability of selecting a bad pivot value and eliminates | |
74 | certain extraneous comparisons. | |
54e09076 | 75 | |
d339981a DH |
76 | 3. Only quicksorts NR_ELEMS / MAX_THRESH partitions, leaving insertion sort |
77 | to order the MAX_THRESH items within each partition. This is a big win, | |
78 | since insertion sort is faster for small, mostly sorted array segments. | |
54e09076 MD |
79 | |
80 | 4. The larger of the two sub-partitions is always pushed onto the | |
81 | stack first, with the algorithm then concentrating on the | |
82 | smaller partition. This *guarantees* no more than log (n) | |
83 | stack size is needed (actually O(1) in this case)! */ | |
84 | ||
54e09076 | 85 | |
d339981a DH |
86 | /* Discontinue quicksort algorithm when partition gets below this size. |
87 | * This particular magic number was chosen to work best on a Sun 4/260. */ | |
88 | #define MAX_THRESH 4 | |
89 | ||
90 | ||
91 | /* Inline stack abstraction: The stack size for quicksorting at most as many | |
92 | * elements as can be given by a value of type size_t is, as described above, | |
93 | * log (MAX_SIZE_T), which is the number of bits of size_t. More accurately, | |
94 | * we would only need ceil (log (MAX_SIZE_T / MAX_THRESH)), but this is | |
95 | * ignored below. */ | |
96 | ||
97 | /* Stack node declarations used to store unfulfilled partition obligations. */ | |
98 | typedef struct { | |
99 | size_t lo; | |
100 | size_t hi; | |
101 | } stack_node; | |
102 | ||
103 | #define STACK_SIZE (8 * sizeof (size_t)) /* assume 8 bit char */ | |
104 | #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top)) | |
105 | #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi))) | |
106 | #define STACK_NOT_EMPTY (stack < top) | |
107 | ||
a34af05e | 108 | |
54e09076 | 109 | static void |
d339981a | 110 | quicksort (SCM *const base_ptr, size_t nr_elems, scm_t_trampoline_2 cmp, SCM less) |
54e09076 | 111 | { |
d339981a | 112 | static const char s_buggy_less[] = "buggy less predicate used when sorting"; |
54e09076 | 113 | |
d339981a | 114 | if (nr_elems == 0) |
54e09076 MD |
115 | /* Avoid lossage with unsigned arithmetic below. */ |
116 | return; | |
117 | ||
d339981a | 118 | if (nr_elems > MAX_THRESH) |
54e09076 | 119 | { |
d339981a DH |
120 | size_t lo = 0; |
121 | size_t hi = nr_elems - 1; | |
122 | ||
54e09076 MD |
123 | stack_node stack[STACK_SIZE]; |
124 | stack_node *top = stack + 1; | |
125 | ||
126 | while (STACK_NOT_EMPTY) | |
127 | { | |
d339981a DH |
128 | size_t left; |
129 | size_t right; | |
1938c6da | 130 | size_t mid = lo + (hi - lo) / 2; |
14e9281b | 131 | SCM pivot; |
54e09076 MD |
132 | |
133 | /* Select median value from among LO, MID, and HI. Rearrange | |
134 | LO and HI so the three values are sorted. This lowers the | |
135 | probability of picking a pathological pivot value and | |
d339981a | 136 | skips a comparison for both the left and right. */ |
54e09076 | 137 | |
1938c6da MV |
138 | SCM_TICK; |
139 | ||
7888309b | 140 | if (scm_is_true ((*cmp) (less, base_ptr[mid], base_ptr[lo]))) |
d339981a | 141 | SWAP (base_ptr[mid], base_ptr[lo]); |
7888309b | 142 | if (scm_is_true ((*cmp) (less, base_ptr[hi], base_ptr[mid]))) |
d339981a | 143 | SWAP (base_ptr[mid], base_ptr[hi]); |
54e09076 MD |
144 | else |
145 | goto jump_over; | |
7888309b | 146 | if (scm_is_true ((*cmp) (less, base_ptr[mid], base_ptr[lo]))) |
d339981a | 147 | SWAP (base_ptr[mid], base_ptr[lo]); |
54e09076 | 148 | jump_over:; |
54e09076 | 149 | |
14e9281b | 150 | pivot = base_ptr[mid]; |
d339981a DH |
151 | left = lo + 1; |
152 | right = hi - 1; | |
54e09076 MD |
153 | |
154 | /* Here's the famous ``collapse the walls'' section of quicksort. | |
155 | Gotta like those tight inner loops! They are the main reason | |
156 | that this algorithm runs much faster than others. */ | |
157 | do | |
158 | { | |
14e9281b | 159 | while (scm_is_true ((*cmp) (less, base_ptr[left], pivot))) |
a34af05e | 160 | { |
d339981a | 161 | left++; |
a34af05e | 162 | /* The comparison predicate may be buggy */ |
d339981a | 163 | if (left > hi) |
5d2d2ffc | 164 | scm_misc_error (NULL, s_buggy_less, SCM_EOL); |
a34af05e | 165 | } |
54e09076 | 166 | |
14e9281b | 167 | while (scm_is_true ((*cmp) (less, pivot, base_ptr[right]))) |
a34af05e | 168 | { |
d339981a | 169 | right--; |
a34af05e | 170 | /* The comparison predicate may be buggy */ |
d339981a | 171 | if (right < lo) |
5d2d2ffc | 172 | scm_misc_error (NULL, s_buggy_less, SCM_EOL); |
a34af05e | 173 | } |
54e09076 | 174 | |
d339981a | 175 | if (left < right) |
54e09076 | 176 | { |
d339981a DH |
177 | SWAP (base_ptr[left], base_ptr[right]); |
178 | left++; | |
179 | right--; | |
54e09076 | 180 | } |
d339981a | 181 | else if (left == right) |
54e09076 | 182 | { |
d339981a DH |
183 | left++; |
184 | right--; | |
54e09076 MD |
185 | break; |
186 | } | |
187 | } | |
d339981a | 188 | while (left <= right); |
54e09076 MD |
189 | |
190 | /* Set up pointers for next iteration. First determine whether | |
191 | left and right partitions are below the threshold size. If so, | |
192 | ignore one or both. Otherwise, push the larger partition's | |
193 | bounds on the stack and continue sorting the smaller one. */ | |
194 | ||
d339981a | 195 | if ((size_t) (right - lo) <= MAX_THRESH) |
54e09076 | 196 | { |
d339981a | 197 | if ((size_t) (hi - left) <= MAX_THRESH) |
54e09076 MD |
198 | /* Ignore both small partitions. */ |
199 | POP (lo, hi); | |
200 | else | |
201 | /* Ignore small left partition. */ | |
d339981a | 202 | lo = left; |
54e09076 | 203 | } |
d339981a | 204 | else if ((size_t) (hi - left) <= MAX_THRESH) |
54e09076 | 205 | /* Ignore small right partition. */ |
d339981a DH |
206 | hi = right; |
207 | else if ((right - lo) > (hi - left)) | |
54e09076 MD |
208 | { |
209 | /* Push larger left partition indices. */ | |
d339981a DH |
210 | PUSH (lo, right); |
211 | lo = left; | |
54e09076 MD |
212 | } |
213 | else | |
214 | { | |
215 | /* Push larger right partition indices. */ | |
d339981a DH |
216 | PUSH (left, hi); |
217 | hi = right; | |
54e09076 MD |
218 | } |
219 | } | |
220 | } | |
221 | ||
d339981a DH |
222 | /* Once the BASE_PTR array is partially sorted by quicksort the rest is |
223 | completely sorted using insertion sort, since this is efficient for | |
224 | partitions below MAX_THRESH size. BASE_PTR points to the beginning of the | |
225 | array to sort, and END idexes the very last element in the array (*not* | |
226 | one beyond it!). */ | |
54e09076 MD |
227 | |
228 | { | |
d339981a DH |
229 | size_t tmp = 0; |
230 | size_t end = nr_elems - 1; | |
231 | size_t thresh = min (end, MAX_THRESH); | |
232 | size_t run; | |
54e09076 MD |
233 | |
234 | /* Find smallest element in first threshold and place it at the | |
235 | array's beginning. This is the smallest array element, | |
236 | and the operation speeds up insertion sort's inner loop. */ | |
237 | ||
d339981a | 238 | for (run = tmp + 1; run <= thresh; run++) |
7888309b | 239 | if (scm_is_true ((*cmp) (less, base_ptr[run], base_ptr[tmp]))) |
d339981a | 240 | tmp = run; |
54e09076 | 241 | |
d339981a DH |
242 | if (tmp != 0) |
243 | SWAP (base_ptr[tmp], base_ptr[0]); | |
54e09076 MD |
244 | |
245 | /* Insertion sort, running from left-hand-side up to right-hand-side. */ | |
246 | ||
d339981a DH |
247 | run = 1; |
248 | while (++run <= end) | |
54e09076 | 249 | { |
ee1ac75b MV |
250 | SCM_TICK; |
251 | ||
d339981a | 252 | tmp = run - 1; |
7888309b | 253 | while (scm_is_true ((*cmp) (less, base_ptr[run], base_ptr[tmp]))) |
a34af05e | 254 | { |
a34af05e | 255 | /* The comparison predicate may be buggy */ |
d339981a | 256 | if (tmp == 0) |
5d2d2ffc | 257 | scm_misc_error (NULL, s_buggy_less, SCM_EOL); |
d339981a DH |
258 | |
259 | tmp--; | |
a34af05e | 260 | } |
54e09076 | 261 | |
d339981a DH |
262 | tmp++; |
263 | if (tmp != run) | |
54e09076 | 264 | { |
d339981a DH |
265 | SCM to_insert = base_ptr[run]; |
266 | size_t hi, lo; | |
267 | ||
268 | for (hi = lo = run; --lo >= tmp; hi = lo) | |
269 | base_ptr[hi] = base_ptr[lo]; | |
270 | base_ptr[hi] = to_insert; | |
54e09076 MD |
271 | } |
272 | } | |
273 | } | |
d339981a | 274 | } |
54e09076 | 275 | |
54e09076 | 276 | |
d339981a DH |
277 | static scm_t_trampoline_2 |
278 | compare_function (SCM less, unsigned int arg_nr, const char* fname) | |
54e09076 | 279 | { |
d339981a DH |
280 | const scm_t_trampoline_2 cmp = scm_trampoline_2 (less); |
281 | SCM_ASSERT_TYPE (cmp != NULL, less, arg_nr, fname, "less predicate"); | |
282 | return cmp; | |
283 | } | |
54e09076 | 284 | |
54e09076 MD |
285 | |
286 | /* Question: Is there any need to make this a more general array sort? | |
287 | It is probably enough to manage the vector type. */ | |
288 | /* endpos equal as for substring, i.e. endpos is not included. */ | |
da4a1dba | 289 | /* More natural with length? */ |
1bbd0b84 | 290 | |
a1ec6916 | 291 | SCM_DEFINE (scm_restricted_vector_sort_x, "restricted-vector-sort!", 4, 0, 0, |
1bbd0b84 | 292 | (SCM vec, SCM less, SCM startpos, SCM endpos), |
e3239868 | 293 | "Sort the vector @var{vec}, using @var{less} for comparing\n" |
3bdf7962 MV |
294 | "the vector elements. @var{startpos} (inclusively) and\n" |
295 | "@var{endpos} (exclusively) delimit\n" | |
e3239868 DH |
296 | "the range of the vector which gets sorted. The return value\n" |
297 | "is not specified.") | |
1bbd0b84 | 298 | #define FUNC_NAME s_scm_restricted_vector_sort_x |
54e09076 | 299 | { |
d339981a DH |
300 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
301 | size_t vlen, spos, len; | |
54e09076 MD |
302 | SCM *vp; |
303 | ||
34d19ef6 | 304 | SCM_VALIDATE_VECTOR (1, vec); |
34d19ef6 | 305 | vp = SCM_WRITABLE_VELTS (vec); /* vector pointer */ |
b5c2579a | 306 | vlen = SCM_VECTOR_LENGTH (vec); |
54e09076 | 307 | |
a55c2b68 | 308 | spos = scm_to_unsigned_integer (startpos, 0, vlen); |
3bdf7962 | 309 | len = scm_to_unsigned_integer (endpos, spos, vlen) - spos; |
54e09076 | 310 | |
d339981a DH |
311 | quicksort (&vp[spos], len, cmp, less); |
312 | scm_remember_upto_here_1 (vec); | |
34d19ef6 | 313 | |
d339981a | 314 | return SCM_UNSPECIFIED; |
1bbd0b84 GB |
315 | } |
316 | #undef FUNC_NAME | |
54e09076 | 317 | |
d339981a | 318 | |
54e09076 MD |
319 | /* (sorted? sequence less?) |
320 | * is true when sequence is a list (x0 x1 ... xm) or a vector #(x0 ... xm) | |
321 | * such that for all 1 <= i <= m, | |
322 | * (not (less? (list-ref list i) (list-ref list (- i 1)))). */ | |
a1ec6916 | 323 | SCM_DEFINE (scm_sorted_p, "sorted?", 2, 0, 0, |
1bbd0b84 | 324 | (SCM items, SCM less), |
e3239868 DH |
325 | "Return @code{#t} iff @var{items} is a list or a vector such that\n" |
326 | "for all 1 <= i <= m, the predicate @var{less} returns true when\n" | |
327 | "applied to all elements i - 1 and i") | |
1bbd0b84 | 328 | #define FUNC_NAME s_scm_sorted_p |
54e09076 | 329 | { |
d339981a | 330 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
c014a02e | 331 | long len, j; /* list/vector length, temp j */ |
54e09076 | 332 | SCM item, rest; /* rest of items loop variable */ |
34d19ef6 | 333 | SCM const *vp; |
54e09076 | 334 | |
c96d76b8 | 335 | if (SCM_NULL_OR_NIL_P (items)) |
54e09076 | 336 | return SCM_BOOL_T; |
1bbd0b84 | 337 | |
d2e53ed6 | 338 | if (scm_is_pair (items)) |
54e09076 MD |
339 | { |
340 | len = scm_ilength (items); /* also checks that it's a pure list */ | |
34d19ef6 | 341 | SCM_ASSERT_RANGE (1, items, len >= 0); |
54e09076 MD |
342 | if (len <= 1) |
343 | return SCM_BOOL_T; | |
344 | ||
345 | item = SCM_CAR (items); | |
346 | rest = SCM_CDR (items); | |
347 | j = len - 1; | |
348 | while (j > 0) | |
349 | { | |
7888309b | 350 | if (scm_is_true ((*cmp) (less, SCM_CAR (rest), item))) |
54e09076 MD |
351 | return SCM_BOOL_F; |
352 | else | |
353 | { | |
354 | item = SCM_CAR (rest); | |
355 | rest = SCM_CDR (rest); | |
356 | j--; | |
357 | } | |
358 | } | |
359 | return SCM_BOOL_T; | |
360 | } | |
361 | else | |
362 | { | |
b5c2579a DH |
363 | SCM_VALIDATE_VECTOR (1, items); |
364 | ||
365 | vp = SCM_VELTS (items); /* vector pointer */ | |
366 | len = SCM_VECTOR_LENGTH (items); | |
367 | j = len - 1; | |
368 | while (j > 0) | |
54e09076 | 369 | { |
7888309b | 370 | if (scm_is_true ((*cmp) (less, vp[1], vp[0]))) |
b5c2579a DH |
371 | return SCM_BOOL_F; |
372 | else | |
373 | { | |
374 | vp++; | |
375 | j--; | |
376 | } | |
54e09076 | 377 | } |
b5c2579a | 378 | return SCM_BOOL_T; |
54e09076 | 379 | } |
b5c2579a | 380 | |
54e09076 | 381 | return SCM_BOOL_F; |
1bbd0b84 GB |
382 | } |
383 | #undef FUNC_NAME | |
54e09076 | 384 | |
d339981a | 385 | |
54e09076 MD |
386 | /* (merge a b less?) |
387 | takes two lists a and b such that (sorted? a less?) and (sorted? b less?) | |
388 | and returns a new list in which the elements of a and b have been stably | |
389 | interleaved so that (sorted? (merge a b less?) less?). | |
390 | Note: this does _not_ accept vectors. */ | |
a1ec6916 | 391 | SCM_DEFINE (scm_merge, "merge", 3, 0, 0, |
1bbd0b84 | 392 | (SCM alist, SCM blist, SCM less), |
8f85c0c6 NJ |
393 | "Merge two already sorted lists into one.\n" |
394 | "Given two lists @var{alist} and @var{blist}, such that\n" | |
395 | "@code{(sorted? alist less?)} and @code{(sorted? blist less?)},\n" | |
396 | "return a new list in which the elements of @var{alist} and\n" | |
e3239868 DH |
397 | "@var{blist} have been stably interleaved so that\n" |
398 | "@code{(sorted? (merge alist blist less?) less?)}.\n" | |
399 | "Note: this does _not_ accept vectors.") | |
1bbd0b84 | 400 | #define FUNC_NAME s_scm_merge |
54e09076 | 401 | { |
d339981a | 402 | SCM build; |
54e09076 | 403 | |
c96d76b8 | 404 | if (SCM_NULL_OR_NIL_P (alist)) |
54e09076 | 405 | return blist; |
c96d76b8 | 406 | else if (SCM_NULL_OR_NIL_P (blist)) |
54e09076 MD |
407 | return alist; |
408 | else | |
409 | { | |
d339981a DH |
410 | const scm_t_trampoline_2 cmp = compare_function (less, 3, FUNC_NAME); |
411 | long alen, blen; /* list lengths */ | |
412 | SCM last; | |
413 | ||
34d19ef6 HWN |
414 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (1, alist, alen); |
415 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (2, blist, blen); | |
7888309b | 416 | if (scm_is_true ((*cmp) (less, SCM_CAR (blist), SCM_CAR (alist)))) |
54e09076 MD |
417 | { |
418 | build = scm_cons (SCM_CAR (blist), SCM_EOL); | |
419 | blist = SCM_CDR (blist); | |
420 | blen--; | |
421 | } | |
c56cc3c8 MD |
422 | else |
423 | { | |
424 | build = scm_cons (SCM_CAR (alist), SCM_EOL); | |
425 | alist = SCM_CDR (alist); | |
426 | alen--; | |
427 | } | |
54e09076 MD |
428 | last = build; |
429 | while ((alen > 0) && (blen > 0)) | |
430 | { | |
ee1ac75b | 431 | SCM_TICK; |
7888309b | 432 | if (scm_is_true ((*cmp) (less, SCM_CAR (blist), SCM_CAR (alist)))) |
54e09076 MD |
433 | { |
434 | SCM_SETCDR (last, scm_cons (SCM_CAR (blist), SCM_EOL)); | |
435 | blist = SCM_CDR (blist); | |
436 | blen--; | |
437 | } | |
c56cc3c8 MD |
438 | else |
439 | { | |
440 | SCM_SETCDR (last, scm_cons (SCM_CAR (alist), SCM_EOL)); | |
441 | alist = SCM_CDR (alist); | |
442 | alen--; | |
443 | } | |
54e09076 MD |
444 | last = SCM_CDR (last); |
445 | } | |
446 | if ((alen > 0) && (blen == 0)) | |
447 | SCM_SETCDR (last, alist); | |
448 | else if ((alen == 0) && (blen > 0)) | |
449 | SCM_SETCDR (last, blist); | |
450 | } | |
451 | return build; | |
1bbd0b84 GB |
452 | } |
453 | #undef FUNC_NAME | |
454 | ||
54e09076 MD |
455 | |
456 | static SCM | |
457 | scm_merge_list_x (SCM alist, SCM blist, | |
458 | long alen, long blen, | |
d339981a | 459 | scm_t_trampoline_2 cmp, SCM less) |
54e09076 MD |
460 | { |
461 | SCM build, last; | |
462 | ||
c96d76b8 | 463 | if (SCM_NULL_OR_NIL_P (alist)) |
54e09076 | 464 | return blist; |
c96d76b8 | 465 | else if (SCM_NULL_OR_NIL_P (blist)) |
54e09076 MD |
466 | return alist; |
467 | else | |
468 | { | |
7888309b | 469 | if (scm_is_true ((*cmp) (less, SCM_CAR (blist), SCM_CAR (alist)))) |
54e09076 MD |
470 | { |
471 | build = blist; | |
472 | blist = SCM_CDR (blist); | |
473 | blen--; | |
474 | } | |
c56cc3c8 MD |
475 | else |
476 | { | |
477 | build = alist; | |
478 | alist = SCM_CDR (alist); | |
479 | alen--; | |
480 | } | |
54e09076 MD |
481 | last = build; |
482 | while ((alen > 0) && (blen > 0)) | |
483 | { | |
ee1ac75b | 484 | SCM_TICK; |
7888309b | 485 | if (scm_is_true ((*cmp) (less, SCM_CAR (blist), SCM_CAR (alist)))) |
54e09076 MD |
486 | { |
487 | SCM_SETCDR (last, blist); | |
488 | blist = SCM_CDR (blist); | |
489 | blen--; | |
490 | } | |
c56cc3c8 MD |
491 | else |
492 | { | |
493 | SCM_SETCDR (last, alist); | |
494 | alist = SCM_CDR (alist); | |
495 | alen--; | |
496 | } | |
54e09076 MD |
497 | last = SCM_CDR (last); |
498 | } | |
499 | if ((alen > 0) && (blen == 0)) | |
500 | SCM_SETCDR (last, alist); | |
501 | else if ((alen == 0) && (blen > 0)) | |
502 | SCM_SETCDR (last, blist); | |
503 | } | |
504 | return build; | |
505 | } /* scm_merge_list_x */ | |
506 | ||
d339981a | 507 | |
a1ec6916 | 508 | SCM_DEFINE (scm_merge_x, "merge!", 3, 0, 0, |
1bbd0b84 | 509 | (SCM alist, SCM blist, SCM less), |
e3239868 DH |
510 | "Takes two lists @var{alist} and @var{blist} such that\n" |
511 | "@code{(sorted? alist less?)} and @code{(sorted? blist less?)} and\n" | |
512 | "returns a new list in which the elements of @var{alist} and\n" | |
513 | "@var{blist} have been stably interleaved so that\n" | |
514 | " @code{(sorted? (merge alist blist less?) less?)}.\n" | |
515 | "This is the destructive variant of @code{merge}\n" | |
516 | "Note: this does _not_ accept vectors.") | |
1bbd0b84 | 517 | #define FUNC_NAME s_scm_merge_x |
54e09076 | 518 | { |
c96d76b8 | 519 | if (SCM_NULL_OR_NIL_P (alist)) |
54e09076 | 520 | return blist; |
c96d76b8 | 521 | else if (SCM_NULL_OR_NIL_P (blist)) |
54e09076 MD |
522 | return alist; |
523 | else | |
524 | { | |
d339981a DH |
525 | const scm_t_trampoline_2 cmp = compare_function (less, 3, FUNC_NAME); |
526 | long alen, blen; /* list lengths */ | |
34d19ef6 HWN |
527 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (1, alist, alen); |
528 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (2, blist, blen); | |
d339981a | 529 | return scm_merge_list_x (alist, blist, alen, blen, cmp, less); |
54e09076 | 530 | } |
1bbd0b84 GB |
531 | } |
532 | #undef FUNC_NAME | |
54e09076 | 533 | |
d339981a | 534 | |
54e09076 MD |
535 | /* This merge sort algorithm is same as slib's by Richard A. O'Keefe. |
536 | The algorithm is stable. We also tried to use the algorithm used by | |
537 | scsh's merge-sort but that algorithm showed to not be stable, even | |
538 | though it claimed to be. | |
539 | */ | |
540 | static SCM | |
d339981a | 541 | scm_merge_list_step (SCM * seq, scm_t_trampoline_2 cmp, SCM less, long n) |
54e09076 | 542 | { |
c56cc3c8 MD |
543 | SCM a, b; |
544 | ||
54e09076 MD |
545 | if (n > 2) |
546 | { | |
c014a02e | 547 | long mid = n / 2; |
ee1ac75b | 548 | SCM_TICK; |
c56cc3c8 MD |
549 | a = scm_merge_list_step (seq, cmp, less, mid); |
550 | b = scm_merge_list_step (seq, cmp, less, n - mid); | |
551 | return scm_merge_list_x (a, b, mid, n - mid, cmp, less); | |
54e09076 MD |
552 | } |
553 | else if (n == 2) | |
554 | { | |
555 | SCM p = *seq; | |
556 | SCM rest = SCM_CDR (*seq); | |
557 | SCM x = SCM_CAR (*seq); | |
558 | SCM y = SCM_CAR (SCM_CDR (*seq)); | |
559 | *seq = SCM_CDR (rest); | |
560 | SCM_SETCDR (rest, SCM_EOL); | |
7888309b | 561 | if (scm_is_true ((*cmp) (less, y, x))) |
54e09076 | 562 | { |
4b479d98 DH |
563 | SCM_SETCAR (p, y); |
564 | SCM_SETCAR (rest, x); | |
54e09076 MD |
565 | } |
566 | return p; | |
567 | } | |
568 | else if (n == 1) | |
569 | { | |
570 | SCM p = *seq; | |
571 | *seq = SCM_CDR (p); | |
572 | SCM_SETCDR (p, SCM_EOL); | |
573 | return p; | |
574 | } | |
575 | else | |
576 | return SCM_EOL; | |
577 | } /* scm_merge_list_step */ | |
578 | ||
579 | ||
a1ec6916 | 580 | SCM_DEFINE (scm_sort_x, "sort!", 2, 0, 0, |
1bbd0b84 | 581 | (SCM items, SCM less), |
e3239868 DH |
582 | "Sort the sequence @var{items}, which may be a list or a\n" |
583 | "vector. @var{less} is used for comparing the sequence\n" | |
584 | "elements. The sorting is destructive, that means that the\n" | |
585 | "input sequence is modified to produce the sorted result.\n" | |
586 | "This is not a stable sort.") | |
1bbd0b84 | 587 | #define FUNC_NAME s_scm_sort_x |
54e09076 | 588 | { |
c014a02e | 589 | long len; /* list/vector length */ |
c96d76b8 NJ |
590 | if (SCM_NULL_OR_NIL_P (items)) |
591 | return items; | |
b5c2579a | 592 | |
d2e53ed6 | 593 | if (scm_is_pair (items)) |
54e09076 | 594 | { |
d339981a | 595 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
34d19ef6 | 596 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
d339981a | 597 | return scm_merge_list_step (&items, cmp, less, len); |
54e09076 MD |
598 | } |
599 | else if (SCM_VECTORP (items)) | |
600 | { | |
b5c2579a | 601 | len = SCM_VECTOR_LENGTH (items); |
54e09076 MD |
602 | scm_restricted_vector_sort_x (items, |
603 | less, | |
e11e83f3 MV |
604 | scm_from_int (0), |
605 | scm_from_long (len)); | |
54e09076 MD |
606 | return items; |
607 | } | |
608 | else | |
276dd677 | 609 | SCM_WRONG_TYPE_ARG (1, items); |
1bbd0b84 | 610 | } |
0f981281 | 611 | #undef FUNC_NAME |
54e09076 | 612 | |
1bbd0b84 | 613 | |
a1ec6916 | 614 | SCM_DEFINE (scm_sort, "sort", 2, 0, 0, |
1bbd0b84 | 615 | (SCM items, SCM less), |
e3239868 DH |
616 | "Sort the sequence @var{items}, which may be a list or a\n" |
617 | "vector. @var{less} is used for comparing the sequence\n" | |
618 | "elements. This is not a stable sort.") | |
1bbd0b84 | 619 | #define FUNC_NAME s_scm_sort |
54e09076 | 620 | { |
c96d76b8 NJ |
621 | if (SCM_NULL_OR_NIL_P (items)) |
622 | return items; | |
b5c2579a | 623 | |
d2e53ed6 | 624 | if (scm_is_pair (items)) |
54e09076 | 625 | { |
d339981a | 626 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
c014a02e | 627 | long len; |
e9e225e5 | 628 | |
34d19ef6 | 629 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
54e09076 | 630 | items = scm_list_copy (items); |
d339981a | 631 | return scm_merge_list_step (&items, cmp, less, len); |
54e09076 | 632 | } |
3cfe6eab | 633 | #if SCM_HAVE_ARRAYS |
afe5177e | 634 | /* support ordinary vectors even if arrays not available? */ |
54e09076 MD |
635 | else if (SCM_VECTORP (items)) |
636 | { | |
c014a02e | 637 | long len = SCM_VECTOR_LENGTH (items); |
e9e225e5 GH |
638 | SCM sortvec = scm_make_uve (len, scm_array_prototype (items)); |
639 | ||
54e09076 MD |
640 | scm_array_copy_x (items, sortvec); |
641 | scm_restricted_vector_sort_x (sortvec, | |
642 | less, | |
e11e83f3 MV |
643 | scm_from_int (0), |
644 | scm_from_long (len)); | |
54e09076 MD |
645 | return sortvec; |
646 | } | |
afe5177e | 647 | #endif |
54e09076 | 648 | else |
276dd677 | 649 | SCM_WRONG_TYPE_ARG (1, items); |
1bbd0b84 | 650 | } |
0f981281 | 651 | #undef FUNC_NAME |
54e09076 | 652 | |
d339981a | 653 | |
54e09076 | 654 | static void |
1d1559ce HWN |
655 | scm_merge_vector_x (SCM vec, |
656 | SCM * temp, | |
d339981a | 657 | scm_t_trampoline_2 cmp, |
54e09076 | 658 | SCM less, |
c014a02e ML |
659 | long low, |
660 | long mid, | |
661 | long high) | |
54e09076 | 662 | { |
c014a02e ML |
663 | long it; /* Index for temp vector */ |
664 | long i1 = low; /* Index for lower vector segment */ | |
665 | long i2 = mid + 1; /* Index for upper vector segment */ | |
54e09076 MD |
666 | |
667 | /* Copy while both segments contain more characters */ | |
668 | for (it = low; (i1 <= mid) && (i2 <= high); ++it) | |
1d1559ce HWN |
669 | { |
670 | /* | |
671 | Every call of LESS might invoke GC. For full correctness, we | |
672 | should reset the generation of vecbase and tempbase between | |
673 | every call of less. | |
54e09076 | 674 | |
1d1559ce HWN |
675 | */ |
676 | register SCM *vp = SCM_WRITABLE_VELTS(vec); | |
677 | ||
7888309b | 678 | if (scm_is_true ((*cmp) (less, vp[i2], vp[i1]))) |
1d1559ce HWN |
679 | temp[it] = vp[i2++]; |
680 | else | |
681 | temp[it] = vp[i1++]; | |
682 | } | |
54e09076 | 683 | |
1d1559ce HWN |
684 | { |
685 | register SCM *vp = SCM_WRITABLE_VELTS(vec); | |
686 | ||
687 | /* Copy while first segment contains more characters */ | |
688 | while (i1 <= mid) | |
689 | temp[it++] = vp[i1++]; | |
690 | ||
691 | /* Copy while second segment contains more characters */ | |
692 | while (i2 <= high) | |
693 | temp[it++] = vp[i2++]; | |
694 | ||
695 | /* Copy back from temp to vp */ | |
696 | for (it = low; it <= high; ++it) | |
697 | vp[it] = temp[it]; | |
698 | } | |
699 | } /* scm_merge_vector_x */ | |
54e09076 | 700 | |
d339981a | 701 | |
54e09076 | 702 | static void |
1d1559ce HWN |
703 | scm_merge_vector_step (SCM vp, |
704 | SCM * temp, | |
d339981a | 705 | scm_t_trampoline_2 cmp, |
54e09076 | 706 | SCM less, |
c014a02e ML |
707 | long low, |
708 | long high) | |
54e09076 MD |
709 | { |
710 | if (high > low) | |
711 | { | |
c014a02e | 712 | long mid = (low + high) / 2; |
ee1ac75b | 713 | SCM_TICK; |
54e09076 MD |
714 | scm_merge_vector_step (vp, temp, cmp, less, low, mid); |
715 | scm_merge_vector_step (vp, temp, cmp, less, mid+1, high); | |
716 | scm_merge_vector_x (vp, temp, cmp, less, low, mid, high); | |
717 | } | |
718 | } /* scm_merge_vector_step */ | |
719 | ||
720 | ||
a1ec6916 | 721 | SCM_DEFINE (scm_stable_sort_x, "stable-sort!", 2, 0, 0, |
1bbd0b84 | 722 | (SCM items, SCM less), |
e3239868 DH |
723 | "Sort the sequence @var{items}, which may be a list or a\n" |
724 | "vector. @var{less} is used for comparing the sequence elements.\n" | |
725 | "The sorting is destructive, that means that the input sequence\n" | |
726 | "is modified to produce the sorted result.\n" | |
727 | "This is a stable sort.") | |
1bbd0b84 | 728 | #define FUNC_NAME s_scm_stable_sort_x |
54e09076 | 729 | { |
d339981a | 730 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
c014a02e | 731 | long len; /* list/vector length */ |
54e09076 | 732 | |
c96d76b8 NJ |
733 | if (SCM_NULL_OR_NIL_P (items)) |
734 | return items; | |
b5c2579a | 735 | |
d2e53ed6 | 736 | if (scm_is_pair (items)) |
54e09076 | 737 | { |
34d19ef6 | 738 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
d339981a | 739 | return scm_merge_list_step (&items, cmp, less, len); |
54e09076 MD |
740 | } |
741 | else if (SCM_VECTORP (items)) | |
742 | { | |
1d1559ce | 743 | SCM *temp; |
b5c2579a | 744 | len = SCM_VECTOR_LENGTH (items); |
34d19ef6 | 745 | |
34d19ef6 | 746 | /* |
1d1559ce HWN |
747 | the following array does not contain any new references to |
748 | SCM objects, so we can get away with allocing it on the heap. | |
749 | */ | |
67329a9e | 750 | temp = scm_malloc (len * sizeof(SCM)); |
34d19ef6 | 751 | |
d339981a | 752 | scm_merge_vector_step (items, temp, cmp, less, 0, len - 1); |
54e09076 MD |
753 | free(temp); |
754 | return items; | |
755 | } | |
756 | else | |
276dd677 | 757 | SCM_WRONG_TYPE_ARG (1, items); |
1bbd0b84 | 758 | } |
0f981281 | 759 | #undef FUNC_NAME |
54e09076 | 760 | |
d339981a | 761 | |
a1ec6916 | 762 | SCM_DEFINE (scm_stable_sort, "stable-sort", 2, 0, 0, |
1bbd0b84 | 763 | (SCM items, SCM less), |
e3239868 DH |
764 | "Sort the sequence @var{items}, which may be a list or a\n" |
765 | "vector. @var{less} is used for comparing the sequence elements.\n" | |
766 | "This is a stable sort.") | |
1bbd0b84 | 767 | #define FUNC_NAME s_scm_stable_sort |
54e09076 | 768 | { |
d339981a | 769 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
1d1559ce | 770 | |
c96d76b8 NJ |
771 | if (SCM_NULL_OR_NIL_P (items)) |
772 | return items; | |
b5c2579a | 773 | |
d2e53ed6 | 774 | if (scm_is_pair (items)) |
54e09076 | 775 | { |
1d1559ce | 776 | long len; /* list/vector length */ |
d339981a | 777 | |
34d19ef6 | 778 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
54e09076 | 779 | items = scm_list_copy (items); |
d339981a | 780 | return scm_merge_list_step (&items, cmp, less, len); |
54e09076 | 781 | } |
3cfe6eab | 782 | #if SCM_HAVE_ARRAYS |
afe5177e | 783 | /* support ordinary vectors even if arrays not available? */ |
54e09076 MD |
784 | else if (SCM_VECTORP (items)) |
785 | { | |
1d1559ce | 786 | long len = SCM_VECTOR_LENGTH (items); |
67329a9e | 787 | SCM *temp = scm_malloc (len * sizeof (SCM)); |
1d1559ce | 788 | SCM retvec = scm_make_uve (len, scm_array_prototype (items)); |
54e09076 | 789 | scm_array_copy_x (items, retvec); |
34d19ef6 | 790 | |
d339981a | 791 | scm_merge_vector_step (retvec, temp, cmp, less, 0, len - 1); |
54e09076 MD |
792 | free (temp); |
793 | return retvec; | |
794 | } | |
afe5177e | 795 | #endif |
54e09076 | 796 | else |
276dd677 | 797 | SCM_WRONG_TYPE_ARG (1, items); |
1bbd0b84 | 798 | } |
0f981281 | 799 | #undef FUNC_NAME |
54e09076 | 800 | |
d339981a | 801 | |
a1ec6916 | 802 | SCM_DEFINE (scm_sort_list_x, "sort-list!", 2, 0, 0, |
1bbd0b84 | 803 | (SCM items, SCM less), |
e3239868 DH |
804 | "Sort the list @var{items}, using @var{less} for comparing the\n" |
805 | "list elements. The sorting is destructive, that means that the\n" | |
806 | "input list is modified to produce the sorted result.\n" | |
807 | "This is a stable sort.") | |
1bbd0b84 | 808 | #define FUNC_NAME s_scm_sort_list_x |
54e09076 | 809 | { |
d339981a | 810 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
c014a02e | 811 | long len; |
d339981a | 812 | |
34d19ef6 | 813 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
d339981a | 814 | return scm_merge_list_step (&items, cmp, less, len); |
1bbd0b84 | 815 | } |
0f981281 | 816 | #undef FUNC_NAME |
54e09076 | 817 | |
d339981a | 818 | |
a1ec6916 | 819 | SCM_DEFINE (scm_sort_list, "sort-list", 2, 0, 0, |
e3239868 DH |
820 | (SCM items, SCM less), |
821 | "Sort the list @var{items}, using @var{less} for comparing the\n" | |
822 | "list elements. This is a stable sort.") | |
1bbd0b84 | 823 | #define FUNC_NAME s_scm_sort_list |
54e09076 | 824 | { |
d339981a | 825 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
c014a02e | 826 | long len; |
d339981a | 827 | |
34d19ef6 | 828 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
54e09076 | 829 | items = scm_list_copy (items); |
d339981a | 830 | return scm_merge_list_step (&items, cmp, less, len); |
1bbd0b84 | 831 | } |
0f981281 | 832 | #undef FUNC_NAME |
54e09076 | 833 | |
d339981a | 834 | |
54e09076 MD |
835 | void |
836 | scm_init_sort () | |
837 | { | |
a0599745 | 838 | #include "libguile/sort.x" |
54e09076 MD |
839 | |
840 | scm_add_feature ("sort"); | |
841 | } | |
89e00824 ML |
842 | |
843 | /* | |
844 | Local Variables: | |
845 | c-file-style: "gnu" | |
846 | End: | |
847 | */ |