Commit | Line | Data |
---|---|---|
504d99c5 | 1 | /* Copyright (C) 1999,2000,2001,2002 Free Software Foundation, Inc. |
73be1d9e MV |
2 | * This library is free software; you can redistribute it and/or |
3 | * modify it under the terms of the GNU Lesser General Public | |
4 | * License as published by the Free Software Foundation; either | |
5 | * version 2.1 of the License, or (at your option) any later version. | |
54e09076 | 6 | * |
73be1d9e MV |
7 | * This library is distributed in the hope that it will be useful, |
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
10 | * Lesser General Public License for more details. | |
54e09076 | 11 | * |
73be1d9e MV |
12 | * You should have received a copy of the GNU Lesser General Public |
13 | * License along with this library; if not, write to the Free Software | |
14 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
15 | */ | |
54e09076 | 16 | |
1bbd0b84 GB |
17 | |
18 | ||
54e09076 MD |
19 | /* Written in December 1998 by Roland Orre <orre@nada.kth.se> |
20 | * This implements the same sort interface as slib/sort.scm | |
21 | * for lists and vectors where slib defines: | |
22 | * sorted?, merge, merge!, sort, sort! | |
15d9c4e3 | 23 | * For scsh compatibility sort-list and sort-list! are also defined. |
54e09076 | 24 | * In cases where a stable-sort is required use stable-sort or |
15d9c4e3 | 25 | * stable-sort!. An additional feature is |
54e09076 | 26 | * (restricted-vector-sort! vector less? startpos endpos) |
15d9c4e3 | 27 | * which allows you to sort part of a vector. |
54e09076 MD |
28 | * Thanks to Aubrey Jaffer for the slib/sort.scm library. |
29 | * Thanks to Richard A. O'Keefe (based on Prolog code by D.H.D.Warren) | |
30 | * for the merge sort inspiration. | |
31 | * Thanks to Douglas C. Schmidt (schmidt@ics.uci.edu) for the | |
32 | * quicksort code. | |
33 | */ | |
34 | ||
a0599745 | 35 | #include "libguile/_scm.h" |
a0599745 MD |
36 | #include "libguile/eval.h" |
37 | #include "libguile/unif.h" | |
38 | #include "libguile/ramap.h" | |
a0599745 MD |
39 | #include "libguile/feature.h" |
40 | #include "libguile/vectors.h" | |
c96d76b8 | 41 | #include "libguile/lang.h" |
54e09076 | 42 | |
a0599745 MD |
43 | #include "libguile/validate.h" |
44 | #include "libguile/sort.h" | |
54e09076 | 45 | |
d339981a | 46 | |
54e09076 MD |
47 | /* The routine quicksort was extracted from the GNU C Library qsort.c |
48 | written by Douglas C. Schmidt (schmidt@ics.uci.edu) | |
49 | and adapted to guile by adding an extra pointer less | |
50 | to quicksort by Roland Orre <orre@nada.kth.se>. | |
51 | ||
52 | The reason to do this instead of using the library function qsort | |
53 | was to avoid dependency of the ANSI-C extensions for local functions | |
54 | and also to avoid obscure pool based solutions. | |
15d9c4e3 MD |
55 | |
56 | This sorting routine is not much more efficient than the stable | |
57 | version but doesn't consume extra memory. | |
54e09076 MD |
58 | */ |
59 | ||
d339981a | 60 | #define SWAP(a, b) do { const SCM _tmp = a; a = b; b = _tmp; } while (0) |
54e09076 MD |
61 | |
62 | ||
63 | /* Order size using quicksort. This implementation incorporates | |
64 | four optimizations discussed in Sedgewick: | |
65 | ||
d339981a DH |
66 | 1. Non-recursive, using an explicit stack of pointer that store the next |
67 | array partition to sort. To save time, this maximum amount of space | |
68 | required to store an array of MAX_SIZE_T is allocated on the stack. | |
69 | Assuming a bit width of 32 bits for size_t, this needs only | |
70 | 32 * sizeof (stack_node) == 128 bytes. Pretty cheap, actually. | |
54e09076 | 71 | |
d339981a DH |
72 | 2. Chose the pivot element using a median-of-three decision tree. This |
73 | reduces the probability of selecting a bad pivot value and eliminates | |
74 | certain extraneous comparisons. | |
54e09076 | 75 | |
d339981a DH |
76 | 3. Only quicksorts NR_ELEMS / MAX_THRESH partitions, leaving insertion sort |
77 | to order the MAX_THRESH items within each partition. This is a big win, | |
78 | since insertion sort is faster for small, mostly sorted array segments. | |
54e09076 MD |
79 | |
80 | 4. The larger of the two sub-partitions is always pushed onto the | |
81 | stack first, with the algorithm then concentrating on the | |
82 | smaller partition. This *guarantees* no more than log (n) | |
83 | stack size is needed (actually O(1) in this case)! */ | |
84 | ||
54e09076 | 85 | |
d339981a DH |
86 | /* Discontinue quicksort algorithm when partition gets below this size. |
87 | * This particular magic number was chosen to work best on a Sun 4/260. */ | |
88 | #define MAX_THRESH 4 | |
89 | ||
90 | ||
91 | /* Inline stack abstraction: The stack size for quicksorting at most as many | |
92 | * elements as can be given by a value of type size_t is, as described above, | |
93 | * log (MAX_SIZE_T), which is the number of bits of size_t. More accurately, | |
94 | * we would only need ceil (log (MAX_SIZE_T / MAX_THRESH)), but this is | |
95 | * ignored below. */ | |
96 | ||
97 | /* Stack node declarations used to store unfulfilled partition obligations. */ | |
98 | typedef struct { | |
99 | size_t lo; | |
100 | size_t hi; | |
101 | } stack_node; | |
102 | ||
103 | #define STACK_SIZE (8 * sizeof (size_t)) /* assume 8 bit char */ | |
104 | #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top)) | |
105 | #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi))) | |
106 | #define STACK_NOT_EMPTY (stack < top) | |
107 | ||
a34af05e | 108 | |
54e09076 | 109 | static void |
d339981a | 110 | quicksort (SCM *const base_ptr, size_t nr_elems, scm_t_trampoline_2 cmp, SCM less) |
54e09076 | 111 | { |
d339981a | 112 | static const char s_buggy_less[] = "buggy less predicate used when sorting"; |
54e09076 | 113 | |
d339981a | 114 | if (nr_elems == 0) |
54e09076 MD |
115 | /* Avoid lossage with unsigned arithmetic below. */ |
116 | return; | |
117 | ||
d339981a | 118 | if (nr_elems > MAX_THRESH) |
54e09076 | 119 | { |
d339981a DH |
120 | size_t lo = 0; |
121 | size_t hi = nr_elems - 1; | |
122 | ||
54e09076 MD |
123 | stack_node stack[STACK_SIZE]; |
124 | stack_node *top = stack + 1; | |
125 | ||
126 | while (STACK_NOT_EMPTY) | |
127 | { | |
d339981a DH |
128 | size_t left; |
129 | size_t right; | |
14e9281b | 130 | SCM pivot; |
54e09076 MD |
131 | |
132 | /* Select median value from among LO, MID, and HI. Rearrange | |
133 | LO and HI so the three values are sorted. This lowers the | |
134 | probability of picking a pathological pivot value and | |
d339981a | 135 | skips a comparison for both the left and right. */ |
54e09076 | 136 | |
d339981a | 137 | size_t mid = lo + (hi - lo) / 2; |
54e09076 | 138 | |
7888309b | 139 | if (scm_is_true ((*cmp) (less, base_ptr[mid], base_ptr[lo]))) |
d339981a | 140 | SWAP (base_ptr[mid], base_ptr[lo]); |
7888309b | 141 | if (scm_is_true ((*cmp) (less, base_ptr[hi], base_ptr[mid]))) |
d339981a | 142 | SWAP (base_ptr[mid], base_ptr[hi]); |
54e09076 MD |
143 | else |
144 | goto jump_over; | |
7888309b | 145 | if (scm_is_true ((*cmp) (less, base_ptr[mid], base_ptr[lo]))) |
d339981a | 146 | SWAP (base_ptr[mid], base_ptr[lo]); |
54e09076 | 147 | jump_over:; |
54e09076 | 148 | |
14e9281b | 149 | pivot = base_ptr[mid]; |
d339981a DH |
150 | left = lo + 1; |
151 | right = hi - 1; | |
54e09076 MD |
152 | |
153 | /* Here's the famous ``collapse the walls'' section of quicksort. | |
154 | Gotta like those tight inner loops! They are the main reason | |
155 | that this algorithm runs much faster than others. */ | |
156 | do | |
157 | { | |
14e9281b | 158 | while (scm_is_true ((*cmp) (less, base_ptr[left], pivot))) |
a34af05e | 159 | { |
d339981a | 160 | left++; |
a34af05e | 161 | /* The comparison predicate may be buggy */ |
d339981a | 162 | if (left > hi) |
5d2d2ffc | 163 | scm_misc_error (NULL, s_buggy_less, SCM_EOL); |
a34af05e | 164 | } |
54e09076 | 165 | |
14e9281b | 166 | while (scm_is_true ((*cmp) (less, pivot, base_ptr[right]))) |
a34af05e | 167 | { |
d339981a | 168 | right--; |
a34af05e | 169 | /* The comparison predicate may be buggy */ |
d339981a | 170 | if (right < lo) |
5d2d2ffc | 171 | scm_misc_error (NULL, s_buggy_less, SCM_EOL); |
a34af05e | 172 | } |
54e09076 | 173 | |
d339981a | 174 | if (left < right) |
54e09076 | 175 | { |
d339981a DH |
176 | SWAP (base_ptr[left], base_ptr[right]); |
177 | left++; | |
178 | right--; | |
54e09076 | 179 | } |
d339981a | 180 | else if (left == right) |
54e09076 | 181 | { |
d339981a DH |
182 | left++; |
183 | right--; | |
54e09076 MD |
184 | break; |
185 | } | |
186 | } | |
d339981a | 187 | while (left <= right); |
54e09076 MD |
188 | |
189 | /* Set up pointers for next iteration. First determine whether | |
190 | left and right partitions are below the threshold size. If so, | |
191 | ignore one or both. Otherwise, push the larger partition's | |
192 | bounds on the stack and continue sorting the smaller one. */ | |
193 | ||
d339981a | 194 | if ((size_t) (right - lo) <= MAX_THRESH) |
54e09076 | 195 | { |
d339981a | 196 | if ((size_t) (hi - left) <= MAX_THRESH) |
54e09076 MD |
197 | /* Ignore both small partitions. */ |
198 | POP (lo, hi); | |
199 | else | |
200 | /* Ignore small left partition. */ | |
d339981a | 201 | lo = left; |
54e09076 | 202 | } |
d339981a | 203 | else if ((size_t) (hi - left) <= MAX_THRESH) |
54e09076 | 204 | /* Ignore small right partition. */ |
d339981a DH |
205 | hi = right; |
206 | else if ((right - lo) > (hi - left)) | |
54e09076 MD |
207 | { |
208 | /* Push larger left partition indices. */ | |
d339981a DH |
209 | PUSH (lo, right); |
210 | lo = left; | |
54e09076 MD |
211 | } |
212 | else | |
213 | { | |
214 | /* Push larger right partition indices. */ | |
d339981a DH |
215 | PUSH (left, hi); |
216 | hi = right; | |
54e09076 MD |
217 | } |
218 | } | |
219 | } | |
220 | ||
d339981a DH |
221 | /* Once the BASE_PTR array is partially sorted by quicksort the rest is |
222 | completely sorted using insertion sort, since this is efficient for | |
223 | partitions below MAX_THRESH size. BASE_PTR points to the beginning of the | |
224 | array to sort, and END idexes the very last element in the array (*not* | |
225 | one beyond it!). */ | |
54e09076 MD |
226 | |
227 | { | |
d339981a DH |
228 | size_t tmp = 0; |
229 | size_t end = nr_elems - 1; | |
230 | size_t thresh = min (end, MAX_THRESH); | |
231 | size_t run; | |
54e09076 MD |
232 | |
233 | /* Find smallest element in first threshold and place it at the | |
234 | array's beginning. This is the smallest array element, | |
235 | and the operation speeds up insertion sort's inner loop. */ | |
236 | ||
d339981a | 237 | for (run = tmp + 1; run <= thresh; run++) |
7888309b | 238 | if (scm_is_true ((*cmp) (less, base_ptr[run], base_ptr[tmp]))) |
d339981a | 239 | tmp = run; |
54e09076 | 240 | |
d339981a DH |
241 | if (tmp != 0) |
242 | SWAP (base_ptr[tmp], base_ptr[0]); | |
54e09076 MD |
243 | |
244 | /* Insertion sort, running from left-hand-side up to right-hand-side. */ | |
245 | ||
d339981a DH |
246 | run = 1; |
247 | while (++run <= end) | |
54e09076 | 248 | { |
d339981a | 249 | tmp = run - 1; |
7888309b | 250 | while (scm_is_true ((*cmp) (less, base_ptr[run], base_ptr[tmp]))) |
a34af05e | 251 | { |
a34af05e | 252 | /* The comparison predicate may be buggy */ |
d339981a | 253 | if (tmp == 0) |
5d2d2ffc | 254 | scm_misc_error (NULL, s_buggy_less, SCM_EOL); |
d339981a DH |
255 | |
256 | tmp--; | |
a34af05e | 257 | } |
54e09076 | 258 | |
d339981a DH |
259 | tmp++; |
260 | if (tmp != run) | |
54e09076 | 261 | { |
d339981a DH |
262 | SCM to_insert = base_ptr[run]; |
263 | size_t hi, lo; | |
264 | ||
265 | for (hi = lo = run; --lo >= tmp; hi = lo) | |
266 | base_ptr[hi] = base_ptr[lo]; | |
267 | base_ptr[hi] = to_insert; | |
54e09076 MD |
268 | } |
269 | } | |
270 | } | |
d339981a | 271 | } |
54e09076 | 272 | |
54e09076 | 273 | |
d339981a DH |
274 | static scm_t_trampoline_2 |
275 | compare_function (SCM less, unsigned int arg_nr, const char* fname) | |
54e09076 | 276 | { |
d339981a DH |
277 | const scm_t_trampoline_2 cmp = scm_trampoline_2 (less); |
278 | SCM_ASSERT_TYPE (cmp != NULL, less, arg_nr, fname, "less predicate"); | |
279 | return cmp; | |
280 | } | |
54e09076 | 281 | |
54e09076 MD |
282 | |
283 | /* Question: Is there any need to make this a more general array sort? | |
284 | It is probably enough to manage the vector type. */ | |
285 | /* endpos equal as for substring, i.e. endpos is not included. */ | |
da4a1dba | 286 | /* More natural with length? */ |
1bbd0b84 | 287 | |
a1ec6916 | 288 | SCM_DEFINE (scm_restricted_vector_sort_x, "restricted-vector-sort!", 4, 0, 0, |
1bbd0b84 | 289 | (SCM vec, SCM less, SCM startpos, SCM endpos), |
e3239868 | 290 | "Sort the vector @var{vec}, using @var{less} for comparing\n" |
3bdf7962 MV |
291 | "the vector elements. @var{startpos} (inclusively) and\n" |
292 | "@var{endpos} (exclusively) delimit\n" | |
e3239868 DH |
293 | "the range of the vector which gets sorted. The return value\n" |
294 | "is not specified.") | |
1bbd0b84 | 295 | #define FUNC_NAME s_scm_restricted_vector_sort_x |
54e09076 | 296 | { |
d339981a DH |
297 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
298 | size_t vlen, spos, len; | |
54e09076 MD |
299 | SCM *vp; |
300 | ||
34d19ef6 | 301 | SCM_VALIDATE_VECTOR (1, vec); |
34d19ef6 | 302 | vp = SCM_WRITABLE_VELTS (vec); /* vector pointer */ |
b5c2579a | 303 | vlen = SCM_VECTOR_LENGTH (vec); |
54e09076 | 304 | |
a55c2b68 | 305 | spos = scm_to_unsigned_integer (startpos, 0, vlen); |
3bdf7962 | 306 | len = scm_to_unsigned_integer (endpos, spos, vlen) - spos; |
54e09076 | 307 | |
d339981a DH |
308 | quicksort (&vp[spos], len, cmp, less); |
309 | scm_remember_upto_here_1 (vec); | |
34d19ef6 | 310 | |
d339981a | 311 | return SCM_UNSPECIFIED; |
1bbd0b84 GB |
312 | } |
313 | #undef FUNC_NAME | |
54e09076 | 314 | |
d339981a | 315 | |
54e09076 MD |
316 | /* (sorted? sequence less?) |
317 | * is true when sequence is a list (x0 x1 ... xm) or a vector #(x0 ... xm) | |
318 | * such that for all 1 <= i <= m, | |
319 | * (not (less? (list-ref list i) (list-ref list (- i 1)))). */ | |
a1ec6916 | 320 | SCM_DEFINE (scm_sorted_p, "sorted?", 2, 0, 0, |
1bbd0b84 | 321 | (SCM items, SCM less), |
e3239868 DH |
322 | "Return @code{#t} iff @var{items} is a list or a vector such that\n" |
323 | "for all 1 <= i <= m, the predicate @var{less} returns true when\n" | |
324 | "applied to all elements i - 1 and i") | |
1bbd0b84 | 325 | #define FUNC_NAME s_scm_sorted_p |
54e09076 | 326 | { |
d339981a | 327 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
c014a02e | 328 | long len, j; /* list/vector length, temp j */ |
54e09076 | 329 | SCM item, rest; /* rest of items loop variable */ |
34d19ef6 | 330 | SCM const *vp; |
54e09076 | 331 | |
c96d76b8 | 332 | if (SCM_NULL_OR_NIL_P (items)) |
54e09076 | 333 | return SCM_BOOL_T; |
1bbd0b84 | 334 | |
d2e53ed6 | 335 | if (scm_is_pair (items)) |
54e09076 MD |
336 | { |
337 | len = scm_ilength (items); /* also checks that it's a pure list */ | |
34d19ef6 | 338 | SCM_ASSERT_RANGE (1, items, len >= 0); |
54e09076 MD |
339 | if (len <= 1) |
340 | return SCM_BOOL_T; | |
341 | ||
342 | item = SCM_CAR (items); | |
343 | rest = SCM_CDR (items); | |
344 | j = len - 1; | |
345 | while (j > 0) | |
346 | { | |
7888309b | 347 | if (scm_is_true ((*cmp) (less, SCM_CAR (rest), item))) |
54e09076 MD |
348 | return SCM_BOOL_F; |
349 | else | |
350 | { | |
351 | item = SCM_CAR (rest); | |
352 | rest = SCM_CDR (rest); | |
353 | j--; | |
354 | } | |
355 | } | |
356 | return SCM_BOOL_T; | |
357 | } | |
358 | else | |
359 | { | |
b5c2579a DH |
360 | SCM_VALIDATE_VECTOR (1, items); |
361 | ||
362 | vp = SCM_VELTS (items); /* vector pointer */ | |
363 | len = SCM_VECTOR_LENGTH (items); | |
364 | j = len - 1; | |
365 | while (j > 0) | |
54e09076 | 366 | { |
7888309b | 367 | if (scm_is_true ((*cmp) (less, vp[1], vp[0]))) |
b5c2579a DH |
368 | return SCM_BOOL_F; |
369 | else | |
370 | { | |
371 | vp++; | |
372 | j--; | |
373 | } | |
54e09076 | 374 | } |
b5c2579a | 375 | return SCM_BOOL_T; |
54e09076 | 376 | } |
b5c2579a | 377 | |
54e09076 | 378 | return SCM_BOOL_F; |
1bbd0b84 GB |
379 | } |
380 | #undef FUNC_NAME | |
54e09076 | 381 | |
d339981a | 382 | |
54e09076 MD |
383 | /* (merge a b less?) |
384 | takes two lists a and b such that (sorted? a less?) and (sorted? b less?) | |
385 | and returns a new list in which the elements of a and b have been stably | |
386 | interleaved so that (sorted? (merge a b less?) less?). | |
387 | Note: this does _not_ accept vectors. */ | |
a1ec6916 | 388 | SCM_DEFINE (scm_merge, "merge", 3, 0, 0, |
1bbd0b84 | 389 | (SCM alist, SCM blist, SCM less), |
8f85c0c6 NJ |
390 | "Merge two already sorted lists into one.\n" |
391 | "Given two lists @var{alist} and @var{blist}, such that\n" | |
392 | "@code{(sorted? alist less?)} and @code{(sorted? blist less?)},\n" | |
393 | "return a new list in which the elements of @var{alist} and\n" | |
e3239868 DH |
394 | "@var{blist} have been stably interleaved so that\n" |
395 | "@code{(sorted? (merge alist blist less?) less?)}.\n" | |
396 | "Note: this does _not_ accept vectors.") | |
1bbd0b84 | 397 | #define FUNC_NAME s_scm_merge |
54e09076 | 398 | { |
d339981a | 399 | SCM build; |
54e09076 | 400 | |
c96d76b8 | 401 | if (SCM_NULL_OR_NIL_P (alist)) |
54e09076 | 402 | return blist; |
c96d76b8 | 403 | else if (SCM_NULL_OR_NIL_P (blist)) |
54e09076 MD |
404 | return alist; |
405 | else | |
406 | { | |
d339981a DH |
407 | const scm_t_trampoline_2 cmp = compare_function (less, 3, FUNC_NAME); |
408 | long alen, blen; /* list lengths */ | |
409 | SCM last; | |
410 | ||
34d19ef6 HWN |
411 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (1, alist, alen); |
412 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (2, blist, blen); | |
7888309b | 413 | if (scm_is_true ((*cmp) (less, SCM_CAR (blist), SCM_CAR (alist)))) |
54e09076 MD |
414 | { |
415 | build = scm_cons (SCM_CAR (blist), SCM_EOL); | |
416 | blist = SCM_CDR (blist); | |
417 | blen--; | |
418 | } | |
c56cc3c8 MD |
419 | else |
420 | { | |
421 | build = scm_cons (SCM_CAR (alist), SCM_EOL); | |
422 | alist = SCM_CDR (alist); | |
423 | alen--; | |
424 | } | |
54e09076 MD |
425 | last = build; |
426 | while ((alen > 0) && (blen > 0)) | |
427 | { | |
7888309b | 428 | if (scm_is_true ((*cmp) (less, SCM_CAR (blist), SCM_CAR (alist)))) |
54e09076 MD |
429 | { |
430 | SCM_SETCDR (last, scm_cons (SCM_CAR (blist), SCM_EOL)); | |
431 | blist = SCM_CDR (blist); | |
432 | blen--; | |
433 | } | |
c56cc3c8 MD |
434 | else |
435 | { | |
436 | SCM_SETCDR (last, scm_cons (SCM_CAR (alist), SCM_EOL)); | |
437 | alist = SCM_CDR (alist); | |
438 | alen--; | |
439 | } | |
54e09076 MD |
440 | last = SCM_CDR (last); |
441 | } | |
442 | if ((alen > 0) && (blen == 0)) | |
443 | SCM_SETCDR (last, alist); | |
444 | else if ((alen == 0) && (blen > 0)) | |
445 | SCM_SETCDR (last, blist); | |
446 | } | |
447 | return build; | |
1bbd0b84 GB |
448 | } |
449 | #undef FUNC_NAME | |
450 | ||
54e09076 MD |
451 | |
452 | static SCM | |
453 | scm_merge_list_x (SCM alist, SCM blist, | |
454 | long alen, long blen, | |
d339981a | 455 | scm_t_trampoline_2 cmp, SCM less) |
54e09076 MD |
456 | { |
457 | SCM build, last; | |
458 | ||
c96d76b8 | 459 | if (SCM_NULL_OR_NIL_P (alist)) |
54e09076 | 460 | return blist; |
c96d76b8 | 461 | else if (SCM_NULL_OR_NIL_P (blist)) |
54e09076 MD |
462 | return alist; |
463 | else | |
464 | { | |
7888309b | 465 | if (scm_is_true ((*cmp) (less, SCM_CAR (blist), SCM_CAR (alist)))) |
54e09076 MD |
466 | { |
467 | build = blist; | |
468 | blist = SCM_CDR (blist); | |
469 | blen--; | |
470 | } | |
c56cc3c8 MD |
471 | else |
472 | { | |
473 | build = alist; | |
474 | alist = SCM_CDR (alist); | |
475 | alen--; | |
476 | } | |
54e09076 MD |
477 | last = build; |
478 | while ((alen > 0) && (blen > 0)) | |
479 | { | |
7888309b | 480 | if (scm_is_true ((*cmp) (less, SCM_CAR (blist), SCM_CAR (alist)))) |
54e09076 MD |
481 | { |
482 | SCM_SETCDR (last, blist); | |
483 | blist = SCM_CDR (blist); | |
484 | blen--; | |
485 | } | |
c56cc3c8 MD |
486 | else |
487 | { | |
488 | SCM_SETCDR (last, alist); | |
489 | alist = SCM_CDR (alist); | |
490 | alen--; | |
491 | } | |
54e09076 MD |
492 | last = SCM_CDR (last); |
493 | } | |
494 | if ((alen > 0) && (blen == 0)) | |
495 | SCM_SETCDR (last, alist); | |
496 | else if ((alen == 0) && (blen > 0)) | |
497 | SCM_SETCDR (last, blist); | |
498 | } | |
499 | return build; | |
500 | } /* scm_merge_list_x */ | |
501 | ||
d339981a | 502 | |
a1ec6916 | 503 | SCM_DEFINE (scm_merge_x, "merge!", 3, 0, 0, |
1bbd0b84 | 504 | (SCM alist, SCM blist, SCM less), |
e3239868 DH |
505 | "Takes two lists @var{alist} and @var{blist} such that\n" |
506 | "@code{(sorted? alist less?)} and @code{(sorted? blist less?)} and\n" | |
507 | "returns a new list in which the elements of @var{alist} and\n" | |
508 | "@var{blist} have been stably interleaved so that\n" | |
509 | " @code{(sorted? (merge alist blist less?) less?)}.\n" | |
510 | "This is the destructive variant of @code{merge}\n" | |
511 | "Note: this does _not_ accept vectors.") | |
1bbd0b84 | 512 | #define FUNC_NAME s_scm_merge_x |
54e09076 | 513 | { |
c96d76b8 | 514 | if (SCM_NULL_OR_NIL_P (alist)) |
54e09076 | 515 | return blist; |
c96d76b8 | 516 | else if (SCM_NULL_OR_NIL_P (blist)) |
54e09076 MD |
517 | return alist; |
518 | else | |
519 | { | |
d339981a DH |
520 | const scm_t_trampoline_2 cmp = compare_function (less, 3, FUNC_NAME); |
521 | long alen, blen; /* list lengths */ | |
34d19ef6 HWN |
522 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (1, alist, alen); |
523 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (2, blist, blen); | |
d339981a | 524 | return scm_merge_list_x (alist, blist, alen, blen, cmp, less); |
54e09076 | 525 | } |
1bbd0b84 GB |
526 | } |
527 | #undef FUNC_NAME | |
54e09076 | 528 | |
d339981a | 529 | |
54e09076 MD |
530 | /* This merge sort algorithm is same as slib's by Richard A. O'Keefe. |
531 | The algorithm is stable. We also tried to use the algorithm used by | |
532 | scsh's merge-sort but that algorithm showed to not be stable, even | |
533 | though it claimed to be. | |
534 | */ | |
535 | static SCM | |
d339981a | 536 | scm_merge_list_step (SCM * seq, scm_t_trampoline_2 cmp, SCM less, long n) |
54e09076 | 537 | { |
c56cc3c8 MD |
538 | SCM a, b; |
539 | ||
54e09076 MD |
540 | if (n > 2) |
541 | { | |
c014a02e | 542 | long mid = n / 2; |
c56cc3c8 MD |
543 | a = scm_merge_list_step (seq, cmp, less, mid); |
544 | b = scm_merge_list_step (seq, cmp, less, n - mid); | |
545 | return scm_merge_list_x (a, b, mid, n - mid, cmp, less); | |
54e09076 MD |
546 | } |
547 | else if (n == 2) | |
548 | { | |
549 | SCM p = *seq; | |
550 | SCM rest = SCM_CDR (*seq); | |
551 | SCM x = SCM_CAR (*seq); | |
552 | SCM y = SCM_CAR (SCM_CDR (*seq)); | |
553 | *seq = SCM_CDR (rest); | |
554 | SCM_SETCDR (rest, SCM_EOL); | |
7888309b | 555 | if (scm_is_true ((*cmp) (less, y, x))) |
54e09076 | 556 | { |
4b479d98 DH |
557 | SCM_SETCAR (p, y); |
558 | SCM_SETCAR (rest, x); | |
54e09076 MD |
559 | } |
560 | return p; | |
561 | } | |
562 | else if (n == 1) | |
563 | { | |
564 | SCM p = *seq; | |
565 | *seq = SCM_CDR (p); | |
566 | SCM_SETCDR (p, SCM_EOL); | |
567 | return p; | |
568 | } | |
569 | else | |
570 | return SCM_EOL; | |
571 | } /* scm_merge_list_step */ | |
572 | ||
573 | ||
a1ec6916 | 574 | SCM_DEFINE (scm_sort_x, "sort!", 2, 0, 0, |
1bbd0b84 | 575 | (SCM items, SCM less), |
e3239868 DH |
576 | "Sort the sequence @var{items}, which may be a list or a\n" |
577 | "vector. @var{less} is used for comparing the sequence\n" | |
578 | "elements. The sorting is destructive, that means that the\n" | |
579 | "input sequence is modified to produce the sorted result.\n" | |
580 | "This is not a stable sort.") | |
1bbd0b84 | 581 | #define FUNC_NAME s_scm_sort_x |
54e09076 | 582 | { |
c014a02e | 583 | long len; /* list/vector length */ |
c96d76b8 NJ |
584 | if (SCM_NULL_OR_NIL_P (items)) |
585 | return items; | |
b5c2579a | 586 | |
d2e53ed6 | 587 | if (scm_is_pair (items)) |
54e09076 | 588 | { |
d339981a | 589 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
34d19ef6 | 590 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
d339981a | 591 | return scm_merge_list_step (&items, cmp, less, len); |
54e09076 MD |
592 | } |
593 | else if (SCM_VECTORP (items)) | |
594 | { | |
b5c2579a | 595 | len = SCM_VECTOR_LENGTH (items); |
54e09076 MD |
596 | scm_restricted_vector_sort_x (items, |
597 | less, | |
e11e83f3 MV |
598 | scm_from_int (0), |
599 | scm_from_long (len)); | |
54e09076 MD |
600 | return items; |
601 | } | |
602 | else | |
276dd677 | 603 | SCM_WRONG_TYPE_ARG (1, items); |
1bbd0b84 | 604 | } |
0f981281 | 605 | #undef FUNC_NAME |
54e09076 | 606 | |
1bbd0b84 | 607 | |
a1ec6916 | 608 | SCM_DEFINE (scm_sort, "sort", 2, 0, 0, |
1bbd0b84 | 609 | (SCM items, SCM less), |
e3239868 DH |
610 | "Sort the sequence @var{items}, which may be a list or a\n" |
611 | "vector. @var{less} is used for comparing the sequence\n" | |
612 | "elements. This is not a stable sort.") | |
1bbd0b84 | 613 | #define FUNC_NAME s_scm_sort |
54e09076 | 614 | { |
c96d76b8 NJ |
615 | if (SCM_NULL_OR_NIL_P (items)) |
616 | return items; | |
b5c2579a | 617 | |
d2e53ed6 | 618 | if (scm_is_pair (items)) |
54e09076 | 619 | { |
d339981a | 620 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
c014a02e | 621 | long len; |
e9e225e5 | 622 | |
34d19ef6 | 623 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
54e09076 | 624 | items = scm_list_copy (items); |
d339981a | 625 | return scm_merge_list_step (&items, cmp, less, len); |
54e09076 | 626 | } |
3cfe6eab | 627 | #if SCM_HAVE_ARRAYS |
afe5177e | 628 | /* support ordinary vectors even if arrays not available? */ |
54e09076 MD |
629 | else if (SCM_VECTORP (items)) |
630 | { | |
c014a02e | 631 | long len = SCM_VECTOR_LENGTH (items); |
e9e225e5 GH |
632 | SCM sortvec = scm_make_uve (len, scm_array_prototype (items)); |
633 | ||
54e09076 MD |
634 | scm_array_copy_x (items, sortvec); |
635 | scm_restricted_vector_sort_x (sortvec, | |
636 | less, | |
e11e83f3 MV |
637 | scm_from_int (0), |
638 | scm_from_long (len)); | |
54e09076 MD |
639 | return sortvec; |
640 | } | |
afe5177e | 641 | #endif |
54e09076 | 642 | else |
276dd677 | 643 | SCM_WRONG_TYPE_ARG (1, items); |
1bbd0b84 | 644 | } |
0f981281 | 645 | #undef FUNC_NAME |
54e09076 | 646 | |
d339981a | 647 | |
54e09076 | 648 | static void |
1d1559ce HWN |
649 | scm_merge_vector_x (SCM vec, |
650 | SCM * temp, | |
d339981a | 651 | scm_t_trampoline_2 cmp, |
54e09076 | 652 | SCM less, |
c014a02e ML |
653 | long low, |
654 | long mid, | |
655 | long high) | |
54e09076 | 656 | { |
c014a02e ML |
657 | long it; /* Index for temp vector */ |
658 | long i1 = low; /* Index for lower vector segment */ | |
659 | long i2 = mid + 1; /* Index for upper vector segment */ | |
54e09076 MD |
660 | |
661 | /* Copy while both segments contain more characters */ | |
662 | for (it = low; (i1 <= mid) && (i2 <= high); ++it) | |
1d1559ce HWN |
663 | { |
664 | /* | |
665 | Every call of LESS might invoke GC. For full correctness, we | |
666 | should reset the generation of vecbase and tempbase between | |
667 | every call of less. | |
54e09076 | 668 | |
1d1559ce HWN |
669 | */ |
670 | register SCM *vp = SCM_WRITABLE_VELTS(vec); | |
671 | ||
7888309b | 672 | if (scm_is_true ((*cmp) (less, vp[i2], vp[i1]))) |
1d1559ce HWN |
673 | temp[it] = vp[i2++]; |
674 | else | |
675 | temp[it] = vp[i1++]; | |
676 | } | |
54e09076 | 677 | |
1d1559ce HWN |
678 | { |
679 | register SCM *vp = SCM_WRITABLE_VELTS(vec); | |
680 | ||
681 | /* Copy while first segment contains more characters */ | |
682 | while (i1 <= mid) | |
683 | temp[it++] = vp[i1++]; | |
684 | ||
685 | /* Copy while second segment contains more characters */ | |
686 | while (i2 <= high) | |
687 | temp[it++] = vp[i2++]; | |
688 | ||
689 | /* Copy back from temp to vp */ | |
690 | for (it = low; it <= high; ++it) | |
691 | vp[it] = temp[it]; | |
692 | } | |
693 | } /* scm_merge_vector_x */ | |
54e09076 | 694 | |
d339981a | 695 | |
54e09076 | 696 | static void |
1d1559ce HWN |
697 | scm_merge_vector_step (SCM vp, |
698 | SCM * temp, | |
d339981a | 699 | scm_t_trampoline_2 cmp, |
54e09076 | 700 | SCM less, |
c014a02e ML |
701 | long low, |
702 | long high) | |
54e09076 MD |
703 | { |
704 | if (high > low) | |
705 | { | |
c014a02e | 706 | long mid = (low + high) / 2; |
54e09076 MD |
707 | scm_merge_vector_step (vp, temp, cmp, less, low, mid); |
708 | scm_merge_vector_step (vp, temp, cmp, less, mid+1, high); | |
709 | scm_merge_vector_x (vp, temp, cmp, less, low, mid, high); | |
710 | } | |
711 | } /* scm_merge_vector_step */ | |
712 | ||
713 | ||
a1ec6916 | 714 | SCM_DEFINE (scm_stable_sort_x, "stable-sort!", 2, 0, 0, |
1bbd0b84 | 715 | (SCM items, SCM less), |
e3239868 DH |
716 | "Sort the sequence @var{items}, which may be a list or a\n" |
717 | "vector. @var{less} is used for comparing the sequence elements.\n" | |
718 | "The sorting is destructive, that means that the input sequence\n" | |
719 | "is modified to produce the sorted result.\n" | |
720 | "This is a stable sort.") | |
1bbd0b84 | 721 | #define FUNC_NAME s_scm_stable_sort_x |
54e09076 | 722 | { |
d339981a | 723 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
c014a02e | 724 | long len; /* list/vector length */ |
54e09076 | 725 | |
c96d76b8 NJ |
726 | if (SCM_NULL_OR_NIL_P (items)) |
727 | return items; | |
b5c2579a | 728 | |
d2e53ed6 | 729 | if (scm_is_pair (items)) |
54e09076 | 730 | { |
34d19ef6 | 731 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
d339981a | 732 | return scm_merge_list_step (&items, cmp, less, len); |
54e09076 MD |
733 | } |
734 | else if (SCM_VECTORP (items)) | |
735 | { | |
1d1559ce | 736 | SCM *temp; |
b5c2579a | 737 | len = SCM_VECTOR_LENGTH (items); |
34d19ef6 | 738 | |
34d19ef6 | 739 | /* |
1d1559ce HWN |
740 | the following array does not contain any new references to |
741 | SCM objects, so we can get away with allocing it on the heap. | |
742 | */ | |
67329a9e | 743 | temp = scm_malloc (len * sizeof(SCM)); |
34d19ef6 | 744 | |
d339981a | 745 | scm_merge_vector_step (items, temp, cmp, less, 0, len - 1); |
54e09076 MD |
746 | free(temp); |
747 | return items; | |
748 | } | |
749 | else | |
276dd677 | 750 | SCM_WRONG_TYPE_ARG (1, items); |
1bbd0b84 | 751 | } |
0f981281 | 752 | #undef FUNC_NAME |
54e09076 | 753 | |
d339981a | 754 | |
a1ec6916 | 755 | SCM_DEFINE (scm_stable_sort, "stable-sort", 2, 0, 0, |
1bbd0b84 | 756 | (SCM items, SCM less), |
e3239868 DH |
757 | "Sort the sequence @var{items}, which may be a list or a\n" |
758 | "vector. @var{less} is used for comparing the sequence elements.\n" | |
759 | "This is a stable sort.") | |
1bbd0b84 | 760 | #define FUNC_NAME s_scm_stable_sort |
54e09076 | 761 | { |
d339981a | 762 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
1d1559ce | 763 | |
c96d76b8 NJ |
764 | if (SCM_NULL_OR_NIL_P (items)) |
765 | return items; | |
b5c2579a | 766 | |
d2e53ed6 | 767 | if (scm_is_pair (items)) |
54e09076 | 768 | { |
1d1559ce | 769 | long len; /* list/vector length */ |
d339981a | 770 | |
34d19ef6 | 771 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
54e09076 | 772 | items = scm_list_copy (items); |
d339981a | 773 | return scm_merge_list_step (&items, cmp, less, len); |
54e09076 | 774 | } |
3cfe6eab | 775 | #if SCM_HAVE_ARRAYS |
afe5177e | 776 | /* support ordinary vectors even if arrays not available? */ |
54e09076 MD |
777 | else if (SCM_VECTORP (items)) |
778 | { | |
1d1559ce | 779 | long len = SCM_VECTOR_LENGTH (items); |
67329a9e | 780 | SCM *temp = scm_malloc (len * sizeof (SCM)); |
1d1559ce | 781 | SCM retvec = scm_make_uve (len, scm_array_prototype (items)); |
54e09076 | 782 | scm_array_copy_x (items, retvec); |
34d19ef6 | 783 | |
d339981a | 784 | scm_merge_vector_step (retvec, temp, cmp, less, 0, len - 1); |
54e09076 MD |
785 | free (temp); |
786 | return retvec; | |
787 | } | |
afe5177e | 788 | #endif |
54e09076 | 789 | else |
276dd677 | 790 | SCM_WRONG_TYPE_ARG (1, items); |
1bbd0b84 | 791 | } |
0f981281 | 792 | #undef FUNC_NAME |
54e09076 | 793 | |
d339981a | 794 | |
a1ec6916 | 795 | SCM_DEFINE (scm_sort_list_x, "sort-list!", 2, 0, 0, |
1bbd0b84 | 796 | (SCM items, SCM less), |
e3239868 DH |
797 | "Sort the list @var{items}, using @var{less} for comparing the\n" |
798 | "list elements. The sorting is destructive, that means that the\n" | |
799 | "input list is modified to produce the sorted result.\n" | |
800 | "This is a stable sort.") | |
1bbd0b84 | 801 | #define FUNC_NAME s_scm_sort_list_x |
54e09076 | 802 | { |
d339981a | 803 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
c014a02e | 804 | long len; |
d339981a | 805 | |
34d19ef6 | 806 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
d339981a | 807 | return scm_merge_list_step (&items, cmp, less, len); |
1bbd0b84 | 808 | } |
0f981281 | 809 | #undef FUNC_NAME |
54e09076 | 810 | |
d339981a | 811 | |
a1ec6916 | 812 | SCM_DEFINE (scm_sort_list, "sort-list", 2, 0, 0, |
e3239868 DH |
813 | (SCM items, SCM less), |
814 | "Sort the list @var{items}, using @var{less} for comparing the\n" | |
815 | "list elements. This is a stable sort.") | |
1bbd0b84 | 816 | #define FUNC_NAME s_scm_sort_list |
54e09076 | 817 | { |
d339981a | 818 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
c014a02e | 819 | long len; |
d339981a | 820 | |
34d19ef6 | 821 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
54e09076 | 822 | items = scm_list_copy (items); |
d339981a | 823 | return scm_merge_list_step (&items, cmp, less, len); |
1bbd0b84 | 824 | } |
0f981281 | 825 | #undef FUNC_NAME |
54e09076 | 826 | |
d339981a | 827 | |
54e09076 MD |
828 | void |
829 | scm_init_sort () | |
830 | { | |
a0599745 | 831 | #include "libguile/sort.x" |
54e09076 MD |
832 | |
833 | scm_add_feature ("sort"); | |
834 | } | |
89e00824 ML |
835 | |
836 | /* | |
837 | Local Variables: | |
838 | c-file-style: "gnu" | |
839 | End: | |
840 | */ |