Commit | Line | Data |
---|---|---|
504d99c5 | 1 | /* Copyright (C) 1999,2000,2001,2002 Free Software Foundation, Inc. |
73be1d9e MV |
2 | * This library is free software; you can redistribute it and/or |
3 | * modify it under the terms of the GNU Lesser General Public | |
4 | * License as published by the Free Software Foundation; either | |
5 | * version 2.1 of the License, or (at your option) any later version. | |
54e09076 | 6 | * |
73be1d9e MV |
7 | * This library is distributed in the hope that it will be useful, |
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
10 | * Lesser General Public License for more details. | |
54e09076 | 11 | * |
73be1d9e MV |
12 | * You should have received a copy of the GNU Lesser General Public |
13 | * License along with this library; if not, write to the Free Software | |
14 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
15 | */ | |
54e09076 | 16 | |
1bbd0b84 GB |
17 | |
18 | ||
54e09076 MD |
19 | /* Written in December 1998 by Roland Orre <orre@nada.kth.se> |
20 | * This implements the same sort interface as slib/sort.scm | |
21 | * for lists and vectors where slib defines: | |
22 | * sorted?, merge, merge!, sort, sort! | |
15d9c4e3 | 23 | * For scsh compatibility sort-list and sort-list! are also defined. |
54e09076 | 24 | * In cases where a stable-sort is required use stable-sort or |
15d9c4e3 | 25 | * stable-sort!. An additional feature is |
54e09076 | 26 | * (restricted-vector-sort! vector less? startpos endpos) |
15d9c4e3 | 27 | * which allows you to sort part of a vector. |
54e09076 MD |
28 | * Thanks to Aubrey Jaffer for the slib/sort.scm library. |
29 | * Thanks to Richard A. O'Keefe (based on Prolog code by D.H.D.Warren) | |
30 | * for the merge sort inspiration. | |
31 | * Thanks to Douglas C. Schmidt (schmidt@ics.uci.edu) for the | |
32 | * quicksort code. | |
33 | */ | |
34 | ||
a0599745 | 35 | #include "libguile/_scm.h" |
a0599745 MD |
36 | #include "libguile/eval.h" |
37 | #include "libguile/unif.h" | |
38 | #include "libguile/ramap.h" | |
a0599745 MD |
39 | #include "libguile/feature.h" |
40 | #include "libguile/vectors.h" | |
c96d76b8 | 41 | #include "libguile/lang.h" |
54e09076 | 42 | |
a0599745 MD |
43 | #include "libguile/validate.h" |
44 | #include "libguile/sort.h" | |
54e09076 | 45 | |
d339981a | 46 | |
54e09076 MD |
47 | /* The routine quicksort was extracted from the GNU C Library qsort.c |
48 | written by Douglas C. Schmidt (schmidt@ics.uci.edu) | |
49 | and adapted to guile by adding an extra pointer less | |
50 | to quicksort by Roland Orre <orre@nada.kth.se>. | |
51 | ||
52 | The reason to do this instead of using the library function qsort | |
53 | was to avoid dependency of the ANSI-C extensions for local functions | |
54 | and also to avoid obscure pool based solutions. | |
15d9c4e3 MD |
55 | |
56 | This sorting routine is not much more efficient than the stable | |
57 | version but doesn't consume extra memory. | |
54e09076 MD |
58 | */ |
59 | ||
d339981a | 60 | #define SWAP(a, b) do { const SCM _tmp = a; a = b; b = _tmp; } while (0) |
54e09076 MD |
61 | |
62 | ||
63 | /* Order size using quicksort. This implementation incorporates | |
64 | four optimizations discussed in Sedgewick: | |
65 | ||
d339981a DH |
66 | 1. Non-recursive, using an explicit stack of pointer that store the next |
67 | array partition to sort. To save time, this maximum amount of space | |
68 | required to store an array of MAX_SIZE_T is allocated on the stack. | |
69 | Assuming a bit width of 32 bits for size_t, this needs only | |
70 | 32 * sizeof (stack_node) == 128 bytes. Pretty cheap, actually. | |
54e09076 | 71 | |
d339981a DH |
72 | 2. Chose the pivot element using a median-of-three decision tree. This |
73 | reduces the probability of selecting a bad pivot value and eliminates | |
74 | certain extraneous comparisons. | |
54e09076 | 75 | |
d339981a DH |
76 | 3. Only quicksorts NR_ELEMS / MAX_THRESH partitions, leaving insertion sort |
77 | to order the MAX_THRESH items within each partition. This is a big win, | |
78 | since insertion sort is faster for small, mostly sorted array segments. | |
54e09076 MD |
79 | |
80 | 4. The larger of the two sub-partitions is always pushed onto the | |
81 | stack first, with the algorithm then concentrating on the | |
82 | smaller partition. This *guarantees* no more than log (n) | |
83 | stack size is needed (actually O(1) in this case)! */ | |
84 | ||
54e09076 | 85 | |
d339981a DH |
86 | /* Discontinue quicksort algorithm when partition gets below this size. |
87 | * This particular magic number was chosen to work best on a Sun 4/260. */ | |
88 | #define MAX_THRESH 4 | |
89 | ||
90 | ||
91 | /* Inline stack abstraction: The stack size for quicksorting at most as many | |
92 | * elements as can be given by a value of type size_t is, as described above, | |
93 | * log (MAX_SIZE_T), which is the number of bits of size_t. More accurately, | |
94 | * we would only need ceil (log (MAX_SIZE_T / MAX_THRESH)), but this is | |
95 | * ignored below. */ | |
96 | ||
97 | /* Stack node declarations used to store unfulfilled partition obligations. */ | |
98 | typedef struct { | |
99 | size_t lo; | |
100 | size_t hi; | |
101 | } stack_node; | |
102 | ||
103 | #define STACK_SIZE (8 * sizeof (size_t)) /* assume 8 bit char */ | |
104 | #define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top)) | |
105 | #define POP(low, high) ((void) (--top, (low = top->lo), (high = top->hi))) | |
106 | #define STACK_NOT_EMPTY (stack < top) | |
107 | ||
a34af05e | 108 | |
54e09076 | 109 | static void |
d339981a | 110 | quicksort (SCM *const base_ptr, size_t nr_elems, scm_t_trampoline_2 cmp, SCM less) |
54e09076 | 111 | { |
d339981a | 112 | static const char s_buggy_less[] = "buggy less predicate used when sorting"; |
54e09076 | 113 | |
d339981a | 114 | if (nr_elems == 0) |
54e09076 MD |
115 | /* Avoid lossage with unsigned arithmetic below. */ |
116 | return; | |
117 | ||
d339981a | 118 | if (nr_elems > MAX_THRESH) |
54e09076 | 119 | { |
d339981a DH |
120 | size_t lo = 0; |
121 | size_t hi = nr_elems - 1; | |
122 | ||
54e09076 MD |
123 | stack_node stack[STACK_SIZE]; |
124 | stack_node *top = stack + 1; | |
125 | ||
126 | while (STACK_NOT_EMPTY) | |
127 | { | |
d339981a DH |
128 | size_t left; |
129 | size_t right; | |
54e09076 MD |
130 | |
131 | /* Select median value from among LO, MID, and HI. Rearrange | |
132 | LO and HI so the three values are sorted. This lowers the | |
133 | probability of picking a pathological pivot value and | |
d339981a | 134 | skips a comparison for both the left and right. */ |
54e09076 | 135 | |
d339981a | 136 | size_t mid = lo + (hi - lo) / 2; |
54e09076 | 137 | |
d339981a DH |
138 | if (!SCM_FALSEP ((*cmp) (less, base_ptr[mid], base_ptr[lo]))) |
139 | SWAP (base_ptr[mid], base_ptr[lo]); | |
140 | if (!SCM_FALSEP ((*cmp) (less, base_ptr[hi], base_ptr[mid]))) | |
141 | SWAP (base_ptr[mid], base_ptr[hi]); | |
54e09076 MD |
142 | else |
143 | goto jump_over; | |
d339981a DH |
144 | if (!SCM_FALSEP ((*cmp) (less, base_ptr[mid], base_ptr[lo]))) |
145 | SWAP (base_ptr[mid], base_ptr[lo]); | |
54e09076 | 146 | jump_over:; |
54e09076 | 147 | |
d339981a DH |
148 | left = lo + 1; |
149 | right = hi - 1; | |
54e09076 MD |
150 | |
151 | /* Here's the famous ``collapse the walls'' section of quicksort. | |
152 | Gotta like those tight inner loops! They are the main reason | |
153 | that this algorithm runs much faster than others. */ | |
154 | do | |
155 | { | |
d339981a | 156 | while (!SCM_FALSEP ((*cmp) (less, base_ptr[left], base_ptr[mid]))) |
a34af05e | 157 | { |
d339981a | 158 | left++; |
a34af05e | 159 | /* The comparison predicate may be buggy */ |
d339981a | 160 | if (left > hi) |
5d2d2ffc | 161 | scm_misc_error (NULL, s_buggy_less, SCM_EOL); |
a34af05e | 162 | } |
54e09076 | 163 | |
d339981a | 164 | while (!SCM_FALSEP ((*cmp) (less, base_ptr[mid], base_ptr[right]))) |
a34af05e | 165 | { |
d339981a | 166 | right--; |
a34af05e | 167 | /* The comparison predicate may be buggy */ |
d339981a | 168 | if (right < lo) |
5d2d2ffc | 169 | scm_misc_error (NULL, s_buggy_less, SCM_EOL); |
a34af05e | 170 | } |
54e09076 | 171 | |
d339981a | 172 | if (left < right) |
54e09076 | 173 | { |
d339981a DH |
174 | SWAP (base_ptr[left], base_ptr[right]); |
175 | left++; | |
176 | right--; | |
54e09076 | 177 | } |
d339981a | 178 | else if (left == right) |
54e09076 | 179 | { |
d339981a DH |
180 | left++; |
181 | right--; | |
54e09076 MD |
182 | break; |
183 | } | |
184 | } | |
d339981a | 185 | while (left <= right); |
54e09076 MD |
186 | |
187 | /* Set up pointers for next iteration. First determine whether | |
188 | left and right partitions are below the threshold size. If so, | |
189 | ignore one or both. Otherwise, push the larger partition's | |
190 | bounds on the stack and continue sorting the smaller one. */ | |
191 | ||
d339981a | 192 | if ((size_t) (right - lo) <= MAX_THRESH) |
54e09076 | 193 | { |
d339981a | 194 | if ((size_t) (hi - left) <= MAX_THRESH) |
54e09076 MD |
195 | /* Ignore both small partitions. */ |
196 | POP (lo, hi); | |
197 | else | |
198 | /* Ignore small left partition. */ | |
d339981a | 199 | lo = left; |
54e09076 | 200 | } |
d339981a | 201 | else if ((size_t) (hi - left) <= MAX_THRESH) |
54e09076 | 202 | /* Ignore small right partition. */ |
d339981a DH |
203 | hi = right; |
204 | else if ((right - lo) > (hi - left)) | |
54e09076 MD |
205 | { |
206 | /* Push larger left partition indices. */ | |
d339981a DH |
207 | PUSH (lo, right); |
208 | lo = left; | |
54e09076 MD |
209 | } |
210 | else | |
211 | { | |
212 | /* Push larger right partition indices. */ | |
d339981a DH |
213 | PUSH (left, hi); |
214 | hi = right; | |
54e09076 MD |
215 | } |
216 | } | |
217 | } | |
218 | ||
d339981a DH |
219 | /* Once the BASE_PTR array is partially sorted by quicksort the rest is |
220 | completely sorted using insertion sort, since this is efficient for | |
221 | partitions below MAX_THRESH size. BASE_PTR points to the beginning of the | |
222 | array to sort, and END idexes the very last element in the array (*not* | |
223 | one beyond it!). */ | |
54e09076 MD |
224 | |
225 | { | |
d339981a DH |
226 | size_t tmp = 0; |
227 | size_t end = nr_elems - 1; | |
228 | size_t thresh = min (end, MAX_THRESH); | |
229 | size_t run; | |
54e09076 MD |
230 | |
231 | /* Find smallest element in first threshold and place it at the | |
232 | array's beginning. This is the smallest array element, | |
233 | and the operation speeds up insertion sort's inner loop. */ | |
234 | ||
d339981a DH |
235 | for (run = tmp + 1; run <= thresh; run++) |
236 | if (!SCM_FALSEP ((*cmp) (less, base_ptr[run], base_ptr[tmp]))) | |
237 | tmp = run; | |
54e09076 | 238 | |
d339981a DH |
239 | if (tmp != 0) |
240 | SWAP (base_ptr[tmp], base_ptr[0]); | |
54e09076 MD |
241 | |
242 | /* Insertion sort, running from left-hand-side up to right-hand-side. */ | |
243 | ||
d339981a DH |
244 | run = 1; |
245 | while (++run <= end) | |
54e09076 | 246 | { |
d339981a DH |
247 | tmp = run - 1; |
248 | while (!SCM_FALSEP ((*cmp) (less, base_ptr[run], base_ptr[tmp]))) | |
a34af05e | 249 | { |
a34af05e | 250 | /* The comparison predicate may be buggy */ |
d339981a | 251 | if (tmp == 0) |
5d2d2ffc | 252 | scm_misc_error (NULL, s_buggy_less, SCM_EOL); |
d339981a DH |
253 | |
254 | tmp--; | |
a34af05e | 255 | } |
54e09076 | 256 | |
d339981a DH |
257 | tmp++; |
258 | if (tmp != run) | |
54e09076 | 259 | { |
d339981a DH |
260 | SCM to_insert = base_ptr[run]; |
261 | size_t hi, lo; | |
262 | ||
263 | for (hi = lo = run; --lo >= tmp; hi = lo) | |
264 | base_ptr[hi] = base_ptr[lo]; | |
265 | base_ptr[hi] = to_insert; | |
54e09076 MD |
266 | } |
267 | } | |
268 | } | |
d339981a | 269 | } |
54e09076 | 270 | |
54e09076 | 271 | |
d339981a DH |
272 | static scm_t_trampoline_2 |
273 | compare_function (SCM less, unsigned int arg_nr, const char* fname) | |
54e09076 | 274 | { |
d339981a DH |
275 | const scm_t_trampoline_2 cmp = scm_trampoline_2 (less); |
276 | SCM_ASSERT_TYPE (cmp != NULL, less, arg_nr, fname, "less predicate"); | |
277 | return cmp; | |
278 | } | |
54e09076 | 279 | |
54e09076 MD |
280 | |
281 | /* Question: Is there any need to make this a more general array sort? | |
282 | It is probably enough to manage the vector type. */ | |
283 | /* endpos equal as for substring, i.e. endpos is not included. */ | |
da4a1dba | 284 | /* More natural with length? */ |
1bbd0b84 | 285 | |
a1ec6916 | 286 | SCM_DEFINE (scm_restricted_vector_sort_x, "restricted-vector-sort!", 4, 0, 0, |
1bbd0b84 | 287 | (SCM vec, SCM less, SCM startpos, SCM endpos), |
e3239868 DH |
288 | "Sort the vector @var{vec}, using @var{less} for comparing\n" |
289 | "the vector elements. @var{startpos} and @var{endpos} delimit\n" | |
290 | "the range of the vector which gets sorted. The return value\n" | |
291 | "is not specified.") | |
1bbd0b84 | 292 | #define FUNC_NAME s_scm_restricted_vector_sort_x |
54e09076 | 293 | { |
d339981a DH |
294 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
295 | size_t vlen, spos, len; | |
54e09076 MD |
296 | SCM *vp; |
297 | ||
34d19ef6 | 298 | SCM_VALIDATE_VECTOR (1, vec); |
34d19ef6 | 299 | vp = SCM_WRITABLE_VELTS (vec); /* vector pointer */ |
b5c2579a | 300 | vlen = SCM_VECTOR_LENGTH (vec); |
54e09076 | 301 | |
729dbac3 | 302 | SCM_VALIDATE_INUM_MIN_COPY (3, startpos, 0, spos); |
34d19ef6 HWN |
303 | SCM_ASSERT_RANGE (3, startpos, spos <= vlen); |
304 | SCM_VALIDATE_INUM_RANGE (4, endpos,0, vlen+1); | |
54e09076 MD |
305 | len = SCM_INUM (endpos) - spos; |
306 | ||
d339981a DH |
307 | quicksort (&vp[spos], len, cmp, less); |
308 | scm_remember_upto_here_1 (vec); | |
34d19ef6 | 309 | |
d339981a | 310 | return SCM_UNSPECIFIED; |
1bbd0b84 GB |
311 | } |
312 | #undef FUNC_NAME | |
54e09076 | 313 | |
d339981a | 314 | |
54e09076 MD |
315 | /* (sorted? sequence less?) |
316 | * is true when sequence is a list (x0 x1 ... xm) or a vector #(x0 ... xm) | |
317 | * such that for all 1 <= i <= m, | |
318 | * (not (less? (list-ref list i) (list-ref list (- i 1)))). */ | |
a1ec6916 | 319 | SCM_DEFINE (scm_sorted_p, "sorted?", 2, 0, 0, |
1bbd0b84 | 320 | (SCM items, SCM less), |
e3239868 DH |
321 | "Return @code{#t} iff @var{items} is a list or a vector such that\n" |
322 | "for all 1 <= i <= m, the predicate @var{less} returns true when\n" | |
323 | "applied to all elements i - 1 and i") | |
1bbd0b84 | 324 | #define FUNC_NAME s_scm_sorted_p |
54e09076 | 325 | { |
d339981a | 326 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
c014a02e | 327 | long len, j; /* list/vector length, temp j */ |
54e09076 | 328 | SCM item, rest; /* rest of items loop variable */ |
34d19ef6 | 329 | SCM const *vp; |
54e09076 | 330 | |
c96d76b8 | 331 | if (SCM_NULL_OR_NIL_P (items)) |
54e09076 | 332 | return SCM_BOOL_T; |
1bbd0b84 | 333 | |
54e09076 MD |
334 | if (SCM_CONSP (items)) |
335 | { | |
336 | len = scm_ilength (items); /* also checks that it's a pure list */ | |
34d19ef6 | 337 | SCM_ASSERT_RANGE (1, items, len >= 0); |
54e09076 MD |
338 | if (len <= 1) |
339 | return SCM_BOOL_T; | |
340 | ||
341 | item = SCM_CAR (items); | |
342 | rest = SCM_CDR (items); | |
343 | j = len - 1; | |
344 | while (j > 0) | |
345 | { | |
d339981a | 346 | if (!SCM_FALSEP ((*cmp) (less, SCM_CAR (rest), item))) |
54e09076 MD |
347 | return SCM_BOOL_F; |
348 | else | |
349 | { | |
350 | item = SCM_CAR (rest); | |
351 | rest = SCM_CDR (rest); | |
352 | j--; | |
353 | } | |
354 | } | |
355 | return SCM_BOOL_T; | |
356 | } | |
357 | else | |
358 | { | |
b5c2579a DH |
359 | SCM_VALIDATE_VECTOR (1, items); |
360 | ||
361 | vp = SCM_VELTS (items); /* vector pointer */ | |
362 | len = SCM_VECTOR_LENGTH (items); | |
363 | j = len - 1; | |
364 | while (j > 0) | |
54e09076 | 365 | { |
d339981a | 366 | if (!SCM_FALSEP ((*cmp) (less, vp[1], vp[0]))) |
b5c2579a DH |
367 | return SCM_BOOL_F; |
368 | else | |
369 | { | |
370 | vp++; | |
371 | j--; | |
372 | } | |
54e09076 | 373 | } |
b5c2579a | 374 | return SCM_BOOL_T; |
54e09076 | 375 | } |
b5c2579a | 376 | |
54e09076 | 377 | return SCM_BOOL_F; |
1bbd0b84 GB |
378 | } |
379 | #undef FUNC_NAME | |
54e09076 | 380 | |
d339981a | 381 | |
54e09076 MD |
382 | /* (merge a b less?) |
383 | takes two lists a and b such that (sorted? a less?) and (sorted? b less?) | |
384 | and returns a new list in which the elements of a and b have been stably | |
385 | interleaved so that (sorted? (merge a b less?) less?). | |
386 | Note: this does _not_ accept vectors. */ | |
a1ec6916 | 387 | SCM_DEFINE (scm_merge, "merge", 3, 0, 0, |
1bbd0b84 | 388 | (SCM alist, SCM blist, SCM less), |
8f85c0c6 NJ |
389 | "Merge two already sorted lists into one.\n" |
390 | "Given two lists @var{alist} and @var{blist}, such that\n" | |
391 | "@code{(sorted? alist less?)} and @code{(sorted? blist less?)},\n" | |
392 | "return a new list in which the elements of @var{alist} and\n" | |
e3239868 DH |
393 | "@var{blist} have been stably interleaved so that\n" |
394 | "@code{(sorted? (merge alist blist less?) less?)}.\n" | |
395 | "Note: this does _not_ accept vectors.") | |
1bbd0b84 | 396 | #define FUNC_NAME s_scm_merge |
54e09076 | 397 | { |
d339981a | 398 | SCM build; |
54e09076 | 399 | |
c96d76b8 | 400 | if (SCM_NULL_OR_NIL_P (alist)) |
54e09076 | 401 | return blist; |
c96d76b8 | 402 | else if (SCM_NULL_OR_NIL_P (blist)) |
54e09076 MD |
403 | return alist; |
404 | else | |
405 | { | |
d339981a DH |
406 | const scm_t_trampoline_2 cmp = compare_function (less, 3, FUNC_NAME); |
407 | long alen, blen; /* list lengths */ | |
408 | SCM last; | |
409 | ||
34d19ef6 HWN |
410 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (1, alist, alen); |
411 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (2, blist, blen); | |
d339981a | 412 | if (!SCM_FALSEP ((*cmp) (less, SCM_CAR (blist), SCM_CAR (alist)))) |
54e09076 MD |
413 | { |
414 | build = scm_cons (SCM_CAR (blist), SCM_EOL); | |
415 | blist = SCM_CDR (blist); | |
416 | blen--; | |
417 | } | |
c56cc3c8 MD |
418 | else |
419 | { | |
420 | build = scm_cons (SCM_CAR (alist), SCM_EOL); | |
421 | alist = SCM_CDR (alist); | |
422 | alen--; | |
423 | } | |
54e09076 MD |
424 | last = build; |
425 | while ((alen > 0) && (blen > 0)) | |
426 | { | |
d339981a | 427 | if (!SCM_FALSEP ((*cmp) (less, SCM_CAR (blist), SCM_CAR (alist)))) |
54e09076 MD |
428 | { |
429 | SCM_SETCDR (last, scm_cons (SCM_CAR (blist), SCM_EOL)); | |
430 | blist = SCM_CDR (blist); | |
431 | blen--; | |
432 | } | |
c56cc3c8 MD |
433 | else |
434 | { | |
435 | SCM_SETCDR (last, scm_cons (SCM_CAR (alist), SCM_EOL)); | |
436 | alist = SCM_CDR (alist); | |
437 | alen--; | |
438 | } | |
54e09076 MD |
439 | last = SCM_CDR (last); |
440 | } | |
441 | if ((alen > 0) && (blen == 0)) | |
442 | SCM_SETCDR (last, alist); | |
443 | else if ((alen == 0) && (blen > 0)) | |
444 | SCM_SETCDR (last, blist); | |
445 | } | |
446 | return build; | |
1bbd0b84 GB |
447 | } |
448 | #undef FUNC_NAME | |
449 | ||
54e09076 MD |
450 | |
451 | static SCM | |
452 | scm_merge_list_x (SCM alist, SCM blist, | |
453 | long alen, long blen, | |
d339981a | 454 | scm_t_trampoline_2 cmp, SCM less) |
54e09076 MD |
455 | { |
456 | SCM build, last; | |
457 | ||
c96d76b8 | 458 | if (SCM_NULL_OR_NIL_P (alist)) |
54e09076 | 459 | return blist; |
c96d76b8 | 460 | else if (SCM_NULL_OR_NIL_P (blist)) |
54e09076 MD |
461 | return alist; |
462 | else | |
463 | { | |
d339981a | 464 | if (!SCM_FALSEP ((*cmp) (less, SCM_CAR (blist), SCM_CAR (alist)))) |
54e09076 MD |
465 | { |
466 | build = blist; | |
467 | blist = SCM_CDR (blist); | |
468 | blen--; | |
469 | } | |
c56cc3c8 MD |
470 | else |
471 | { | |
472 | build = alist; | |
473 | alist = SCM_CDR (alist); | |
474 | alen--; | |
475 | } | |
54e09076 MD |
476 | last = build; |
477 | while ((alen > 0) && (blen > 0)) | |
478 | { | |
d339981a | 479 | if (!SCM_FALSEP ((*cmp) (less, SCM_CAR (blist), SCM_CAR (alist)))) |
54e09076 MD |
480 | { |
481 | SCM_SETCDR (last, blist); | |
482 | blist = SCM_CDR (blist); | |
483 | blen--; | |
484 | } | |
c56cc3c8 MD |
485 | else |
486 | { | |
487 | SCM_SETCDR (last, alist); | |
488 | alist = SCM_CDR (alist); | |
489 | alen--; | |
490 | } | |
54e09076 MD |
491 | last = SCM_CDR (last); |
492 | } | |
493 | if ((alen > 0) && (blen == 0)) | |
494 | SCM_SETCDR (last, alist); | |
495 | else if ((alen == 0) && (blen > 0)) | |
496 | SCM_SETCDR (last, blist); | |
497 | } | |
498 | return build; | |
499 | } /* scm_merge_list_x */ | |
500 | ||
d339981a | 501 | |
a1ec6916 | 502 | SCM_DEFINE (scm_merge_x, "merge!", 3, 0, 0, |
1bbd0b84 | 503 | (SCM alist, SCM blist, SCM less), |
e3239868 DH |
504 | "Takes two lists @var{alist} and @var{blist} such that\n" |
505 | "@code{(sorted? alist less?)} and @code{(sorted? blist less?)} and\n" | |
506 | "returns a new list in which the elements of @var{alist} and\n" | |
507 | "@var{blist} have been stably interleaved so that\n" | |
508 | " @code{(sorted? (merge alist blist less?) less?)}.\n" | |
509 | "This is the destructive variant of @code{merge}\n" | |
510 | "Note: this does _not_ accept vectors.") | |
1bbd0b84 | 511 | #define FUNC_NAME s_scm_merge_x |
54e09076 | 512 | { |
c96d76b8 | 513 | if (SCM_NULL_OR_NIL_P (alist)) |
54e09076 | 514 | return blist; |
c96d76b8 | 515 | else if (SCM_NULL_OR_NIL_P (blist)) |
54e09076 MD |
516 | return alist; |
517 | else | |
518 | { | |
d339981a DH |
519 | const scm_t_trampoline_2 cmp = compare_function (less, 3, FUNC_NAME); |
520 | long alen, blen; /* list lengths */ | |
34d19ef6 HWN |
521 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (1, alist, alen); |
522 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (2, blist, blen); | |
d339981a | 523 | return scm_merge_list_x (alist, blist, alen, blen, cmp, less); |
54e09076 | 524 | } |
1bbd0b84 GB |
525 | } |
526 | #undef FUNC_NAME | |
54e09076 | 527 | |
d339981a | 528 | |
54e09076 MD |
529 | /* This merge sort algorithm is same as slib's by Richard A. O'Keefe. |
530 | The algorithm is stable. We also tried to use the algorithm used by | |
531 | scsh's merge-sort but that algorithm showed to not be stable, even | |
532 | though it claimed to be. | |
533 | */ | |
534 | static SCM | |
d339981a | 535 | scm_merge_list_step (SCM * seq, scm_t_trampoline_2 cmp, SCM less, long n) |
54e09076 | 536 | { |
c56cc3c8 MD |
537 | SCM a, b; |
538 | ||
54e09076 MD |
539 | if (n > 2) |
540 | { | |
c014a02e | 541 | long mid = n / 2; |
c56cc3c8 MD |
542 | a = scm_merge_list_step (seq, cmp, less, mid); |
543 | b = scm_merge_list_step (seq, cmp, less, n - mid); | |
544 | return scm_merge_list_x (a, b, mid, n - mid, cmp, less); | |
54e09076 MD |
545 | } |
546 | else if (n == 2) | |
547 | { | |
548 | SCM p = *seq; | |
549 | SCM rest = SCM_CDR (*seq); | |
550 | SCM x = SCM_CAR (*seq); | |
551 | SCM y = SCM_CAR (SCM_CDR (*seq)); | |
552 | *seq = SCM_CDR (rest); | |
553 | SCM_SETCDR (rest, SCM_EOL); | |
d339981a | 554 | if (!SCM_FALSEP ((*cmp) (less, y, x))) |
54e09076 | 555 | { |
4b479d98 DH |
556 | SCM_SETCAR (p, y); |
557 | SCM_SETCAR (rest, x); | |
54e09076 MD |
558 | } |
559 | return p; | |
560 | } | |
561 | else if (n == 1) | |
562 | { | |
563 | SCM p = *seq; | |
564 | *seq = SCM_CDR (p); | |
565 | SCM_SETCDR (p, SCM_EOL); | |
566 | return p; | |
567 | } | |
568 | else | |
569 | return SCM_EOL; | |
570 | } /* scm_merge_list_step */ | |
571 | ||
572 | ||
a1ec6916 | 573 | SCM_DEFINE (scm_sort_x, "sort!", 2, 0, 0, |
1bbd0b84 | 574 | (SCM items, SCM less), |
e3239868 DH |
575 | "Sort the sequence @var{items}, which may be a list or a\n" |
576 | "vector. @var{less} is used for comparing the sequence\n" | |
577 | "elements. The sorting is destructive, that means that the\n" | |
578 | "input sequence is modified to produce the sorted result.\n" | |
579 | "This is not a stable sort.") | |
1bbd0b84 | 580 | #define FUNC_NAME s_scm_sort_x |
54e09076 | 581 | { |
c014a02e | 582 | long len; /* list/vector length */ |
c96d76b8 NJ |
583 | if (SCM_NULL_OR_NIL_P (items)) |
584 | return items; | |
b5c2579a | 585 | |
54e09076 MD |
586 | if (SCM_CONSP (items)) |
587 | { | |
d339981a | 588 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
34d19ef6 | 589 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
d339981a | 590 | return scm_merge_list_step (&items, cmp, less, len); |
54e09076 MD |
591 | } |
592 | else if (SCM_VECTORP (items)) | |
593 | { | |
b5c2579a | 594 | len = SCM_VECTOR_LENGTH (items); |
54e09076 MD |
595 | scm_restricted_vector_sort_x (items, |
596 | less, | |
597 | SCM_MAKINUM (0L), | |
598 | SCM_MAKINUM (len)); | |
599 | return items; | |
600 | } | |
601 | else | |
276dd677 | 602 | SCM_WRONG_TYPE_ARG (1, items); |
1bbd0b84 | 603 | } |
0f981281 | 604 | #undef FUNC_NAME |
54e09076 | 605 | |
1bbd0b84 | 606 | |
a1ec6916 | 607 | SCM_DEFINE (scm_sort, "sort", 2, 0, 0, |
1bbd0b84 | 608 | (SCM items, SCM less), |
e3239868 DH |
609 | "Sort the sequence @var{items}, which may be a list or a\n" |
610 | "vector. @var{less} is used for comparing the sequence\n" | |
611 | "elements. This is not a stable sort.") | |
1bbd0b84 | 612 | #define FUNC_NAME s_scm_sort |
54e09076 | 613 | { |
c96d76b8 NJ |
614 | if (SCM_NULL_OR_NIL_P (items)) |
615 | return items; | |
b5c2579a | 616 | |
54e09076 MD |
617 | if (SCM_CONSP (items)) |
618 | { | |
d339981a | 619 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
c014a02e | 620 | long len; |
e9e225e5 | 621 | |
34d19ef6 | 622 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
54e09076 | 623 | items = scm_list_copy (items); |
d339981a | 624 | return scm_merge_list_step (&items, cmp, less, len); |
54e09076 | 625 | } |
3cfe6eab | 626 | #if SCM_HAVE_ARRAYS |
afe5177e | 627 | /* support ordinary vectors even if arrays not available? */ |
54e09076 MD |
628 | else if (SCM_VECTORP (items)) |
629 | { | |
c014a02e | 630 | long len = SCM_VECTOR_LENGTH (items); |
e9e225e5 GH |
631 | SCM sortvec = scm_make_uve (len, scm_array_prototype (items)); |
632 | ||
54e09076 MD |
633 | scm_array_copy_x (items, sortvec); |
634 | scm_restricted_vector_sort_x (sortvec, | |
635 | less, | |
636 | SCM_MAKINUM (0L), | |
637 | SCM_MAKINUM (len)); | |
638 | return sortvec; | |
639 | } | |
afe5177e | 640 | #endif |
54e09076 | 641 | else |
276dd677 | 642 | SCM_WRONG_TYPE_ARG (1, items); |
1bbd0b84 | 643 | } |
0f981281 | 644 | #undef FUNC_NAME |
54e09076 | 645 | |
d339981a | 646 | |
54e09076 | 647 | static void |
1d1559ce HWN |
648 | scm_merge_vector_x (SCM vec, |
649 | SCM * temp, | |
d339981a | 650 | scm_t_trampoline_2 cmp, |
54e09076 | 651 | SCM less, |
c014a02e ML |
652 | long low, |
653 | long mid, | |
654 | long high) | |
54e09076 | 655 | { |
c014a02e ML |
656 | long it; /* Index for temp vector */ |
657 | long i1 = low; /* Index for lower vector segment */ | |
658 | long i2 = mid + 1; /* Index for upper vector segment */ | |
54e09076 MD |
659 | |
660 | /* Copy while both segments contain more characters */ | |
661 | for (it = low; (i1 <= mid) && (i2 <= high); ++it) | |
1d1559ce HWN |
662 | { |
663 | /* | |
664 | Every call of LESS might invoke GC. For full correctness, we | |
665 | should reset the generation of vecbase and tempbase between | |
666 | every call of less. | |
54e09076 | 667 | |
1d1559ce HWN |
668 | */ |
669 | register SCM *vp = SCM_WRITABLE_VELTS(vec); | |
670 | ||
d339981a | 671 | if (!SCM_FALSEP ((*cmp) (less, vp[i2], vp[i1]))) |
1d1559ce HWN |
672 | temp[it] = vp[i2++]; |
673 | else | |
674 | temp[it] = vp[i1++]; | |
675 | } | |
54e09076 | 676 | |
1d1559ce HWN |
677 | { |
678 | register SCM *vp = SCM_WRITABLE_VELTS(vec); | |
679 | ||
680 | /* Copy while first segment contains more characters */ | |
681 | while (i1 <= mid) | |
682 | temp[it++] = vp[i1++]; | |
683 | ||
684 | /* Copy while second segment contains more characters */ | |
685 | while (i2 <= high) | |
686 | temp[it++] = vp[i2++]; | |
687 | ||
688 | /* Copy back from temp to vp */ | |
689 | for (it = low; it <= high; ++it) | |
690 | vp[it] = temp[it]; | |
691 | } | |
692 | } /* scm_merge_vector_x */ | |
54e09076 | 693 | |
d339981a | 694 | |
54e09076 | 695 | static void |
1d1559ce HWN |
696 | scm_merge_vector_step (SCM vp, |
697 | SCM * temp, | |
d339981a | 698 | scm_t_trampoline_2 cmp, |
54e09076 | 699 | SCM less, |
c014a02e ML |
700 | long low, |
701 | long high) | |
54e09076 MD |
702 | { |
703 | if (high > low) | |
704 | { | |
c014a02e | 705 | long mid = (low + high) / 2; |
54e09076 MD |
706 | scm_merge_vector_step (vp, temp, cmp, less, low, mid); |
707 | scm_merge_vector_step (vp, temp, cmp, less, mid+1, high); | |
708 | scm_merge_vector_x (vp, temp, cmp, less, low, mid, high); | |
709 | } | |
710 | } /* scm_merge_vector_step */ | |
711 | ||
712 | ||
a1ec6916 | 713 | SCM_DEFINE (scm_stable_sort_x, "stable-sort!", 2, 0, 0, |
1bbd0b84 | 714 | (SCM items, SCM less), |
e3239868 DH |
715 | "Sort the sequence @var{items}, which may be a list or a\n" |
716 | "vector. @var{less} is used for comparing the sequence elements.\n" | |
717 | "The sorting is destructive, that means that the input sequence\n" | |
718 | "is modified to produce the sorted result.\n" | |
719 | "This is a stable sort.") | |
1bbd0b84 | 720 | #define FUNC_NAME s_scm_stable_sort_x |
54e09076 | 721 | { |
d339981a | 722 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
c014a02e | 723 | long len; /* list/vector length */ |
54e09076 | 724 | |
c96d76b8 NJ |
725 | if (SCM_NULL_OR_NIL_P (items)) |
726 | return items; | |
b5c2579a | 727 | |
54e09076 MD |
728 | if (SCM_CONSP (items)) |
729 | { | |
34d19ef6 | 730 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
d339981a | 731 | return scm_merge_list_step (&items, cmp, less, len); |
54e09076 MD |
732 | } |
733 | else if (SCM_VECTORP (items)) | |
734 | { | |
1d1559ce | 735 | SCM *temp; |
b5c2579a | 736 | len = SCM_VECTOR_LENGTH (items); |
34d19ef6 | 737 | |
34d19ef6 | 738 | /* |
1d1559ce HWN |
739 | the following array does not contain any new references to |
740 | SCM objects, so we can get away with allocing it on the heap. | |
741 | */ | |
67329a9e | 742 | temp = scm_malloc (len * sizeof(SCM)); |
34d19ef6 | 743 | |
d339981a | 744 | scm_merge_vector_step (items, temp, cmp, less, 0, len - 1); |
54e09076 MD |
745 | free(temp); |
746 | return items; | |
747 | } | |
748 | else | |
276dd677 | 749 | SCM_WRONG_TYPE_ARG (1, items); |
1bbd0b84 | 750 | } |
0f981281 | 751 | #undef FUNC_NAME |
54e09076 | 752 | |
d339981a | 753 | |
a1ec6916 | 754 | SCM_DEFINE (scm_stable_sort, "stable-sort", 2, 0, 0, |
1bbd0b84 | 755 | (SCM items, SCM less), |
e3239868 DH |
756 | "Sort the sequence @var{items}, which may be a list or a\n" |
757 | "vector. @var{less} is used for comparing the sequence elements.\n" | |
758 | "This is a stable sort.") | |
1bbd0b84 | 759 | #define FUNC_NAME s_scm_stable_sort |
54e09076 | 760 | { |
d339981a | 761 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
1d1559ce | 762 | |
c96d76b8 NJ |
763 | if (SCM_NULL_OR_NIL_P (items)) |
764 | return items; | |
b5c2579a | 765 | |
54e09076 MD |
766 | if (SCM_CONSP (items)) |
767 | { | |
1d1559ce | 768 | long len; /* list/vector length */ |
d339981a | 769 | |
34d19ef6 | 770 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
54e09076 | 771 | items = scm_list_copy (items); |
d339981a | 772 | return scm_merge_list_step (&items, cmp, less, len); |
54e09076 | 773 | } |
3cfe6eab | 774 | #if SCM_HAVE_ARRAYS |
afe5177e | 775 | /* support ordinary vectors even if arrays not available? */ |
54e09076 MD |
776 | else if (SCM_VECTORP (items)) |
777 | { | |
1d1559ce | 778 | long len = SCM_VECTOR_LENGTH (items); |
67329a9e | 779 | SCM *temp = scm_malloc (len * sizeof (SCM)); |
1d1559ce | 780 | SCM retvec = scm_make_uve (len, scm_array_prototype (items)); |
54e09076 | 781 | scm_array_copy_x (items, retvec); |
34d19ef6 | 782 | |
d339981a | 783 | scm_merge_vector_step (retvec, temp, cmp, less, 0, len - 1); |
54e09076 MD |
784 | free (temp); |
785 | return retvec; | |
786 | } | |
afe5177e | 787 | #endif |
54e09076 | 788 | else |
276dd677 | 789 | SCM_WRONG_TYPE_ARG (1, items); |
1bbd0b84 | 790 | } |
0f981281 | 791 | #undef FUNC_NAME |
54e09076 | 792 | |
d339981a | 793 | |
a1ec6916 | 794 | SCM_DEFINE (scm_sort_list_x, "sort-list!", 2, 0, 0, |
1bbd0b84 | 795 | (SCM items, SCM less), |
e3239868 DH |
796 | "Sort the list @var{items}, using @var{less} for comparing the\n" |
797 | "list elements. The sorting is destructive, that means that the\n" | |
798 | "input list is modified to produce the sorted result.\n" | |
799 | "This is a stable sort.") | |
1bbd0b84 | 800 | #define FUNC_NAME s_scm_sort_list_x |
54e09076 | 801 | { |
d339981a | 802 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
c014a02e | 803 | long len; |
d339981a | 804 | |
34d19ef6 | 805 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
d339981a | 806 | return scm_merge_list_step (&items, cmp, less, len); |
1bbd0b84 | 807 | } |
0f981281 | 808 | #undef FUNC_NAME |
54e09076 | 809 | |
d339981a | 810 | |
a1ec6916 | 811 | SCM_DEFINE (scm_sort_list, "sort-list", 2, 0, 0, |
e3239868 DH |
812 | (SCM items, SCM less), |
813 | "Sort the list @var{items}, using @var{less} for comparing the\n" | |
814 | "list elements. This is a stable sort.") | |
1bbd0b84 | 815 | #define FUNC_NAME s_scm_sort_list |
54e09076 | 816 | { |
d339981a | 817 | const scm_t_trampoline_2 cmp = compare_function (less, 2, FUNC_NAME); |
c014a02e | 818 | long len; |
d339981a | 819 | |
34d19ef6 | 820 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
54e09076 | 821 | items = scm_list_copy (items); |
d339981a | 822 | return scm_merge_list_step (&items, cmp, less, len); |
1bbd0b84 | 823 | } |
0f981281 | 824 | #undef FUNC_NAME |
54e09076 | 825 | |
d339981a | 826 | |
54e09076 MD |
827 | void |
828 | scm_init_sort () | |
829 | { | |
a0599745 | 830 | #include "libguile/sort.x" |
54e09076 MD |
831 | |
832 | scm_add_feature ("sort"); | |
833 | } | |
89e00824 ML |
834 | |
835 | /* | |
836 | Local Variables: | |
837 | c-file-style: "gnu" | |
838 | End: | |
839 | */ |