Commit | Line | Data |
---|---|---|
504d99c5 | 1 | /* Copyright (C) 1999,2000,2001,2002 Free Software Foundation, Inc. |
73be1d9e MV |
2 | * This library is free software; you can redistribute it and/or |
3 | * modify it under the terms of the GNU Lesser General Public | |
4 | * License as published by the Free Software Foundation; either | |
5 | * version 2.1 of the License, or (at your option) any later version. | |
54e09076 | 6 | * |
73be1d9e MV |
7 | * This library is distributed in the hope that it will be useful, |
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
10 | * Lesser General Public License for more details. | |
54e09076 | 11 | * |
73be1d9e MV |
12 | * You should have received a copy of the GNU Lesser General Public |
13 | * License along with this library; if not, write to the Free Software | |
14 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
15 | */ | |
54e09076 | 16 | |
1bbd0b84 GB |
17 | |
18 | ||
54e09076 MD |
19 | /* Written in December 1998 by Roland Orre <orre@nada.kth.se> |
20 | * This implements the same sort interface as slib/sort.scm | |
21 | * for lists and vectors where slib defines: | |
22 | * sorted?, merge, merge!, sort, sort! | |
15d9c4e3 | 23 | * For scsh compatibility sort-list and sort-list! are also defined. |
54e09076 | 24 | * In cases where a stable-sort is required use stable-sort or |
15d9c4e3 | 25 | * stable-sort!. An additional feature is |
54e09076 | 26 | * (restricted-vector-sort! vector less? startpos endpos) |
15d9c4e3 | 27 | * which allows you to sort part of a vector. |
54e09076 MD |
28 | * Thanks to Aubrey Jaffer for the slib/sort.scm library. |
29 | * Thanks to Richard A. O'Keefe (based on Prolog code by D.H.D.Warren) | |
30 | * for the merge sort inspiration. | |
31 | * Thanks to Douglas C. Schmidt (schmidt@ics.uci.edu) for the | |
32 | * quicksort code. | |
33 | */ | |
34 | ||
35 | /* We need this to get the definitions for HAVE_ALLOCA_H, etc. */ | |
af7a3945 RB |
36 | #if HAVE_CONFIG_H |
37 | # include <config.h> | |
38 | #endif | |
39 | ||
40 | /* do we still need this here? */ | |
a0599745 | 41 | #include "libguile/scmconfig.h" |
54e09076 MD |
42 | |
43 | /* AIX requires this to be the first thing in the file. The #pragma | |
44 | directive is indented so pre-ANSI compilers will ignore it, rather | |
45 | than choke on it. */ | |
46 | #ifndef __GNUC__ | |
47 | # if HAVE_ALLOCA_H | |
48 | # include <alloca.h> | |
49 | # else | |
50 | # ifdef _AIX | |
51 | #pragma alloca | |
52 | # else | |
53 | # ifndef alloca /* predefined by HP cc +Olibcalls */ | |
54 | char *alloca (); | |
55 | # endif | |
56 | # endif | |
57 | # endif | |
58 | #endif | |
59 | ||
783e7774 | 60 | #include <string.h> |
a0599745 | 61 | #include "libguile/_scm.h" |
54e09076 | 62 | |
a0599745 MD |
63 | #include "libguile/eval.h" |
64 | #include "libguile/unif.h" | |
65 | #include "libguile/ramap.h" | |
66 | #include "libguile/alist.h" | |
67 | #include "libguile/feature.h" | |
fdc28395 | 68 | #include "libguile/root.h" |
a0599745 | 69 | #include "libguile/vectors.h" |
c96d76b8 | 70 | #include "libguile/lang.h" |
54e09076 | 71 | |
a0599745 MD |
72 | #include "libguile/validate.h" |
73 | #include "libguile/sort.h" | |
54e09076 MD |
74 | |
75 | /* The routine quicksort was extracted from the GNU C Library qsort.c | |
76 | written by Douglas C. Schmidt (schmidt@ics.uci.edu) | |
77 | and adapted to guile by adding an extra pointer less | |
78 | to quicksort by Roland Orre <orre@nada.kth.se>. | |
79 | ||
80 | The reason to do this instead of using the library function qsort | |
81 | was to avoid dependency of the ANSI-C extensions for local functions | |
82 | and also to avoid obscure pool based solutions. | |
15d9c4e3 MD |
83 | |
84 | This sorting routine is not much more efficient than the stable | |
85 | version but doesn't consume extra memory. | |
54e09076 MD |
86 | */ |
87 | ||
/* Byte-wise swap two items of size SIZE.
   A and B must be `char *' expressions pointing at the two items; every
   macro argument is evaluated exactly once.  A SIZE of zero swaps
   nothing.  The temporaries carry a trailing underscore instead of a
   leading double underscore, which is reserved for the implementation.  */
#define SWAP(a, b, size)                        \
  do                                            \
    {                                           \
      size_t swap_left_ = (size);               \
      char *swap_a_ = (a), *swap_b_ = (b);      \
      while (swap_left_-- > 0)                  \
        {                                       \
          char swap_tmp_ = *swap_a_;            \
          *swap_a_++ = *swap_b_;                \
          *swap_b_++ = swap_tmp_;               \
        }                                       \
    } while (0)
101 | ||
/* Discontinue quicksort algorithm when partition gets below this size.
   This particular magic number was chosen to work best on a Sun 4/260. */
#define MAX_THRESH 4

/* Stack node declarations used to store unfulfilled partition obligations.
   LO and HI hold the two ends of a partition that still has to be
   processed.  */
typedef struct
  {
    char *lo;
    char *hi;
  }
stack_node;

/* The next 4 #defines implement a very fast in-line stack abstraction.
   PUSH, POP and STACK_NOT_EMPTY operate on two variables that must be in
   scope at the point of use: `stack' (the array of stack_node) and `top'
   (pointer to the next free slot).

   STACK_SIZE provides one slot per bit of size_t (assuming 8-bit bytes),
   because element counts are passed around as size_t; sizing it from
   `unsigned long' would be too small wherever size_t is the wider type.  */
#define STACK_SIZE      (8 * sizeof (size_t))
#define PUSH(low, high) ((void) ((top->lo = (low)), (top->hi = (high)), ++top))
#define POP(low, high)  ((void) (--top, (low = top->lo), (high = top->hi)))
#define STACK_NOT_EMPTY (stack < top)
119 | ||
120 | ||
121 | /* Order size using quicksort. This implementation incorporates | |
122 | four optimizations discussed in Sedgewick: | |
123 | ||
124 | 1. Non-recursive, using an explicit stack of pointers that store the | |
125 | next array partition to sort. To save time, the maximum amount | |
126 | of space required to sort an array of MAX_INT elements is allocated on the | |
127 | stack. Assuming 32-bit integers and pointers, this needs only 32 * | |
128 | sizeof(stack_node) == 256 bytes. Pretty cheap, actually. | |
129 | ||
130 | 2. Choose the pivot element using a median-of-three decision tree. | |
131 | This reduces the probability of selecting a bad pivot value and | |
132 | eliminates certain extraneous comparisons. | |
133 | ||
134 | 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving | |
135 | insertion sort to order the MAX_THRESH items within each partition. | |
136 | This is a big win, since insertion sort is faster for small, mostly | |
137 | sorted array segments. | |
138 | ||
139 | 4. The larger of the two sub-partitions is always pushed onto the | |
140 | stack first, with the algorithm then concentrating on the | |
141 | smaller partition. This *guarantees* no more than log (n) | |
142 | stack size is needed (actually O(1) in this case)! */ | |
143 | ||
144 | typedef int (*cmp_fun_t) (SCM less, | |
145 | const void*, | |
146 | const void*); | |
147 | ||
a34af05e MD |
148 | static const char s_buggy_less[] = "buggy less predicate used when sorting"; |
149 | ||
54e09076 MD |
150 | static void |
151 | quicksort (void *const pbase, | |
152 | size_t total_elems, | |
153 | size_t size, | |
154 | cmp_fun_t cmp, | |
155 | SCM less) | |
156 | { | |
157 | register char *base_ptr = (char *) pbase; | |
158 | ||
159 | /* Allocating SIZE bytes for a pivot buffer facilitates a better | |
160 | algorithm below since we can do comparisons directly on the pivot. */ | |
161 | char *pivot_buffer = (char *) alloca (size); | |
162 | const size_t max_thresh = MAX_THRESH * size; | |
163 | ||
164 | if (total_elems == 0) | |
165 | /* Avoid lossage with unsigned arithmetic below. */ | |
166 | return; | |
167 | ||
168 | if (total_elems > MAX_THRESH) | |
169 | { | |
170 | char *lo = base_ptr; | |
171 | char *hi = &lo[size * (total_elems - 1)]; | |
172 | /* Largest size needed for 32-bit int!!! */ | |
173 | stack_node stack[STACK_SIZE]; | |
174 | stack_node *top = stack + 1; | |
175 | ||
176 | while (STACK_NOT_EMPTY) | |
177 | { | |
178 | char *left_ptr; | |
179 | char *right_ptr; | |
180 | ||
181 | char *pivot = pivot_buffer; | |
182 | ||
183 | /* Select median value from among LO, MID, and HI. Rearrange | |
184 | LO and HI so the three values are sorted. This lowers the | |
185 | probability of picking a pathological pivot value and | |
186 | skips a comparison for both the LEFT_PTR and RIGHT_PTR. */ | |
187 | ||
188 | char *mid = lo + size * ((hi - lo) / size >> 1); | |
189 | ||
190 | if ((*cmp) (less, (void *) mid, (void *) lo)) | |
191 | SWAP (mid, lo, size); | |
192 | if ((*cmp) (less, (void *) hi, (void *) mid)) | |
193 | SWAP (mid, hi, size); | |
194 | else | |
195 | goto jump_over; | |
196 | if ((*cmp) (less, (void *) mid, (void *) lo)) | |
197 | SWAP (mid, lo, size); | |
198 | jump_over:; | |
199 | memcpy (pivot, mid, size); | |
200 | pivot = pivot_buffer; | |
201 | ||
202 | left_ptr = lo + size; | |
203 | right_ptr = hi - size; | |
204 | ||
205 | /* Here's the famous ``collapse the walls'' section of quicksort. | |
206 | Gotta like those tight inner loops! They are the main reason | |
207 | that this algorithm runs much faster than others. */ | |
208 | do | |
209 | { | |
210 | while ((*cmp) (less, (void *) left_ptr, (void *) pivot)) | |
a34af05e MD |
211 | { |
212 | left_ptr += size; | |
213 | /* The comparison predicate may be buggy */ | |
214 | if (left_ptr > hi) | |
5d2d2ffc | 215 | scm_misc_error (NULL, s_buggy_less, SCM_EOL); |
a34af05e | 216 | } |
54e09076 MD |
217 | |
218 | while ((*cmp) (less, (void *) pivot, (void *) right_ptr)) | |
a34af05e MD |
219 | { |
220 | right_ptr -= size; | |
221 | /* The comparison predicate may be buggy */ | |
222 | if (right_ptr < lo) | |
5d2d2ffc | 223 | scm_misc_error (NULL, s_buggy_less, SCM_EOL); |
a34af05e | 224 | } |
54e09076 MD |
225 | |
226 | if (left_ptr < right_ptr) | |
227 | { | |
228 | SWAP (left_ptr, right_ptr, size); | |
229 | left_ptr += size; | |
230 | right_ptr -= size; | |
231 | } | |
232 | else if (left_ptr == right_ptr) | |
233 | { | |
234 | left_ptr += size; | |
235 | right_ptr -= size; | |
236 | break; | |
237 | } | |
238 | } | |
239 | while (left_ptr <= right_ptr); | |
240 | ||
241 | /* Set up pointers for next iteration. First determine whether | |
242 | left and right partitions are below the threshold size. If so, | |
243 | ignore one or both. Otherwise, push the larger partition's | |
244 | bounds on the stack and continue sorting the smaller one. */ | |
245 | ||
246 | if ((size_t) (right_ptr - lo) <= max_thresh) | |
247 | { | |
248 | if ((size_t) (hi - left_ptr) <= max_thresh) | |
249 | /* Ignore both small partitions. */ | |
250 | POP (lo, hi); | |
251 | else | |
252 | /* Ignore small left partition. */ | |
253 | lo = left_ptr; | |
254 | } | |
255 | else if ((size_t) (hi - left_ptr) <= max_thresh) | |
256 | /* Ignore small right partition. */ | |
257 | hi = right_ptr; | |
258 | else if ((right_ptr - lo) > (hi - left_ptr)) | |
259 | { | |
260 | /* Push larger left partition indices. */ | |
261 | PUSH (lo, right_ptr); | |
262 | lo = left_ptr; | |
263 | } | |
264 | else | |
265 | { | |
266 | /* Push larger right partition indices. */ | |
267 | PUSH (left_ptr, hi); | |
268 | hi = right_ptr; | |
269 | } | |
270 | } | |
271 | } | |
272 | ||
273 | /* Once the BASE_PTR array is partially sorted by quicksort the rest | |
274 | is completely sorted using insertion sort, since this is efficient | |
275 | for partitions below MAX_THRESH size. BASE_PTR points to the beginning | |
276 | of the array to sort, and END_PTR points at the very last element in | |
277 | the array (*not* one beyond it!). */ | |
278 | ||
279 | { | |
280 | char *const end_ptr = &base_ptr[size * (total_elems - 1)]; | |
281 | char *tmp_ptr = base_ptr; | |
282 | char *thresh = min (end_ptr, base_ptr + max_thresh); | |
283 | register char *run_ptr; | |
284 | ||
285 | /* Find smallest element in first threshold and place it at the | |
286 | array's beginning. This is the smallest array element, | |
287 | and the operation speeds up insertion sort's inner loop. */ | |
288 | ||
289 | for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size) | |
290 | if ((*cmp) (less, (void *) run_ptr, (void *) tmp_ptr)) | |
291 | tmp_ptr = run_ptr; | |
292 | ||
293 | if (tmp_ptr != base_ptr) | |
294 | SWAP (tmp_ptr, base_ptr, size); | |
295 | ||
296 | /* Insertion sort, running from left-hand-side up to right-hand-side. */ | |
297 | ||
298 | run_ptr = base_ptr + size; | |
299 | while ((run_ptr += size) <= end_ptr) | |
300 | { | |
301 | tmp_ptr = run_ptr - size; | |
302 | while ((*cmp) (less, (void *) run_ptr, (void *) tmp_ptr)) | |
a34af05e MD |
303 | { |
304 | tmp_ptr -= size; | |
305 | /* The comparison predicate may be buggy */ | |
306 | if (tmp_ptr < base_ptr) | |
5d2d2ffc | 307 | scm_misc_error (NULL, s_buggy_less, SCM_EOL); |
a34af05e | 308 | } |
54e09076 MD |
309 | |
310 | tmp_ptr += size; | |
311 | if (tmp_ptr != run_ptr) | |
312 | { | |
313 | char *trav; | |
314 | ||
315 | trav = run_ptr + size; | |
316 | while (--trav >= run_ptr) | |
317 | { | |
318 | char c = *trav; | |
319 | char *hi, *lo; | |
320 | ||
321 | for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo) | |
322 | *hi = *lo; | |
323 | *hi = c; | |
324 | } | |
325 | } | |
326 | } | |
327 | } | |
328 | } /* quicksort */ | |
329 | ||
330 | ||
331 | /* comparison routines */ | |
332 | ||
333 | static int | |
334 | subr2less (SCM less, const void *a, const void *b) | |
335 | { | |
336 | return SCM_NFALSEP (SCM_SUBRF (less) (*(SCM *) a, *(SCM *) b)); | |
337 | } /* subr2less */ | |
338 | ||
54e09076 MD |
339 | static int |
340 | lsubrless (SCM less, const void *a, const void *b) | |
341 | { | |
342 | return SCM_NFALSEP (SCM_SUBRF (less) | |
343 | (scm_cons (*(SCM *) a, | |
344 | scm_cons (*(SCM *) b, SCM_EOL)))); | |
345 | } /* lsubrless */ | |
346 | ||
347 | static int | |
348 | closureless (SCM code, const void *a, const void *b) | |
349 | { | |
726d810a | 350 | SCM env = SCM_EXTEND_ENV (SCM_CLOSURE_FORMALS (code), |
4c6fe5fc MD |
351 | scm_cons (*(SCM *) a, |
352 | scm_cons (*(SCM *) b, SCM_EOL)), | |
353 | SCM_ENV (code)); | |
54e09076 | 354 | /* Evaluate the closure body */ |
f9450cdb | 355 | return !SCM_FALSEP (scm_eval_body (SCM_CLOSURE_BODY (code), env)); |
54e09076 MD |
356 | } /* closureless */ |
357 | ||
358 | static int | |
359 | applyless (SCM less, const void *a, const void *b) | |
360 | { | |
fdc28395 | 361 | return SCM_NFALSEP (scm_call_2 (less, *(SCM *) a, *(SCM *) b)); |
54e09076 MD |
362 | } /* applyless */ |
363 | ||
364 | static cmp_fun_t | |
365 | scm_cmp_function (SCM p) | |
366 | { | |
367 | switch (SCM_TYP7 (p)) | |
368 | { | |
369 | case scm_tc7_subr_2: | |
504d99c5 | 370 | case scm_tc7_subr_2o: |
54e09076 MD |
371 | case scm_tc7_rpsubr: |
372 | case scm_tc7_asubr: | |
373 | return subr2less; | |
54e09076 MD |
374 | case scm_tc7_lsubr: |
375 | return lsubrless; | |
376 | case scm_tcs_closures: | |
377 | return closureless; | |
378 | default: | |
379 | return applyless; | |
380 | } | |
381 | } /* scm_cmp_function */ | |
382 | ||
54e09076 MD |
383 | |
384 | /* Question: Is there any need to make this a more general array sort? | |
385 | It is probably enough to manage the vector type. */ | |
386 | /* endpos equal as for substring, i.e. endpos is not included. */ | |
da4a1dba | 387 | /* More natural with length? */ |
1bbd0b84 | 388 | |
a1ec6916 | 389 | SCM_DEFINE (scm_restricted_vector_sort_x, "restricted-vector-sort!", 4, 0, 0, |
1bbd0b84 | 390 | (SCM vec, SCM less, SCM startpos, SCM endpos), |
e3239868 DH |
391 | "Sort the vector @var{vec}, using @var{less} for comparing\n" |
392 | "the vector elements. @var{startpos} and @var{endpos} delimit\n" | |
393 | "the range of the vector which gets sorted. The return value\n" | |
394 | "is not specified.") | |
1bbd0b84 | 395 | #define FUNC_NAME s_scm_restricted_vector_sort_x |
54e09076 MD |
396 | { |
397 | size_t vlen, spos, len, size = sizeof (SCM); | |
398 | SCM *vp; | |
399 | ||
34d19ef6 HWN |
400 | SCM_VALIDATE_VECTOR (1, vec); |
401 | SCM_VALIDATE_NIM (2, less); | |
b5c2579a | 402 | |
34d19ef6 | 403 | vp = SCM_WRITABLE_VELTS (vec); /* vector pointer */ |
b5c2579a | 404 | vlen = SCM_VECTOR_LENGTH (vec); |
54e09076 | 405 | |
729dbac3 | 406 | SCM_VALIDATE_INUM_MIN_COPY (3, startpos, 0, spos); |
34d19ef6 HWN |
407 | SCM_ASSERT_RANGE (3, startpos, spos <= vlen); |
408 | SCM_VALIDATE_INUM_RANGE (4, endpos,0, vlen+1); | |
54e09076 MD |
409 | len = SCM_INUM (endpos) - spos; |
410 | ||
411 | quicksort (&vp[spos], len, size, scm_cmp_function (less), less); | |
34d19ef6 | 412 | |
504d99c5 | 413 | return scm_return_first (SCM_UNSPECIFIED, vec); |
54e09076 | 414 | /* return vec; */ |
1bbd0b84 GB |
415 | } |
416 | #undef FUNC_NAME | |
54e09076 MD |
417 | |
418 | /* (sorted? sequence less?) | |
419 | * is true when sequence is a list (x0 x1 ... xm) or a vector #(x0 ... xm) | |
420 | * such that for all 1 <= i <= m, | |
421 | * (not (less? (list-ref list i) (list-ref list (- i 1)))). */ | |
a1ec6916 | 422 | SCM_DEFINE (scm_sorted_p, "sorted?", 2, 0, 0, |
1bbd0b84 | 423 | (SCM items, SCM less), |
e3239868 DH |
424 | "Return @code{#t} iff @var{items} is a list or a vector such that\n" |
425 | "for all 1 <= i <= m, the predicate @var{less} returns true when\n" | |
426 | "applied to all elements i - 1 and i") | |
1bbd0b84 | 427 | #define FUNC_NAME s_scm_sorted_p |
54e09076 | 428 | { |
c014a02e | 429 | long len, j; /* list/vector length, temp j */ |
54e09076 | 430 | SCM item, rest; /* rest of items loop variable */ |
34d19ef6 | 431 | SCM const *vp; |
54e09076 MD |
432 | cmp_fun_t cmp = scm_cmp_function (less); |
433 | ||
c96d76b8 | 434 | if (SCM_NULL_OR_NIL_P (items)) |
54e09076 | 435 | return SCM_BOOL_T; |
1bbd0b84 | 436 | |
34d19ef6 | 437 | SCM_VALIDATE_NIM (2, less); |
54e09076 MD |
438 | |
439 | if (SCM_CONSP (items)) | |
440 | { | |
441 | len = scm_ilength (items); /* also checks that it's a pure list */ | |
34d19ef6 | 442 | SCM_ASSERT_RANGE (1, items, len >= 0); |
54e09076 MD |
443 | if (len <= 1) |
444 | return SCM_BOOL_T; | |
445 | ||
446 | item = SCM_CAR (items); | |
447 | rest = SCM_CDR (items); | |
448 | j = len - 1; | |
449 | while (j > 0) | |
450 | { | |
4b479d98 | 451 | if ((*cmp) (less, SCM_CARLOC(rest), &item)) |
54e09076 MD |
452 | return SCM_BOOL_F; |
453 | else | |
454 | { | |
455 | item = SCM_CAR (rest); | |
456 | rest = SCM_CDR (rest); | |
457 | j--; | |
458 | } | |
459 | } | |
460 | return SCM_BOOL_T; | |
461 | } | |
462 | else | |
463 | { | |
b5c2579a DH |
464 | SCM_VALIDATE_VECTOR (1, items); |
465 | ||
466 | vp = SCM_VELTS (items); /* vector pointer */ | |
467 | len = SCM_VECTOR_LENGTH (items); | |
468 | j = len - 1; | |
469 | while (j > 0) | |
54e09076 | 470 | { |
b5c2579a DH |
471 | if ((*cmp) (less, &vp[1], vp)) |
472 | return SCM_BOOL_F; | |
473 | else | |
474 | { | |
475 | vp++; | |
476 | j--; | |
477 | } | |
54e09076 | 478 | } |
b5c2579a | 479 | return SCM_BOOL_T; |
54e09076 | 480 | } |
b5c2579a | 481 | |
54e09076 | 482 | return SCM_BOOL_F; |
1bbd0b84 GB |
483 | } |
484 | #undef FUNC_NAME | |
54e09076 MD |
485 | |
486 | /* (merge a b less?) | |
487 | takes two lists a and b such that (sorted? a less?) and (sorted? b less?) | |
488 | and returns a new list in which the elements of a and b have been stably | |
489 | interleaved so that (sorted? (merge a b less?) less?). | |
490 | Note: this does _not_ accept vectors. */ | |
a1ec6916 | 491 | SCM_DEFINE (scm_merge, "merge", 3, 0, 0, |
1bbd0b84 | 492 | (SCM alist, SCM blist, SCM less), |
8f85c0c6 NJ |
493 | "Merge two already sorted lists into one.\n" |
494 | "Given two lists @var{alist} and @var{blist}, such that\n" | |
495 | "@code{(sorted? alist less?)} and @code{(sorted? blist less?)},\n" | |
496 | "return a new list in which the elements of @var{alist} and\n" | |
e3239868 DH |
497 | "@var{blist} have been stably interleaved so that\n" |
498 | "@code{(sorted? (merge alist blist less?) less?)}.\n" | |
499 | "Note: this does _not_ accept vectors.") | |
1bbd0b84 | 500 | #define FUNC_NAME s_scm_merge |
54e09076 | 501 | { |
c014a02e | 502 | long alen, blen; /* list lengths */ |
54e09076 MD |
503 | SCM build, last; |
504 | cmp_fun_t cmp = scm_cmp_function (less); | |
34d19ef6 | 505 | SCM_VALIDATE_NIM (3, less); |
54e09076 | 506 | |
c96d76b8 | 507 | if (SCM_NULL_OR_NIL_P (alist)) |
54e09076 | 508 | return blist; |
c96d76b8 | 509 | else if (SCM_NULL_OR_NIL_P (blist)) |
54e09076 MD |
510 | return alist; |
511 | else | |
512 | { | |
34d19ef6 HWN |
513 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (1, alist, alen); |
514 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (2, blist, blen); | |
4b479d98 | 515 | if ((*cmp) (less, SCM_CARLOC (blist), SCM_CARLOC (alist))) |
54e09076 MD |
516 | { |
517 | build = scm_cons (SCM_CAR (blist), SCM_EOL); | |
518 | blist = SCM_CDR (blist); | |
519 | blen--; | |
520 | } | |
c56cc3c8 MD |
521 | else |
522 | { | |
523 | build = scm_cons (SCM_CAR (alist), SCM_EOL); | |
524 | alist = SCM_CDR (alist); | |
525 | alen--; | |
526 | } | |
54e09076 MD |
527 | last = build; |
528 | while ((alen > 0) && (blen > 0)) | |
529 | { | |
4b479d98 | 530 | if ((*cmp) (less, SCM_CARLOC (blist), SCM_CARLOC (alist))) |
54e09076 MD |
531 | { |
532 | SCM_SETCDR (last, scm_cons (SCM_CAR (blist), SCM_EOL)); | |
533 | blist = SCM_CDR (blist); | |
534 | blen--; | |
535 | } | |
c56cc3c8 MD |
536 | else |
537 | { | |
538 | SCM_SETCDR (last, scm_cons (SCM_CAR (alist), SCM_EOL)); | |
539 | alist = SCM_CDR (alist); | |
540 | alen--; | |
541 | } | |
54e09076 MD |
542 | last = SCM_CDR (last); |
543 | } | |
544 | if ((alen > 0) && (blen == 0)) | |
545 | SCM_SETCDR (last, alist); | |
546 | else if ((alen == 0) && (blen > 0)) | |
547 | SCM_SETCDR (last, blist); | |
548 | } | |
549 | return build; | |
1bbd0b84 GB |
550 | } |
551 | #undef FUNC_NAME | |
552 | ||
54e09076 MD |
553 | |
554 | static SCM | |
555 | scm_merge_list_x (SCM alist, SCM blist, | |
556 | long alen, long blen, | |
557 | cmp_fun_t cmp, SCM less) | |
558 | { | |
559 | SCM build, last; | |
560 | ||
c96d76b8 | 561 | if (SCM_NULL_OR_NIL_P (alist)) |
54e09076 | 562 | return blist; |
c96d76b8 | 563 | else if (SCM_NULL_OR_NIL_P (blist)) |
54e09076 MD |
564 | return alist; |
565 | else | |
566 | { | |
4b479d98 | 567 | if ((*cmp) (less, SCM_CARLOC (blist), SCM_CARLOC (alist))) |
54e09076 MD |
568 | { |
569 | build = blist; | |
570 | blist = SCM_CDR (blist); | |
571 | blen--; | |
572 | } | |
c56cc3c8 MD |
573 | else |
574 | { | |
575 | build = alist; | |
576 | alist = SCM_CDR (alist); | |
577 | alen--; | |
578 | } | |
54e09076 MD |
579 | last = build; |
580 | while ((alen > 0) && (blen > 0)) | |
581 | { | |
4b479d98 | 582 | if ((*cmp) (less, SCM_CARLOC (blist), SCM_CARLOC (alist))) |
54e09076 MD |
583 | { |
584 | SCM_SETCDR (last, blist); | |
585 | blist = SCM_CDR (blist); | |
586 | blen--; | |
587 | } | |
c56cc3c8 MD |
588 | else |
589 | { | |
590 | SCM_SETCDR (last, alist); | |
591 | alist = SCM_CDR (alist); | |
592 | alen--; | |
593 | } | |
54e09076 MD |
594 | last = SCM_CDR (last); |
595 | } | |
596 | if ((alen > 0) && (blen == 0)) | |
597 | SCM_SETCDR (last, alist); | |
598 | else if ((alen == 0) && (blen > 0)) | |
599 | SCM_SETCDR (last, blist); | |
600 | } | |
601 | return build; | |
602 | } /* scm_merge_list_x */ | |
603 | ||
a1ec6916 | 604 | SCM_DEFINE (scm_merge_x, "merge!", 3, 0, 0, |
1bbd0b84 | 605 | (SCM alist, SCM blist, SCM less), |
e3239868 DH |
606 | "Takes two lists @var{alist} and @var{blist} such that\n" |
607 | "@code{(sorted? alist less?)} and @code{(sorted? blist less?)} and\n" | |
608 | "returns a new list in which the elements of @var{alist} and\n" | |
609 | "@var{blist} have been stably interleaved so that\n" | |
610 | " @code{(sorted? (merge alist blist less?) less?)}.\n" | |
611 | "This is the destructive variant of @code{merge}\n" | |
612 | "Note: this does _not_ accept vectors.") | |
1bbd0b84 | 613 | #define FUNC_NAME s_scm_merge_x |
54e09076 | 614 | { |
c014a02e | 615 | long alen, blen; /* list lengths */ |
54e09076 | 616 | |
34d19ef6 | 617 | SCM_VALIDATE_NIM (3, less); |
c96d76b8 | 618 | if (SCM_NULL_OR_NIL_P (alist)) |
54e09076 | 619 | return blist; |
c96d76b8 | 620 | else if (SCM_NULL_OR_NIL_P (blist)) |
54e09076 MD |
621 | return alist; |
622 | else | |
623 | { | |
34d19ef6 HWN |
624 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (1, alist, alen); |
625 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (2, blist, blen); | |
54e09076 MD |
626 | return scm_merge_list_x (alist, blist, |
627 | alen, blen, | |
628 | scm_cmp_function (less), | |
629 | less); | |
630 | } | |
1bbd0b84 GB |
631 | } |
632 | #undef FUNC_NAME | |
54e09076 MD |
633 | |
634 | /* This merge sort algorithm is same as slib's by Richard A. O'Keefe. | |
635 | The algorithm is stable. We also tried to use the algorithm used by | |
636 | scsh's merge-sort but that algorithm showed to not be stable, even | |
637 | though it claimed to be. | |
638 | */ | |
639 | static SCM | |
640 | scm_merge_list_step (SCM * seq, | |
641 | cmp_fun_t cmp, | |
642 | SCM less, | |
c014a02e | 643 | long n) |
54e09076 | 644 | { |
c56cc3c8 MD |
645 | SCM a, b; |
646 | ||
54e09076 MD |
647 | if (n > 2) |
648 | { | |
c014a02e | 649 | long mid = n / 2; |
c56cc3c8 MD |
650 | a = scm_merge_list_step (seq, cmp, less, mid); |
651 | b = scm_merge_list_step (seq, cmp, less, n - mid); | |
652 | return scm_merge_list_x (a, b, mid, n - mid, cmp, less); | |
54e09076 MD |
653 | } |
654 | else if (n == 2) | |
655 | { | |
656 | SCM p = *seq; | |
657 | SCM rest = SCM_CDR (*seq); | |
658 | SCM x = SCM_CAR (*seq); | |
659 | SCM y = SCM_CAR (SCM_CDR (*seq)); | |
660 | *seq = SCM_CDR (rest); | |
661 | SCM_SETCDR (rest, SCM_EOL); | |
662 | if ((*cmp) (less, &y, &x)) | |
663 | { | |
4b479d98 DH |
664 | SCM_SETCAR (p, y); |
665 | SCM_SETCAR (rest, x); | |
54e09076 MD |
666 | } |
667 | return p; | |
668 | } | |
669 | else if (n == 1) | |
670 | { | |
671 | SCM p = *seq; | |
672 | *seq = SCM_CDR (p); | |
673 | SCM_SETCDR (p, SCM_EOL); | |
674 | return p; | |
675 | } | |
676 | else | |
677 | return SCM_EOL; | |
678 | } /* scm_merge_list_step */ | |
679 | ||
680 | ||
54e09076 | 681 | /* scm_sort_x manages lists and vectors, not stable sort */ |
a1ec6916 | 682 | SCM_DEFINE (scm_sort_x, "sort!", 2, 0, 0, |
1bbd0b84 | 683 | (SCM items, SCM less), |
e3239868 DH |
684 | "Sort the sequence @var{items}, which may be a list or a\n" |
685 | "vector. @var{less} is used for comparing the sequence\n" | |
686 | "elements. The sorting is destructive, that means that the\n" | |
687 | "input sequence is modified to produce the sorted result.\n" | |
688 | "This is not a stable sort.") | |
1bbd0b84 | 689 | #define FUNC_NAME s_scm_sort_x |
54e09076 | 690 | { |
c014a02e | 691 | long len; /* list/vector length */ |
c96d76b8 NJ |
692 | if (SCM_NULL_OR_NIL_P (items)) |
693 | return items; | |
b5c2579a | 694 | |
34d19ef6 | 695 | SCM_VALIDATE_NIM (2, less); |
54e09076 MD |
696 | |
697 | if (SCM_CONSP (items)) | |
698 | { | |
34d19ef6 | 699 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
54e09076 MD |
700 | return scm_merge_list_step (&items, scm_cmp_function (less), less, len); |
701 | } | |
702 | else if (SCM_VECTORP (items)) | |
703 | { | |
b5c2579a | 704 | len = SCM_VECTOR_LENGTH (items); |
54e09076 MD |
705 | scm_restricted_vector_sort_x (items, |
706 | less, | |
707 | SCM_MAKINUM (0L), | |
708 | SCM_MAKINUM (len)); | |
709 | return items; | |
710 | } | |
711 | else | |
276dd677 | 712 | SCM_WRONG_TYPE_ARG (1, items); |
1bbd0b84 | 713 | } |
0f981281 | 714 | #undef FUNC_NAME |
54e09076 MD |
715 | |
716 | /* scm_sort manages lists and vectors, not stable sort */ | |
1bbd0b84 | 717 | |
a1ec6916 | 718 | SCM_DEFINE (scm_sort, "sort", 2, 0, 0, |
1bbd0b84 | 719 | (SCM items, SCM less), |
e3239868 DH |
720 | "Sort the sequence @var{items}, which may be a list or a\n" |
721 | "vector. @var{less} is used for comparing the sequence\n" | |
722 | "elements. This is not a stable sort.") | |
1bbd0b84 | 723 | #define FUNC_NAME s_scm_sort |
54e09076 | 724 | { |
c96d76b8 NJ |
725 | if (SCM_NULL_OR_NIL_P (items)) |
726 | return items; | |
b5c2579a | 727 | |
34d19ef6 | 728 | SCM_VALIDATE_NIM (2, less); |
54e09076 MD |
729 | if (SCM_CONSP (items)) |
730 | { | |
c014a02e | 731 | long len; |
e9e225e5 | 732 | |
34d19ef6 | 733 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
54e09076 MD |
734 | items = scm_list_copy (items); |
735 | return scm_merge_list_step (&items, scm_cmp_function (less), less, len); | |
736 | } | |
3cfe6eab | 737 | #if SCM_HAVE_ARRAYS |
afe5177e | 738 | /* support ordinary vectors even if arrays not available? */ |
54e09076 MD |
739 | else if (SCM_VECTORP (items)) |
740 | { | |
c014a02e | 741 | long len = SCM_VECTOR_LENGTH (items); |
e9e225e5 GH |
742 | SCM sortvec = scm_make_uve (len, scm_array_prototype (items)); |
743 | ||
54e09076 MD |
744 | scm_array_copy_x (items, sortvec); |
745 | scm_restricted_vector_sort_x (sortvec, | |
746 | less, | |
747 | SCM_MAKINUM (0L), | |
748 | SCM_MAKINUM (len)); | |
749 | return sortvec; | |
750 | } | |
afe5177e | 751 | #endif |
54e09076 | 752 | else |
276dd677 | 753 | SCM_WRONG_TYPE_ARG (1, items); |
1bbd0b84 | 754 | } |
0f981281 | 755 | #undef FUNC_NAME |
54e09076 MD |
756 | |
757 | static void | |
1d1559ce HWN |
758 | scm_merge_vector_x (SCM vec, |
759 | SCM * temp, | |
54e09076 MD |
760 | cmp_fun_t cmp, |
761 | SCM less, | |
c014a02e ML |
762 | long low, |
763 | long mid, | |
764 | long high) | |
54e09076 | 765 | { |
c014a02e ML |
766 | long it; /* Index for temp vector */ |
767 | long i1 = low; /* Index for lower vector segment */ | |
768 | long i2 = mid + 1; /* Index for upper vector segment */ | |
54e09076 MD |
769 | |
770 | /* Copy while both segments contain more characters */ | |
771 | for (it = low; (i1 <= mid) && (i2 <= high); ++it) | |
1d1559ce HWN |
772 | { |
773 | /* | |
774 | Every call of LESS might invoke GC. For full correctness, we | |
775 | should reset the generation of vecbase and tempbase between | |
776 | every call of less. | |
54e09076 | 777 | |
1d1559ce HWN |
778 | */ |
779 | register SCM *vp = SCM_WRITABLE_VELTS(vec); | |
780 | ||
781 | if ((*cmp) (less, &vp[i2], &vp[i1])) | |
782 | temp[it] = vp[i2++]; | |
783 | else | |
784 | temp[it] = vp[i1++]; | |
785 | } | |
54e09076 | 786 | |
1d1559ce HWN |
787 | { |
788 | register SCM *vp = SCM_WRITABLE_VELTS(vec); | |
789 | ||
790 | /* Copy while first segment contains more characters */ | |
791 | while (i1 <= mid) | |
792 | temp[it++] = vp[i1++]; | |
793 | ||
794 | /* Copy while second segment contains more characters */ | |
795 | while (i2 <= high) | |
796 | temp[it++] = vp[i2++]; | |
797 | ||
798 | /* Copy back from temp to vp */ | |
799 | for (it = low; it <= high; ++it) | |
800 | vp[it] = temp[it]; | |
801 | } | |
802 | } /* scm_merge_vector_x */ | |
54e09076 MD |
803 | |
804 | static void | |
1d1559ce HWN |
805 | scm_merge_vector_step (SCM vp, |
806 | SCM * temp, | |
54e09076 MD |
807 | cmp_fun_t cmp, |
808 | SCM less, | |
c014a02e ML |
809 | long low, |
810 | long high) | |
54e09076 MD |
811 | { |
812 | if (high > low) | |
813 | { | |
c014a02e | 814 | long mid = (low + high) / 2; |
54e09076 MD |
815 | scm_merge_vector_step (vp, temp, cmp, less, low, mid); |
816 | scm_merge_vector_step (vp, temp, cmp, less, mid+1, high); | |
817 | scm_merge_vector_x (vp, temp, cmp, less, low, mid, high); | |
818 | } | |
819 | } /* scm_merge_vector_step */ | |
820 | ||
821 | ||
54e09076 MD |
822 | /* stable-sort! manages lists and vectors */ |
823 | ||
a1ec6916 | 824 | SCM_DEFINE (scm_stable_sort_x, "stable-sort!", 2, 0, 0, |
1bbd0b84 | 825 | (SCM items, SCM less), |
e3239868 DH |
826 | "Sort the sequence @var{items}, which may be a list or a\n" |
827 | "vector. @var{less} is used for comparing the sequence elements.\n" | |
828 | "The sorting is destructive, that means that the input sequence\n" | |
829 | "is modified to produce the sorted result.\n" | |
830 | "This is a stable sort.") | |
1bbd0b84 | 831 | #define FUNC_NAME s_scm_stable_sort_x |
54e09076 | 832 | { |
c014a02e | 833 | long len; /* list/vector length */ |
54e09076 | 834 | |
c96d76b8 NJ |
835 | if (SCM_NULL_OR_NIL_P (items)) |
836 | return items; | |
b5c2579a | 837 | |
34d19ef6 | 838 | SCM_VALIDATE_NIM (2, less); |
54e09076 MD |
839 | if (SCM_CONSP (items)) |
840 | { | |
34d19ef6 | 841 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
54e09076 MD |
842 | return scm_merge_list_step (&items, scm_cmp_function (less), less, len); |
843 | } | |
844 | else if (SCM_VECTORP (items)) | |
845 | { | |
1d1559ce | 846 | SCM *temp; |
b5c2579a | 847 | len = SCM_VECTOR_LENGTH (items); |
34d19ef6 | 848 | |
34d19ef6 | 849 | /* |
1d1559ce HWN |
850 | the following array does not contain any new references to |
851 | SCM objects, so we can get away with allocing it on the heap. | |
852 | */ | |
67329a9e | 853 | temp = scm_malloc (len * sizeof(SCM)); |
34d19ef6 | 854 | |
1d1559ce | 855 | scm_merge_vector_step (items, |
54e09076 MD |
856 | temp, |
857 | scm_cmp_function (less), | |
858 | less, | |
859 | 0, | |
860 | len - 1); | |
861 | free(temp); | |
862 | return items; | |
863 | } | |
864 | else | |
276dd677 | 865 | SCM_WRONG_TYPE_ARG (1, items); |
1bbd0b84 | 866 | } |
0f981281 | 867 | #undef FUNC_NAME |
54e09076 MD |
868 | |
869 | /* stable-sort manages lists and vectors */ | |
a1ec6916 | 870 | SCM_DEFINE (scm_stable_sort, "stable-sort", 2, 0, 0, |
1bbd0b84 | 871 | (SCM items, SCM less), |
e3239868 DH |
872 | "Sort the sequence @var{items}, which may be a list or a\n" |
873 | "vector. @var{less} is used for comparing the sequence elements.\n" | |
874 | "This is a stable sort.") | |
1bbd0b84 | 875 | #define FUNC_NAME s_scm_stable_sort |
54e09076 | 876 | { |
1d1559ce | 877 | |
c96d76b8 NJ |
878 | if (SCM_NULL_OR_NIL_P (items)) |
879 | return items; | |
b5c2579a | 880 | |
34d19ef6 | 881 | SCM_VALIDATE_NIM (2, less); |
54e09076 MD |
882 | if (SCM_CONSP (items)) |
883 | { | |
1d1559ce | 884 | long len; /* list/vector length */ |
34d19ef6 | 885 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
54e09076 MD |
886 | items = scm_list_copy (items); |
887 | return scm_merge_list_step (&items, scm_cmp_function (less), less, len); | |
888 | } | |
3cfe6eab | 889 | #if SCM_HAVE_ARRAYS |
afe5177e | 890 | /* support ordinary vectors even if arrays not available? */ |
54e09076 MD |
891 | else if (SCM_VECTORP (items)) |
892 | { | |
1d1559ce | 893 | long len = SCM_VECTOR_LENGTH (items); |
67329a9e | 894 | SCM *temp = scm_malloc (len * sizeof (SCM)); |
1d1559ce | 895 | SCM retvec = scm_make_uve (len, scm_array_prototype (items)); |
54e09076 | 896 | scm_array_copy_x (items, retvec); |
34d19ef6 | 897 | |
1d1559ce | 898 | scm_merge_vector_step (retvec, |
54e09076 MD |
899 | temp, |
900 | scm_cmp_function (less), | |
901 | less, | |
902 | 0, | |
903 | len - 1); | |
904 | free (temp); | |
905 | return retvec; | |
906 | } | |
afe5177e | 907 | #endif |
54e09076 | 908 | else |
276dd677 | 909 | SCM_WRONG_TYPE_ARG (1, items); |
1bbd0b84 | 910 | } |
0f981281 | 911 | #undef FUNC_NAME |
54e09076 | 912 | |
1bbd0b84 | 913 | /* stable */ |
a1ec6916 | 914 | SCM_DEFINE (scm_sort_list_x, "sort-list!", 2, 0, 0, |
1bbd0b84 | 915 | (SCM items, SCM less), |
e3239868 DH |
916 | "Sort the list @var{items}, using @var{less} for comparing the\n" |
917 | "list elements. The sorting is destructive, that means that the\n" | |
918 | "input list is modified to produce the sorted result.\n" | |
919 | "This is a stable sort.") | |
1bbd0b84 | 920 | #define FUNC_NAME s_scm_sort_list_x |
54e09076 | 921 | { |
c014a02e | 922 | long len; |
34d19ef6 HWN |
923 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
924 | SCM_VALIDATE_NIM (2, less); | |
54e09076 | 925 | return scm_merge_list_step (&items, scm_cmp_function (less), less, len); |
1bbd0b84 | 926 | } |
0f981281 | 927 | #undef FUNC_NAME |
54e09076 | 928 | |
1bbd0b84 | 929 | /* stable */ |
a1ec6916 | 930 | SCM_DEFINE (scm_sort_list, "sort-list", 2, 0, 0, |
e3239868 DH |
931 | (SCM items, SCM less), |
932 | "Sort the list @var{items}, using @var{less} for comparing the\n" | |
933 | "list elements. This is a stable sort.") | |
1bbd0b84 | 934 | #define FUNC_NAME s_scm_sort_list |
54e09076 | 935 | { |
c014a02e | 936 | long len; |
34d19ef6 HWN |
937 | SCM_VALIDATE_LIST_COPYLEN (1, items, len); |
938 | SCM_VALIDATE_NIM (2, less); | |
54e09076 MD |
939 | items = scm_list_copy (items); |
940 | return scm_merge_list_step (&items, scm_cmp_function (less), less, len); | |
1bbd0b84 | 941 | } |
0f981281 | 942 | #undef FUNC_NAME |
54e09076 MD |
943 | |
944 | void | |
945 | scm_init_sort () | |
946 | { | |
a0599745 | 947 | #include "libguile/sort.x" |
54e09076 MD |
948 | |
949 | scm_add_feature ("sort"); | |
950 | } | |
89e00824 ML |
951 | |
952 | /* | |
953 | Local Variables: | |
954 | c-file-style: "gnu" | |
955 | End: | |
956 | */ |