Commit | Line | Data |
---|---|---|
54e09076 MD |
1 | /* Copyright (C) 1999 Free Software Foundation, Inc. |
2 | * This program is free software; you can redistribute it and/or modify | |
3 | * it under the terms of the GNU General Public License as published by | |
4 | * the Free Software Foundation; either version 2, or (at your option) | |
5 | * any later version. | |
6 | * | |
7 | * This program is distributed in the hope that it will be useful, | |
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
10 | * GNU General Public License for more details. | |
11 | * | |
12 | * You should have received a copy of the GNU General Public License | |
13 | * along with this software; see the file COPYING. If not, write to | |
14 | * the Free Software Foundation, Inc., 59 Temple Place, Suite 330, | |
15 | * Boston, MA 02111-1307 USA | |
16 | * | |
17 | * As a special exception, the Free Software Foundation gives permission | |
18 | * for additional uses of the text contained in its release of GUILE. | |
19 | * | |
20 | * The exception is that, if you link the GUILE library with other files | |
21 | * to produce an executable, this does not by itself cause the | |
22 | * resulting executable to be covered by the GNU General Public License. | |
23 | * Your use of that executable is in no way restricted on account of | |
24 | * linking the GUILE library code into it. | |
25 | * | |
26 | * This exception does not however invalidate any other reasons why | |
27 | * the executable file might be covered by the GNU General Public License. | |
28 | * | |
29 | * This exception applies only to the code released by the | |
30 | * Free Software Foundation under the name GUILE. If you copy | |
31 | * code from other Free Software Foundation releases into a copy of | |
32 | * GUILE, as the General Public License permits, the exception does | |
33 | * not apply to the code that you add in this way. To avoid misleading | |
34 | * anyone as to the status of such modified files, you must delete | |
35 | * this exception notice from them. | |
36 | * | |
37 | * If you write modifications of your own for GUILE, it is your choice | |
38 | * whether to permit this exception to apply to your modifications. | |
39 | * If you do not wish that, delete this exception notice. */ | |
40 | ||
1bbd0b84 GB |
41 | /* Software engineering face-lift by Greg J. Badros, 11-Dec-1999, |
42 | gjb@cs.washington.edu, http://www.cs.washington.edu/homes/gjb */ | |
43 | ||
44 | ||
54e09076 MD |
45 | /* Written in December 1998 by Roland Orre <orre@nada.kth.se> |
46 | * This implements the same sort interface as slib/sort.scm | |
47 | * for lists and vectors where slib defines: | |
48 | * sorted?, merge, merge!, sort, sort! | |
15d9c4e3 | 49 | * For scsh compatibility sort-list and sort-list! are also defined. |
54e09076 | 50 | * In cases where a stable-sort is required use stable-sort or |
15d9c4e3 | 51 | * stable-sort!. An additional feature is |
54e09076 | 52 | * (restricted-vector-sort! vector less? startpos endpos) |
15d9c4e3 | 53 | * which allows you to sort part of a vector. |
54e09076 MD |
54 | * Thanks to Aubrey Jaffer for the slib/sort.scm library. |
55 | * Thanks to Richard A. O'Keefe (based on Prolog code by D.H.D.Warren) | |
56 | * for the merge sort inspiration. | |
57 | * Thanks to Douglas C. Schmidt (schmidt@ics.uci.edu) for the | |
58 | * quicksort code. | |
59 | */ | |
60 | ||
61 | /* We need this to get the definitions for HAVE_ALLOCA_H, etc. */ | |
62 | #include "scmconfig.h" | |
63 | ||
64 | /* AIX requires this to be the first thing in the file. The #pragma | |
65 | directive is indented so pre-ANSI compilers will ignore it, rather | |
66 | than choke on it. */ | |
67 | #ifndef __GNUC__ | |
68 | # if HAVE_ALLOCA_H | |
69 | # include <alloca.h> | |
70 | # else | |
71 | # ifdef _AIX | |
72 | #pragma alloca | |
73 | # else | |
74 | # ifndef alloca /* predefined by HP cc +Olibcalls */ | |
75 | char *alloca (); | |
76 | # endif | |
77 | # endif | |
78 | # endif | |
79 | #endif | |
80 | ||
81 | #include "_scm.h" | |
82 | ||
83 | #include "eval.h" | |
84 | #include "unif.h" | |
85 | #include "ramap.h" | |
86 | #include "alist.h" | |
a5cae3f3 | 87 | #include "feature.h" |
003d1fd0 | 88 | #include "vectors.h" |
54e09076 | 89 | |
b6791b2e | 90 | #include "validate.h" |
54e09076 MD |
91 | #include "sort.h" |
92 | ||
93 | /* The routine quicksort was extracted from the GNU C Library qsort.c | |
94 | written by Douglas C. Schmidt (schmidt@ics.uci.edu) | |
95 | and adapted to guile by adding an extra pointer less | |
96 | to quicksort by Roland Orre <orre@nada.kth.se>. | |
97 | ||
98 | The reason to do this instead of using the library function qsort | |
99 | was to avoid dependency of the ANSI-C extensions for local functions | |
100 | and also to avoid obscure pool based solutions. | |
15d9c4e3 MD |
101 | |
102 | This sorting routine is not much more efficient than the stable | |
103 | version but doesn't consume extra memory. | |
54e09076 MD |
104 | */ |
105 | ||
/* Byte-wise swap two items of size SIZE.  A and B must point to
   distinct, non-overlapping objects of at least SIZE bytes.  Each macro
   argument is evaluated exactly once, and a SIZE of zero swaps nothing.
   The temporaries deliberately avoid the `__' prefix, which is reserved
   for the C implementation.  */
#define SWAP(a, b, size)                        \
  do                                            \
    {                                           \
      size_t swap_left = (size);                \
      char *swap_a = (a), *swap_b = (b);        \
      while (swap_left > 0)                     \
        {                                       \
          char swap_tmp = *swap_a;              \
          *swap_a++ = *swap_b;                  \
          *swap_b++ = swap_tmp;                 \
          --swap_left;                          \
        }                                       \
    } while (0)
119 | ||
/* Partitions of at most this many elements are not quicksorted; they are
   left for the final insertion-sort pass.  This particular magic number
   was chosen to work best on a Sun 4/260.  */
#define MAX_THRESH 4

/* One deferred partition obligation: a pair of byte pointers delimiting
   the part of the array that still has to be partitioned.  */
typedef struct stack_node
{
  char *lo;
  char *hi;
} stack_node;

/* A very fast in-line stack abstraction.  The macros expect the caller
   to have an array named `stack' and a cursor named `top' (pointing at
   the next free slot) in scope.  */
#define STACK_SIZE      (8 * sizeof (unsigned long int))
#define PUSH(low, high) ((void) (top->lo = (low), top->hi = (high), top++))
#define POP(low, high)  ((void) (top--, (low) = top->lo, (high) = top->hi))
#define STACK_NOT_EMPTY (top > stack)
137 | ||
138 | ||
139 | /* Order size using quicksort. This implementation incorporates | |
140 | four optimizations discussed in Sedgewick: | |
141 | ||
   1. Non-recursive, using an explicit stack of pointers that store the
      next array partition to sort.  To save time, the maximum amount
      of space required to sort an array of MAX_INT elements is allocated
      on the stack.  Assuming 32-bit longs and pointers, this needs only
      32 * sizeof (stack_node) == 256 bytes.  Pretty cheap, actually.
147 | ||
148 | 2. Chose the pivot element using a median-of-three decision tree. | |
149 | This reduces the probability of selecting a bad pivot value and | |
150 | eliminates certain extraneous comparisons. | |
151 | ||
152 | 3. Only quicksorts TOTAL_ELEMS / MAX_THRESH partitions, leaving | |
153 | insertion sort to order the MAX_THRESH items within each partition. | |
154 | This is a big win, since insertion sort is faster for small, mostly | |
155 | sorted array segments. | |
156 | ||
157 | 4. The larger of the two sub-partitions is always pushed onto the | |
158 | stack first, with the algorithm then concentrating on the | |
159 | smaller partition. This *guarantees* no more than log (n) | |
160 | stack size is needed (actually O(1) in this case)! */ | |
161 | ||
162 | typedef int (*cmp_fun_t) (SCM less, | |
163 | const void*, | |
164 | const void*); | |
165 | ||
a34af05e MD |
166 | static const char s_buggy_less[] = "buggy less predicate used when sorting"; |
167 | ||
54e09076 MD |
168 | static void |
169 | quicksort (void *const pbase, | |
170 | size_t total_elems, | |
171 | size_t size, | |
172 | cmp_fun_t cmp, | |
173 | SCM less) | |
174 | { | |
175 | register char *base_ptr = (char *) pbase; | |
176 | ||
177 | /* Allocating SIZE bytes for a pivot buffer facilitates a better | |
178 | algorithm below since we can do comparisons directly on the pivot. */ | |
179 | char *pivot_buffer = (char *) alloca (size); | |
180 | const size_t max_thresh = MAX_THRESH * size; | |
181 | ||
182 | if (total_elems == 0) | |
183 | /* Avoid lossage with unsigned arithmetic below. */ | |
184 | return; | |
185 | ||
186 | if (total_elems > MAX_THRESH) | |
187 | { | |
188 | char *lo = base_ptr; | |
189 | char *hi = &lo[size * (total_elems - 1)]; | |
190 | /* Largest size needed for 32-bit int!!! */ | |
191 | stack_node stack[STACK_SIZE]; | |
192 | stack_node *top = stack + 1; | |
193 | ||
194 | while (STACK_NOT_EMPTY) | |
195 | { | |
196 | char *left_ptr; | |
197 | char *right_ptr; | |
198 | ||
199 | char *pivot = pivot_buffer; | |
200 | ||
201 | /* Select median value from among LO, MID, and HI. Rearrange | |
202 | LO and HI so the three values are sorted. This lowers the | |
203 | probability of picking a pathological pivot value and | |
204 | skips a comparison for both the LEFT_PTR and RIGHT_PTR. */ | |
205 | ||
206 | char *mid = lo + size * ((hi - lo) / size >> 1); | |
207 | ||
208 | if ((*cmp) (less, (void *) mid, (void *) lo)) | |
209 | SWAP (mid, lo, size); | |
210 | if ((*cmp) (less, (void *) hi, (void *) mid)) | |
211 | SWAP (mid, hi, size); | |
212 | else | |
213 | goto jump_over; | |
214 | if ((*cmp) (less, (void *) mid, (void *) lo)) | |
215 | SWAP (mid, lo, size); | |
216 | jump_over:; | |
217 | memcpy (pivot, mid, size); | |
218 | pivot = pivot_buffer; | |
219 | ||
220 | left_ptr = lo + size; | |
221 | right_ptr = hi - size; | |
222 | ||
223 | /* Here's the famous ``collapse the walls'' section of quicksort. | |
224 | Gotta like those tight inner loops! They are the main reason | |
225 | that this algorithm runs much faster than others. */ | |
226 | do | |
227 | { | |
228 | while ((*cmp) (less, (void *) left_ptr, (void *) pivot)) | |
a34af05e MD |
229 | { |
230 | left_ptr += size; | |
231 | /* The comparison predicate may be buggy */ | |
232 | if (left_ptr > hi) | |
5d2d2ffc | 233 | scm_misc_error (NULL, s_buggy_less, SCM_EOL); |
a34af05e | 234 | } |
54e09076 MD |
235 | |
236 | while ((*cmp) (less, (void *) pivot, (void *) right_ptr)) | |
a34af05e MD |
237 | { |
238 | right_ptr -= size; | |
239 | /* The comparison predicate may be buggy */ | |
240 | if (right_ptr < lo) | |
5d2d2ffc | 241 | scm_misc_error (NULL, s_buggy_less, SCM_EOL); |
a34af05e | 242 | } |
54e09076 MD |
243 | |
244 | if (left_ptr < right_ptr) | |
245 | { | |
246 | SWAP (left_ptr, right_ptr, size); | |
247 | left_ptr += size; | |
248 | right_ptr -= size; | |
249 | } | |
250 | else if (left_ptr == right_ptr) | |
251 | { | |
252 | left_ptr += size; | |
253 | right_ptr -= size; | |
254 | break; | |
255 | } | |
256 | } | |
257 | while (left_ptr <= right_ptr); | |
258 | ||
259 | /* Set up pointers for next iteration. First determine whether | |
260 | left and right partitions are below the threshold size. If so, | |
261 | ignore one or both. Otherwise, push the larger partition's | |
262 | bounds on the stack and continue sorting the smaller one. */ | |
263 | ||
264 | if ((size_t) (right_ptr - lo) <= max_thresh) | |
265 | { | |
266 | if ((size_t) (hi - left_ptr) <= max_thresh) | |
267 | /* Ignore both small partitions. */ | |
268 | POP (lo, hi); | |
269 | else | |
270 | /* Ignore small left partition. */ | |
271 | lo = left_ptr; | |
272 | } | |
273 | else if ((size_t) (hi - left_ptr) <= max_thresh) | |
274 | /* Ignore small right partition. */ | |
275 | hi = right_ptr; | |
276 | else if ((right_ptr - lo) > (hi - left_ptr)) | |
277 | { | |
278 | /* Push larger left partition indices. */ | |
279 | PUSH (lo, right_ptr); | |
280 | lo = left_ptr; | |
281 | } | |
282 | else | |
283 | { | |
284 | /* Push larger right partition indices. */ | |
285 | PUSH (left_ptr, hi); | |
286 | hi = right_ptr; | |
287 | } | |
288 | } | |
289 | } | |
290 | ||
291 | /* Once the BASE_PTR array is partially sorted by quicksort the rest | |
292 | is completely sorted using insertion sort, since this is efficient | |
293 | for partitions below MAX_THRESH size. BASE_PTR points to the beginning | |
294 | of the array to sort, and END_PTR points at the very last element in | |
295 | the array (*not* one beyond it!). */ | |
296 | ||
297 | { | |
298 | char *const end_ptr = &base_ptr[size * (total_elems - 1)]; | |
299 | char *tmp_ptr = base_ptr; | |
300 | char *thresh = min (end_ptr, base_ptr + max_thresh); | |
301 | register char *run_ptr; | |
302 | ||
303 | /* Find smallest element in first threshold and place it at the | |
304 | array's beginning. This is the smallest array element, | |
305 | and the operation speeds up insertion sort's inner loop. */ | |
306 | ||
307 | for (run_ptr = tmp_ptr + size; run_ptr <= thresh; run_ptr += size) | |
308 | if ((*cmp) (less, (void *) run_ptr, (void *) tmp_ptr)) | |
309 | tmp_ptr = run_ptr; | |
310 | ||
311 | if (tmp_ptr != base_ptr) | |
312 | SWAP (tmp_ptr, base_ptr, size); | |
313 | ||
314 | /* Insertion sort, running from left-hand-side up to right-hand-side. */ | |
315 | ||
316 | run_ptr = base_ptr + size; | |
317 | while ((run_ptr += size) <= end_ptr) | |
318 | { | |
319 | tmp_ptr = run_ptr - size; | |
320 | while ((*cmp) (less, (void *) run_ptr, (void *) tmp_ptr)) | |
a34af05e MD |
321 | { |
322 | tmp_ptr -= size; | |
323 | /* The comparison predicate may be buggy */ | |
324 | if (tmp_ptr < base_ptr) | |
5d2d2ffc | 325 | scm_misc_error (NULL, s_buggy_less, SCM_EOL); |
a34af05e | 326 | } |
54e09076 MD |
327 | |
328 | tmp_ptr += size; | |
329 | if (tmp_ptr != run_ptr) | |
330 | { | |
331 | char *trav; | |
332 | ||
333 | trav = run_ptr + size; | |
334 | while (--trav >= run_ptr) | |
335 | { | |
336 | char c = *trav; | |
337 | char *hi, *lo; | |
338 | ||
339 | for (hi = lo = trav; (lo -= size) >= tmp_ptr; hi = lo) | |
340 | *hi = *lo; | |
341 | *hi = c; | |
342 | } | |
343 | } | |
344 | } | |
345 | } | |
346 | } /* quicksort */ | |
347 | ||
348 | ||
349 | /* comparison routines */ | |
350 | ||
351 | static int | |
352 | subr2less (SCM less, const void *a, const void *b) | |
353 | { | |
354 | return SCM_NFALSEP (SCM_SUBRF (less) (*(SCM *) a, *(SCM *) b)); | |
355 | } /* subr2less */ | |
356 | ||
357 | static int | |
358 | subr2oless (SCM less, const void *a, const void *b) | |
359 | { | |
360 | return SCM_NFALSEP (SCM_SUBRF (less) (*(SCM *) a, | |
361 | *(SCM *) b, | |
362 | SCM_UNDEFINED)); | |
363 | } /* subr2oless */ | |
364 | ||
365 | static int | |
366 | lsubrless (SCM less, const void *a, const void *b) | |
367 | { | |
368 | return SCM_NFALSEP (SCM_SUBRF (less) | |
369 | (scm_cons (*(SCM *) a, | |
370 | scm_cons (*(SCM *) b, SCM_EOL)))); | |
371 | } /* lsubrless */ | |
372 | ||
373 | static int | |
374 | closureless (SCM code, const void *a, const void *b) | |
375 | { | |
4c6fe5fc MD |
376 | SCM env = SCM_EXTEND_ENV (SCM_CAR (SCM_CODE (code)), |
377 | scm_cons (*(SCM *) a, | |
378 | scm_cons (*(SCM *) b, SCM_EOL)), | |
379 | SCM_ENV (code)); | |
54e09076 | 380 | /* Evaluate the closure body */ |
34249025 | 381 | return SCM_NFALSEP (scm_eval_body (SCM_CDR (SCM_CODE (code)), env)); |
54e09076 MD |
382 | } /* closureless */ |
383 | ||
384 | static int | |
385 | applyless (SCM less, const void *a, const void *b) | |
386 | { | |
54778cd3 | 387 | return SCM_NFALSEP (scm_apply (less, |
54e09076 MD |
388 | scm_cons (*(SCM *) a, |
389 | scm_cons (*(SCM *) b, SCM_EOL)), | |
390 | SCM_EOL)); | |
391 | } /* applyless */ | |
392 | ||
393 | static cmp_fun_t | |
394 | scm_cmp_function (SCM p) | |
395 | { | |
396 | switch (SCM_TYP7 (p)) | |
397 | { | |
398 | case scm_tc7_subr_2: | |
399 | case scm_tc7_rpsubr: | |
400 | case scm_tc7_asubr: | |
401 | return subr2less; | |
402 | case scm_tc7_subr_2o: | |
403 | return subr2oless; | |
404 | case scm_tc7_lsubr: | |
405 | return lsubrless; | |
406 | case scm_tcs_closures: | |
407 | return closureless; | |
408 | default: | |
409 | return applyless; | |
410 | } | |
411 | } /* scm_cmp_function */ | |
412 | ||
54e09076 MD |
413 | |
414 | /* Question: Is there any need to make this a more general array sort? | |
415 | It is probably enough to manage the vector type. */ | |
416 | /* endpos equal as for substring, i.e. endpos is not included. */ | |
da4a1dba | 417 | /* More natural with length? */ |
1bbd0b84 | 418 | |
a1ec6916 | 419 | SCM_DEFINE (scm_restricted_vector_sort_x, "restricted-vector-sort!", 4, 0, 0, |
1bbd0b84 GB |
420 | (SCM vec, SCM less, SCM startpos, SCM endpos), |
421 | "") | |
422 | #define FUNC_NAME s_scm_restricted_vector_sort_x | |
54e09076 MD |
423 | { |
424 | size_t vlen, spos, len, size = sizeof (SCM); | |
425 | SCM *vp; | |
426 | ||
6b5a304f GB |
427 | SCM_VALIDATE_NIM (1,vec); |
428 | SCM_VALIDATE_NIM (2,less); | |
54e09076 MD |
429 | switch (SCM_TYP7 (vec)) |
430 | { | |
431 | case scm_tc7_vector: /* the only type we manage is vector */ | |
432 | break; | |
afe5177e | 433 | #if 0 /* HAVE_ARRAYS */ |
54e09076 MD |
434 | case scm_tc7_ivect: /* long */ |
435 | case scm_tc7_uvect: /* unsigned */ | |
436 | case scm_tc7_fvect: /* float */ | |
437 | case scm_tc7_dvect: /* double */ | |
afe5177e | 438 | #endif |
54e09076 | 439 | default: |
1bbd0b84 | 440 | SCM_WTA (1,vec); |
54e09076 MD |
441 | } |
442 | vp = SCM_VELTS (vec); /* vector pointer */ | |
443 | vlen = SCM_LENGTH (vec); | |
444 | ||
3b3b36dd | 445 | SCM_VALIDATE_INUM_COPY (3,startpos,spos); |
1bbd0b84 | 446 | SCM_ASSERT_RANGE (3,startpos,(spos >= 0) && (spos <= vlen)); |
3b3b36dd | 447 | SCM_VALIDATE_INUM_RANGE (4,endpos,0,vlen+1); |
54e09076 MD |
448 | len = SCM_INUM (endpos) - spos; |
449 | ||
450 | quicksort (&vp[spos], len, size, scm_cmp_function (less), less); | |
451 | return SCM_UNSPECIFIED; | |
452 | /* return vec; */ | |
1bbd0b84 GB |
453 | } |
454 | #undef FUNC_NAME | |
54e09076 MD |
455 | |
456 | /* (sorted? sequence less?) | |
457 | * is true when sequence is a list (x0 x1 ... xm) or a vector #(x0 ... xm) | |
458 | * such that for all 1 <= i <= m, | |
459 | * (not (less? (list-ref list i) (list-ref list (- i 1)))). */ | |
a1ec6916 | 460 | SCM_DEFINE (scm_sorted_p, "sorted?", 2, 0, 0, |
1bbd0b84 GB |
461 | (SCM items, SCM less), |
462 | "") | |
463 | #define FUNC_NAME s_scm_sorted_p | |
54e09076 MD |
464 | { |
465 | long len, j; /* list/vector length, temp j */ | |
466 | SCM item, rest; /* rest of items loop variable */ | |
467 | SCM *vp; | |
468 | cmp_fun_t cmp = scm_cmp_function (less); | |
469 | ||
470 | if (SCM_NULLP (items)) | |
471 | return SCM_BOOL_T; | |
1bbd0b84 | 472 | |
6b5a304f GB |
473 | SCM_VALIDATE_NIM (1,items); |
474 | SCM_VALIDATE_NIM (2,less); | |
54e09076 MD |
475 | |
476 | if (SCM_CONSP (items)) | |
477 | { | |
478 | len = scm_ilength (items); /* also checks that it's a pure list */ | |
1bbd0b84 | 479 | SCM_ASSERT_RANGE (1,items,len >= 0); |
54e09076 MD |
480 | if (len <= 1) |
481 | return SCM_BOOL_T; | |
482 | ||
483 | item = SCM_CAR (items); | |
484 | rest = SCM_CDR (items); | |
485 | j = len - 1; | |
486 | while (j > 0) | |
487 | { | |
488 | if ((*cmp) (less, &SCM_CAR(rest), &item)) | |
489 | return SCM_BOOL_F; | |
490 | else | |
491 | { | |
492 | item = SCM_CAR (rest); | |
493 | rest = SCM_CDR (rest); | |
494 | j--; | |
495 | } | |
496 | } | |
497 | return SCM_BOOL_T; | |
498 | } | |
499 | else | |
500 | { | |
501 | switch (SCM_TYP7 (items)) | |
502 | { | |
503 | case scm_tc7_vector: | |
504 | { | |
505 | vp = SCM_VELTS (items); /* vector pointer */ | |
506 | len = SCM_LENGTH (items); | |
507 | j = len - 1; | |
508 | while (j > 0) | |
509 | { | |
510 | if ((*cmp) (less, &vp[1], vp)) | |
511 | return SCM_BOOL_F; | |
512 | else | |
513 | { | |
514 | vp++; | |
515 | j--; | |
516 | } | |
517 | } | |
518 | return SCM_BOOL_T; | |
519 | } | |
520 | break; | |
afe5177e | 521 | #if 0 /* HAVE_ARRAYS */ |
54e09076 MD |
522 | case scm_tc7_ivect: /* long */ |
523 | case scm_tc7_uvect: /* unsigned */ | |
524 | case scm_tc7_fvect: /* float */ | |
525 | case scm_tc7_dvect: /* double */ | |
afe5177e | 526 | #endif |
54e09076 | 527 | default: |
1bbd0b84 | 528 | SCM_WTA (1,items); |
54e09076 MD |
529 | } |
530 | } | |
531 | return SCM_BOOL_F; | |
1bbd0b84 GB |
532 | } |
533 | #undef FUNC_NAME | |
54e09076 MD |
534 | |
535 | /* (merge a b less?) | |
536 | takes two lists a and b such that (sorted? a less?) and (sorted? b less?) | |
537 | and returns a new list in which the elements of a and b have been stably | |
538 | interleaved so that (sorted? (merge a b less?) less?). | |
539 | Note: this does _not_ accept vectors. */ | |
a1ec6916 | 540 | SCM_DEFINE (scm_merge, "merge", 3, 0, 0, |
1bbd0b84 GB |
541 | (SCM alist, SCM blist, SCM less), |
542 | "") | |
543 | #define FUNC_NAME s_scm_merge | |
54e09076 MD |
544 | { |
545 | long alen, blen; /* list lengths */ | |
546 | SCM build, last; | |
547 | cmp_fun_t cmp = scm_cmp_function (less); | |
6b5a304f | 548 | SCM_VALIDATE_NIM (3,less); |
54e09076 MD |
549 | |
550 | if (SCM_NULLP (alist)) | |
551 | return blist; | |
552 | else if (SCM_NULLP (blist)) | |
553 | return alist; | |
554 | else | |
555 | { | |
3b3b36dd GB |
556 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (1,alist,alen); |
557 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (2,blist,blen); | |
c56cc3c8 | 558 | if ((*cmp) (less, &SCM_CAR (blist), &SCM_CAR (alist))) |
54e09076 MD |
559 | { |
560 | build = scm_cons (SCM_CAR (blist), SCM_EOL); | |
561 | blist = SCM_CDR (blist); | |
562 | blen--; | |
563 | } | |
c56cc3c8 MD |
564 | else |
565 | { | |
566 | build = scm_cons (SCM_CAR (alist), SCM_EOL); | |
567 | alist = SCM_CDR (alist); | |
568 | alen--; | |
569 | } | |
54e09076 MD |
570 | last = build; |
571 | while ((alen > 0) && (blen > 0)) | |
572 | { | |
c56cc3c8 | 573 | if ((*cmp) (less, &SCM_CAR (blist), &SCM_CAR (alist))) |
54e09076 MD |
574 | { |
575 | SCM_SETCDR (last, scm_cons (SCM_CAR (blist), SCM_EOL)); | |
576 | blist = SCM_CDR (blist); | |
577 | blen--; | |
578 | } | |
c56cc3c8 MD |
579 | else |
580 | { | |
581 | SCM_SETCDR (last, scm_cons (SCM_CAR (alist), SCM_EOL)); | |
582 | alist = SCM_CDR (alist); | |
583 | alen--; | |
584 | } | |
54e09076 MD |
585 | last = SCM_CDR (last); |
586 | } | |
587 | if ((alen > 0) && (blen == 0)) | |
588 | SCM_SETCDR (last, alist); | |
589 | else if ((alen == 0) && (blen > 0)) | |
590 | SCM_SETCDR (last, blist); | |
591 | } | |
592 | return build; | |
1bbd0b84 GB |
593 | } |
594 | #undef FUNC_NAME | |
595 | ||
54e09076 MD |
596 | |
597 | static SCM | |
598 | scm_merge_list_x (SCM alist, SCM blist, | |
599 | long alen, long blen, | |
600 | cmp_fun_t cmp, SCM less) | |
601 | { | |
602 | SCM build, last; | |
603 | ||
604 | if (SCM_NULLP (alist)) | |
605 | return blist; | |
606 | else if (SCM_NULLP (blist)) | |
607 | return alist; | |
608 | else | |
609 | { | |
c56cc3c8 | 610 | if ((*cmp) (less, &SCM_CAR (blist), &SCM_CAR (alist))) |
54e09076 MD |
611 | { |
612 | build = blist; | |
613 | blist = SCM_CDR (blist); | |
614 | blen--; | |
615 | } | |
c56cc3c8 MD |
616 | else |
617 | { | |
618 | build = alist; | |
619 | alist = SCM_CDR (alist); | |
620 | alen--; | |
621 | } | |
54e09076 MD |
622 | last = build; |
623 | while ((alen > 0) && (blen > 0)) | |
624 | { | |
c56cc3c8 | 625 | if ((*cmp) (less, &SCM_CAR (blist), &SCM_CAR (alist))) |
54e09076 MD |
626 | { |
627 | SCM_SETCDR (last, blist); | |
628 | blist = SCM_CDR (blist); | |
629 | blen--; | |
630 | } | |
c56cc3c8 MD |
631 | else |
632 | { | |
633 | SCM_SETCDR (last, alist); | |
634 | alist = SCM_CDR (alist); | |
635 | alen--; | |
636 | } | |
54e09076 MD |
637 | last = SCM_CDR (last); |
638 | } | |
639 | if ((alen > 0) && (blen == 0)) | |
640 | SCM_SETCDR (last, alist); | |
641 | else if ((alen == 0) && (blen > 0)) | |
642 | SCM_SETCDR (last, blist); | |
643 | } | |
644 | return build; | |
645 | } /* scm_merge_list_x */ | |
646 | ||
a1ec6916 | 647 | SCM_DEFINE (scm_merge_x, "merge!", 3, 0, 0, |
1bbd0b84 GB |
648 | (SCM alist, SCM blist, SCM less), |
649 | "") | |
650 | #define FUNC_NAME s_scm_merge_x | |
54e09076 MD |
651 | { |
652 | long alen, blen; /* list lengths */ | |
653 | ||
6b5a304f | 654 | SCM_VALIDATE_NIM (3,less); |
54e09076 MD |
655 | if (SCM_NULLP (alist)) |
656 | return blist; | |
657 | else if (SCM_NULLP (blist)) | |
658 | return alist; | |
659 | else | |
660 | { | |
3b3b36dd GB |
661 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (1,alist,alen); |
662 | SCM_VALIDATE_NONEMPTYLIST_COPYLEN (2,blist,blen); | |
54e09076 MD |
663 | return scm_merge_list_x (alist, blist, |
664 | alen, blen, | |
665 | scm_cmp_function (less), | |
666 | less); | |
667 | } | |
1bbd0b84 GB |
668 | } |
669 | #undef FUNC_NAME | |
54e09076 MD |
670 | |
671 | /* This merge sort algorithm is same as slib's by Richard A. O'Keefe. | |
672 | The algorithm is stable. We also tried to use the algorithm used by | |
673 | scsh's merge-sort but that algorithm showed to not be stable, even | |
674 | though it claimed to be. | |
675 | */ | |
676 | static SCM | |
677 | scm_merge_list_step (SCM * seq, | |
678 | cmp_fun_t cmp, | |
679 | SCM less, | |
680 | int n) | |
681 | { | |
c56cc3c8 MD |
682 | SCM a, b; |
683 | ||
54e09076 MD |
684 | if (n > 2) |
685 | { | |
686 | long mid = n / 2; | |
c56cc3c8 MD |
687 | a = scm_merge_list_step (seq, cmp, less, mid); |
688 | b = scm_merge_list_step (seq, cmp, less, n - mid); | |
689 | return scm_merge_list_x (a, b, mid, n - mid, cmp, less); | |
54e09076 MD |
690 | } |
691 | else if (n == 2) | |
692 | { | |
693 | SCM p = *seq; | |
694 | SCM rest = SCM_CDR (*seq); | |
695 | SCM x = SCM_CAR (*seq); | |
696 | SCM y = SCM_CAR (SCM_CDR (*seq)); | |
697 | *seq = SCM_CDR (rest); | |
698 | SCM_SETCDR (rest, SCM_EOL); | |
699 | if ((*cmp) (less, &y, &x)) | |
700 | { | |
701 | SCM_CAR (p) = y; | |
702 | SCM_CAR (rest) = x; | |
703 | } | |
704 | return p; | |
705 | } | |
706 | else if (n == 1) | |
707 | { | |
708 | SCM p = *seq; | |
709 | *seq = SCM_CDR (p); | |
710 | SCM_SETCDR (p, SCM_EOL); | |
711 | return p; | |
712 | } | |
713 | else | |
714 | return SCM_EOL; | |
715 | } /* scm_merge_list_step */ | |
716 | ||
717 | ||
54e09076 | 718 | /* scm_sort_x manages lists and vectors, not stable sort */ |
a1ec6916 | 719 | SCM_DEFINE (scm_sort_x, "sort!", 2, 0, 0, |
1bbd0b84 GB |
720 | (SCM items, SCM less), |
721 | "") | |
722 | #define FUNC_NAME s_scm_sort_x | |
54e09076 MD |
723 | { |
724 | long len; /* list/vector length */ | |
725 | if (SCM_NULLP(items)) | |
726 | return SCM_EOL; | |
6b5a304f GB |
727 | SCM_VALIDATE_NIM (1,items); |
728 | SCM_VALIDATE_NIM (2,less); | |
54e09076 MD |
729 | |
730 | if (SCM_CONSP (items)) | |
731 | { | |
3b3b36dd | 732 | SCM_VALIDATE_LIST_COPYLEN (1,items,len); |
54e09076 MD |
733 | return scm_merge_list_step (&items, scm_cmp_function (less), less, len); |
734 | } | |
735 | else if (SCM_VECTORP (items)) | |
736 | { | |
737 | len = SCM_LENGTH (items); | |
738 | scm_restricted_vector_sort_x (items, | |
739 | less, | |
740 | SCM_MAKINUM (0L), | |
741 | SCM_MAKINUM (len)); | |
742 | return items; | |
743 | } | |
744 | else | |
1bbd0b84 GB |
745 | RETURN_SCM_WTA (1,items); |
746 | } | |
0f981281 | 747 | #undef FUNC_NAME |
54e09076 MD |
748 | |
749 | /* scm_sort manages lists and vectors, not stable sort */ | |
1bbd0b84 | 750 | |
a1ec6916 | 751 | SCM_DEFINE (scm_sort, "sort", 2, 0, 0, |
1bbd0b84 GB |
752 | (SCM items, SCM less), |
753 | "") | |
754 | #define FUNC_NAME s_scm_sort | |
54e09076 MD |
755 | { |
756 | SCM sortvec; /* the vector we actually sort */ | |
757 | long len; /* list/vector length */ | |
758 | if (SCM_NULLP(items)) | |
759 | return SCM_EOL; | |
6b5a304f GB |
760 | SCM_VALIDATE_NIM (1,items); |
761 | SCM_VALIDATE_NIM (2,less); | |
54e09076 MD |
762 | if (SCM_CONSP (items)) |
763 | { | |
3b3b36dd | 764 | SCM_VALIDATE_LIST_COPYLEN (1,items,len); |
54e09076 MD |
765 | items = scm_list_copy (items); |
766 | return scm_merge_list_step (&items, scm_cmp_function (less), less, len); | |
767 | } | |
afe5177e GH |
768 | #ifdef HAVE_ARRAYS |
769 | /* support ordinary vectors even if arrays not available? */ | |
54e09076 MD |
770 | else if (SCM_VECTORP (items)) |
771 | { | |
772 | len = SCM_LENGTH (items); | |
773 | sortvec = scm_make_uve (len, scm_array_prototype (items)); | |
774 | scm_array_copy_x (items, sortvec); | |
775 | scm_restricted_vector_sort_x (sortvec, | |
776 | less, | |
777 | SCM_MAKINUM (0L), | |
778 | SCM_MAKINUM (len)); | |
779 | return sortvec; | |
780 | } | |
afe5177e | 781 | #endif |
54e09076 | 782 | else |
1bbd0b84 GB |
783 | RETURN_SCM_WTA (1,items); |
784 | } | |
0f981281 | 785 | #undef FUNC_NAME |
54e09076 MD |
786 | |
787 | static void | |
788 | scm_merge_vector_x (void *const vecbase, | |
789 | void *const tempbase, | |
790 | cmp_fun_t cmp, | |
791 | SCM less, | |
792 | long low, | |
793 | long mid, | |
794 | long high) | |
795 | { | |
796 | register SCM *vp = (SCM *) vecbase; | |
797 | register SCM *temp = (SCM *) tempbase; | |
798 | long it; /* Index for temp vector */ | |
799 | long i1 = low; /* Index for lower vector segment */ | |
800 | long i2 = mid + 1; /* Index for upper vector segment */ | |
801 | ||
802 | /* Copy while both segments contain more characters */ | |
803 | for (it = low; (i1 <= mid) && (i2 <= high); ++it) | |
804 | if ((*cmp) (less, &vp[i2], &vp[i1])) | |
805 | temp[it] = vp[i2++]; | |
806 | else | |
807 | temp[it] = vp[i1++]; | |
808 | ||
809 | /* Copy while first segment contains more characters */ | |
810 | while (i1 <= mid) | |
811 | temp[it++] = vp[i1++]; | |
812 | ||
813 | /* Copy while second segment contains more characters */ | |
814 | while (i2 <= high) | |
815 | temp[it++] = vp[i2++]; | |
816 | ||
817 | /* Copy back from temp to vp */ | |
818 | for (it = low; it <= high; ++it) | |
819 | vp[it] = temp[it]; | |
820 | } /* scm_merge_vector_x */ | |
821 | ||
822 | static void | |
823 | scm_merge_vector_step (void *const vp, | |
824 | void *const temp, | |
825 | cmp_fun_t cmp, | |
826 | SCM less, | |
827 | long low, | |
828 | long high) | |
829 | { | |
830 | if (high > low) | |
831 | { | |
832 | long mid = (low + high) / 2; | |
833 | scm_merge_vector_step (vp, temp, cmp, less, low, mid); | |
834 | scm_merge_vector_step (vp, temp, cmp, less, mid+1, high); | |
835 | scm_merge_vector_x (vp, temp, cmp, less, low, mid, high); | |
836 | } | |
837 | } /* scm_merge_vector_step */ | |
838 | ||
839 | ||
54e09076 MD |
840 | /* stable-sort! manages lists and vectors */ |
841 | ||
a1ec6916 | 842 | SCM_DEFINE (scm_stable_sort_x, "stable-sort!", 2, 0, 0, |
1bbd0b84 GB |
843 | (SCM items, SCM less), |
844 | "") | |
845 | #define FUNC_NAME s_scm_stable_sort_x | |
54e09076 MD |
846 | { |
847 | long len; /* list/vector length */ | |
848 | ||
849 | if (SCM_NULLP (items)) | |
850 | return SCM_EOL; | |
6b5a304f GB |
851 | SCM_VALIDATE_NIM (1,items); |
852 | SCM_VALIDATE_NIM (2,less); | |
54e09076 MD |
853 | if (SCM_CONSP (items)) |
854 | { | |
3b3b36dd | 855 | SCM_VALIDATE_LIST_COPYLEN (1,items,len); |
54e09076 MD |
856 | return scm_merge_list_step (&items, scm_cmp_function (less), less, len); |
857 | } | |
858 | else if (SCM_VECTORP (items)) | |
859 | { | |
860 | SCM *temp, *vp; | |
861 | len = SCM_LENGTH (items); | |
862 | temp = malloc (len * sizeof(SCM)); | |
863 | vp = SCM_VELTS (items); | |
864 | scm_merge_vector_step (vp, | |
865 | temp, | |
866 | scm_cmp_function (less), | |
867 | less, | |
868 | 0, | |
869 | len - 1); | |
870 | free(temp); | |
871 | return items; | |
872 | } | |
873 | else | |
1bbd0b84 GB |
874 | RETURN_SCM_WTA (1,items); |
875 | } | |
0f981281 | 876 | #undef FUNC_NAME |
54e09076 MD |
877 | |
878 | /* stable_sort manages lists and vectors */ | |
1bbd0b84 | 879 | |
a1ec6916 | 880 | SCM_DEFINE (scm_stable_sort, "stable-sort", 2, 0, 0, |
1bbd0b84 GB |
881 | (SCM items, SCM less), |
882 | "") | |
883 | #define FUNC_NAME s_scm_stable_sort | |
54e09076 MD |
884 | { |
885 | long len; /* list/vector length */ | |
886 | if (SCM_NULLP (items)) | |
887 | return SCM_EOL; | |
6b5a304f GB |
888 | SCM_VALIDATE_NIM (1,items); |
889 | SCM_VALIDATE_NIM (2,less); | |
54e09076 MD |
890 | if (SCM_CONSP (items)) |
891 | { | |
3b3b36dd | 892 | SCM_VALIDATE_LIST_COPYLEN (1,items,len); |
54e09076 MD |
893 | items = scm_list_copy (items); |
894 | return scm_merge_list_step (&items, scm_cmp_function (less), less, len); | |
895 | } | |
afe5177e GH |
896 | #ifdef HAVE_ARRAYS |
897 | /* support ordinary vectors even if arrays not available? */ | |
54e09076 MD |
898 | else if (SCM_VECTORP (items)) |
899 | { | |
900 | SCM retvec; | |
901 | SCM *temp, *vp; | |
902 | len = SCM_LENGTH (items); | |
903 | retvec = scm_make_uve (len, scm_array_prototype (items)); | |
904 | scm_array_copy_x (items, retvec); | |
905 | temp = malloc (len * sizeof (SCM)); | |
906 | vp = SCM_VELTS (retvec); | |
907 | scm_merge_vector_step (vp, | |
908 | temp, | |
909 | scm_cmp_function (less), | |
910 | less, | |
911 | 0, | |
912 | len - 1); | |
913 | free (temp); | |
914 | return retvec; | |
915 | } | |
afe5177e | 916 | #endif |
54e09076 | 917 | else |
1bbd0b84 GB |
918 | RETURN_SCM_WTA (1,items); |
919 | } | |
0f981281 | 920 | #undef FUNC_NAME |
54e09076 | 921 | |
1bbd0b84 | 922 | /* stable */ |
a1ec6916 | 923 | SCM_DEFINE (scm_sort_list_x, "sort-list!", 2, 0, 0, |
1bbd0b84 GB |
924 | (SCM items, SCM less), |
925 | "") | |
926 | #define FUNC_NAME s_scm_sort_list_x | |
54e09076 | 927 | { |
1bbd0b84 | 928 | long len; |
3b3b36dd | 929 | SCM_VALIDATE_LIST_COPYLEN (1,items,len); |
6b5a304f | 930 | SCM_VALIDATE_NIM (2,less); |
54e09076 | 931 | return scm_merge_list_step (&items, scm_cmp_function (less), less, len); |
1bbd0b84 | 932 | } |
0f981281 | 933 | #undef FUNC_NAME |
54e09076 | 934 | |
1bbd0b84 | 935 | /* stable */ |
a1ec6916 | 936 | SCM_DEFINE (scm_sort_list, "sort-list", 2, 0, 0, |
1bbd0b84 GB |
937 | (SCM items, SCM less), |
938 | "") | |
939 | #define FUNC_NAME s_scm_sort_list | |
54e09076 | 940 | { |
1bbd0b84 | 941 | long len; |
3b3b36dd | 942 | SCM_VALIDATE_LIST_COPYLEN (1,items,len); |
6b5a304f | 943 | SCM_VALIDATE_NIM (2,less); |
54e09076 MD |
944 | items = scm_list_copy (items); |
945 | return scm_merge_list_step (&items, scm_cmp_function (less), less, len); | |
1bbd0b84 | 946 | } |
0f981281 | 947 | #undef FUNC_NAME |
54e09076 MD |
948 | |
949 | void | |
950 | scm_init_sort () | |
951 | { | |
952 | #include "sort.x" | |
953 | ||
954 | scm_add_feature ("sort"); | |
955 | } | |
89e00824 ML |
956 | |
957 | /* | |
958 | Local Variables: | |
959 | c-file-style: "gnu" | |
960 | End: | |
961 | */ |