Commit | Line | Data |
---|---|---|
0c842e3a NG |
1 | diff --git a/rp-private.h b/rp-private.h |
2 | index b4c7dad..0c7193e 100644 | |
3 | --- a/rp-private.h | |
4 | +++ b/rp-private.h | |
5 | @@ -36,7 +36,7 @@ | |
6 | #define LONG_SHIFT ((LONG_LENGTH == 16) ? 4 : \ | |
7 | (LONG_LENGTH == 32) ? 5 : \ | |
8 | (LONG_LENGTH == 64) ? 6 : 0) | |
9 | -#define LONG_MASK (~(-1L<<LONG_SHIFT)) | |
10 | +#define LONG_MASK (~(-(1L<<LONG_SHIFT))) | |
11 | ||
12 | /* Check if SSE instructions can be used. | |
13 | We assume that one SSE word of 128 bit is two long's, | |
14 | diff --git a/sturm.c b/sturm.c | |
15 | index c78d7c6..5fd2cf5 100644 | |
16 | --- a/sturm.c | |
17 | +++ b/sturm.c | |
18 | @@ -27,7 +27,6 @@ | |
19 | ***********************************************************************/ | |
20 | ||
21 | #include "ratpoints.h" | |
22 | - | |
23 | /************************************************************************** | |
24 | * Arguments of _ratpoints_compute_sturm() : (from the args argument) * | |
25 | * * | |
26 | @@ -53,7 +52,7 @@ | |
27 | /* A helper function: evaluate the polynomial in cofs[] of given degree | |
28 | at num/2^denexp and return the sign. */ | |
29 | ||
30 | -static long eval_sign(ratpoints_args *args, mpz_t *cofs, long degree, | |
31 | +static long eval_sign(const ratpoints_args *args, const mpz_t *cofs, long degree, | |
32 | long num, long denexp) | |
33 | { | |
34 | long n, e, s; | |
35 | @@ -70,11 +69,80 @@ static long eval_sign(ratpoints_args *args, mpz_t *cofs, long degree, | |
36 | return(s); | |
37 | } | |
38 | ||
39 | +static const long max = (long)(((unsigned long)(-1))>>1); | |
40 | +static const long min = (long)(-(((unsigned long)(-1))>>1)); | |
41 | + /* recursive helper function */ | |
42 | +static void iterate(long nl, long nr, long del, long der, long cleft, long cright, | |
43 | + long sl, long sr, long depth, | |
44 | + ratpoints_interval **iptr, const ratpoints_interval *ivlo, | |
45 | + const ratpoints_args *args, const long k, const long sturm_degs[], | |
46 | + const mpz_t sturm[][args->degree + 1]) | |
47 | + { /* nl/2^del, nr/2^der : interval left/right endpoints, | |
48 | + cleft, cright: sign change counts at endpoints, | |
49 | + sl, sr: signs at endpoints, | |
50 | + depth: iteration depth */ | |
51 | + long iter = args->sturm; | |
52 | + if(cleft == cright && sl < 0) { return; } | |
53 | + /* here we know the polynomial is negative on the interval */ | |
54 | + if((cleft == cright && sl > 0) || depth >= iter) | |
55 | + /* we have to add/extend an interval if we either know that | |
56 | + the polynomial is positive on the interval (first condition) | |
57 | + or the maximal iteration depth has been reached (second condition) */ | |
58 | + { double l = ((double)nl)/((double)(1<<del)); | |
59 | + double u = ((double)nr)/((double)(1<<der)); | |
60 | + if(*iptr == ivlo) | |
61 | + { (*iptr)->low = l; (*iptr)->up = u; (*iptr)++; } | |
62 | + else | |
63 | + { if(((*iptr)-1)->up == l) /* extend interval */ | |
64 | + { ((*iptr)-1)->up = u; } | |
65 | + else /* new interval */ | |
66 | + { (*iptr)->low = l; (*iptr)->up = u; (*iptr)++; } | |
67 | + } | |
68 | + return; | |
69 | + } | |
70 | + /* now we must split the interval and evaluate the sturm sequence | |
71 | + at the midpoint */ | |
72 | + { long nm, dem, s0, s1, s2, s, cmid = 0, n; | |
73 | + if(nl == min) | |
74 | + { if(nr == max) { nm = 0; dem = 0; } | |
75 | + else { nm = (nr == 0) ? -1 : 2*nr; dem = 0; } | |
76 | + } | |
77 | + else | |
78 | + { if(nr == max) { nm = (nl == 0) ? 1 : 2*nl; dem = 0; } | |
79 | + else /* "normal" case */ | |
80 | + { if(del == der) /* then both are zero */ | |
81 | + { if(((nl+nr) & 1) == 0) { nm = (nl+nr)>>1; dem = 0; } | |
82 | + else { nm = nl+nr; dem = 1; } | |
83 | + } | |
84 | + else /* here one de* is greater */ | |
85 | + { if(del > der) { nm = nl + (nr<<(del-der)); dem = del+1; } | |
86 | + else { nm = (nl<<(der-del)) + nr; dem = der+1; } | |
87 | + } | |
88 | + } | |
89 | + } | |
90 | + s0 = eval_sign(args, sturm[0], sturm_degs[0], nm, dem); | |
91 | + s1 = eval_sign(args, sturm[1], sturm_degs[1], nm, dem); | |
92 | + if(s0*s1 == -1) { cmid++; } | |
93 | + s = (s1 == 0) ? s0 : s1; | |
94 | + for(n = 2; n <= k; n++) | |
95 | + { s2 = eval_sign(args, sturm[n], sturm_degs[n], nm, dem); | |
96 | + if(s2 == -s) { cmid++; s = s2; } | |
97 | + else if(s2 != 0) { s = s2; } | |
98 | + } | |
99 | + /* now recurse */ | |
100 | + iterate(nl, nm, del, dem, cleft, (s0==0) ? (cmid+1) : cmid, | |
101 | + sl, (s0==0) ? -s1 : s0, depth+1, | |
102 | + iptr, ivlo, args, k, sturm_degs, sturm); | |
103 | + iterate(nm, nr, dem, der, cmid, cright, | |
104 | + (s0==0) ? s1 : s0, sr, depth+1, | |
105 | + iptr, ivlo, args, k, sturm_degs, sturm); | |
106 | + } | |
107 | + } /* end iterate() */ | |
108 | + | |
109 | long _ratpoints_compute_sturm(ratpoints_args *args) | |
110 | { | |
111 | mpz_t *cofs = args->cof; | |
112 | long degree = args->degree; | |
113 | - long iter = args->sturm; | |
114 | ratpoints_interval *ivlist = args->domain; | |
115 | long num_iv = args->num_inter; | |
116 | long n, m, k, new_num; | |
117 | @@ -165,75 +233,12 @@ long _ratpoints_compute_sturm(ratpoints_args *args) | |
118 | /* recall: typedef struct {double low; double up;} ratpoints_interval; */ | |
119 | { ratpoints_interval ivlocal[1 + (degree>>1)]; | |
120 | ratpoints_interval *iptr = &ivlocal[0]; | |
121 | - long max = (long)(((unsigned long)(-1))>>1); | |
122 | - long min = -max; | |
123 | long num_intervals; | |
124 | long slcf = mpz_cmp_si(cofs[degree], 0); | |
125 | ||
126 | - /* recursive helper function */ | |
127 | - void iterate(long nl, long nr, long del, long der, long cleft, long cright, | |
128 | - long sl, long sr, long depth) | |
129 | - { /* nl/2^del, nr/2^der : interval left/right endpoints, | |
130 | - cleft, cright: sign change counts at endpoints, | |
131 | - sl, sr: signs at endpoints, | |
132 | - depth: iteration depth */ | |
133 | - if(cleft == cright && sl < 0) { return; } | |
134 | - /* here we know the polynomial is negative on the interval */ | |
135 | - if((cleft == cright && sl > 0) || depth >= iter) | |
136 | - /* we have to add/extend an interval if we either know that | |
137 | - the polynomial is positive on the interval (first condition) | |
138 | - or the maximal iteration depth has been reached (second condition) */ | |
139 | - { double l = ((double)nl)/((double)(1<<del)); | |
140 | - double u = ((double)nr)/((double)(1<<der)); | |
141 | - if(iptr == &ivlocal[0]) | |
142 | - { iptr->low = l; iptr->up = u; iptr++; } | |
143 | - else | |
144 | - { if((iptr-1)->up == l) /* extend interval */ | |
145 | - { (iptr-1)->up = u; } | |
146 | - else /* new interval */ | |
147 | - { iptr->low = l; iptr->up = u; iptr++; } | |
148 | - } | |
149 | - return; | |
150 | - } | |
151 | - /* now we must split the interval and evaluate the sturm sequence | |
152 | - at the midpoint */ | |
153 | - { long nm, dem, s0, s1, s2, s, cmid = 0, n; | |
154 | - if(nl == min) | |
155 | - { if(nr == max) { nm = 0; dem = 0; } | |
156 | - else { nm = (nr == 0) ? -1 : 2*nr; dem = 0; } | |
157 | - } | |
158 | - else | |
159 | - { if(nr == max) { nm = (nl == 0) ? 1 : 2*nl; dem = 0; } | |
160 | - else /* "normal" case */ | |
161 | - { if(del == der) /* then both are zero */ | |
162 | - { if(((nl+nr) & 1) == 0) { nm = (nl+nr)>>1; dem = 0; } | |
163 | - else { nm = nl+nr; dem = 1; } | |
164 | - } | |
165 | - else /* here one de* is greater */ | |
166 | - { if(del > der) { nm = nl + (nr<<(del-der)); dem = del+1; } | |
167 | - else { nm = (nl<<(der-del)) + nr; dem = der+1; } | |
168 | - } | |
169 | - } | |
170 | - } | |
171 | - s0 = eval_sign(args, sturm[0], sturm_degs[0], nm, dem); | |
172 | - s1 = eval_sign(args, sturm[1], sturm_degs[1], nm, dem); | |
173 | - if(s0*s1 == -1) { cmid++; } | |
174 | - s = (s1 == 0) ? s0 : s1; | |
175 | - for(n = 2; n <= k; n++) | |
176 | - { s2 = eval_sign(args, sturm[n], sturm_degs[n], nm, dem); | |
177 | - if(s2 == -s) { cmid++; s = s2; } | |
178 | - else if(s2 != 0) { s = s2; } | |
179 | - } | |
180 | - /* now recurse */ | |
181 | - iterate(nl, nm, del, dem, cleft, (s0==0) ? (cmid+1) : cmid, | |
182 | - sl, (s0==0) ? -s1 : s0, depth+1); | |
183 | - iterate(nm, nr, dem, der, cmid, cright, | |
184 | - (s0==0) ? s1 : s0, sr, depth+1); | |
185 | - } | |
186 | - } /* end iterate() */ | |
187 | - | |
188 | iterate(min, max, 0, 0, count2, count1, | |
189 | - (degree & 1) ? -slcf : slcf, slcf, 0); | |
190 | + (degree & 1) ? -slcf : slcf, slcf, 0, | |
191 | + &iptr, &ivlocal[0], args, k, sturm_degs, sturm); | |
192 | num_intervals = iptr - &ivlocal[0]; | |
193 | /* intersect with given intervals */ | |
194 | { ratpoints_interval local_copy[num_iv]; |