HCoop Git - bpt/guile.git/blame_incremental

... / ...

Commit	Line	Data
	1	/* Extended regular expression matching and search library.
	2	Copyright (C) 2002-2013 Free Software Foundation, Inc.
	3	This file is part of the GNU C Library.
	4	Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
	5
	6	The GNU C Library is free software; you can redistribute it and/or
	7	modify it under the terms of the GNU Lesser General Public
	8	License as published by the Free Software Foundation; either
	9	version 2.1 of the License, or (at your option) any later version.
	10
	11	The GNU C Library is distributed in the hope that it will be useful,
	12	but WITHOUT ANY WARRANTY; without even the implied warranty of
	13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	14	Lesser General Public License for more details.
	15
	16	You should have received a copy of the GNU Lesser General Public
	17	License along with the GNU C Library; if not, see
	18	<http://www.gnu.org/licenses/>. */
	19
	20	static void re_string_construct_common (const char *str, Idx len,
	21	re_string_t *pstr,
	22	RE_TRANSLATE_TYPE trans, bool icase,
	23	const re_dfa_t *dfa) internal_function;
	24	static re_dfastate_t create_ci_newstate (const re_dfa_t dfa,
	25	const re_node_set *nodes,
	26	re_hashval_t hash) internal_function;
	27	static re_dfastate_t create_cd_newstate (const re_dfa_t dfa,
	28	const re_node_set *nodes,
	29	unsigned int context,
	30	re_hashval_t hash) internal_function;
	31	\f
	32	/* Functions for string operation. */
	33
	34	/* This function allocate the buffers. It is necessary to call
	35	re_string_reconstruct before using the object. */
	36
	37	static reg_errcode_t
	38	internal_function __attribute_warn_unused_result__
	39	re_string_allocate (re_string_t pstr, const char str, Idx len, Idx init_len,
	40	RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
	41	{
	42	reg_errcode_t ret;
	43	Idx init_buf_len;
	44
	45	/* Ensure at least one character fits into the buffers. */
	46	if (init_len < dfa->mb_cur_max)
	47	init_len = dfa->mb_cur_max;
	48	init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
	49	re_string_construct_common (str, len, pstr, trans, icase, dfa);
	50
	51	ret = re_string_realloc_buffers (pstr, init_buf_len);
	52	if (BE (ret != REG_NOERROR, 0))
	53	return ret;
	54
	55	pstr->word_char = dfa->word_char;
	56	pstr->word_ops_used = dfa->word_ops_used;
	57	pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
	58	pstr->valid_len = (pstr->mbs_allocated \|\| dfa->mb_cur_max > 1) ? 0 : len;
	59	pstr->valid_raw_len = pstr->valid_len;
	60	return REG_NOERROR;
	61	}
	62
	63	/* This function allocate the buffers, and initialize them. */
	64
	65	static reg_errcode_t
	66	internal_function __attribute_warn_unused_result__
	67	re_string_construct (re_string_t pstr, const char str, Idx len,
	68	RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
	69	{
	70	reg_errcode_t ret;
	71	memset (pstr, '\0', sizeof (re_string_t));
	72	re_string_construct_common (str, len, pstr, trans, icase, dfa);
	73
	74	if (len > 0)
	75	{
	76	ret = re_string_realloc_buffers (pstr, len + 1);
	77	if (BE (ret != REG_NOERROR, 0))
	78	return ret;
	79	}
	80	pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
	81
	82	if (icase)
	83	{
	84	#ifdef RE_ENABLE_I18N
	85	if (dfa->mb_cur_max > 1)
	86	{
	87	while (1)
	88	{
	89	ret = build_wcs_upper_buffer (pstr);
	90	if (BE (ret != REG_NOERROR, 0))
	91	return ret;
	92	if (pstr->valid_raw_len >= len)
	93	break;
	94	if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
	95	break;
	96	ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
	97	if (BE (ret != REG_NOERROR, 0))
	98	return ret;
	99	}
	100	}
	101	else
	102	#endif /* RE_ENABLE_I18N */
	103	build_upper_buffer (pstr);
	104	}
	105	else
	106	{
	107	#ifdef RE_ENABLE_I18N
	108	if (dfa->mb_cur_max > 1)
	109	build_wcs_buffer (pstr);
	110	else
	111	#endif /* RE_ENABLE_I18N */
	112	{
	113	if (trans != NULL)
	114	re_string_translate_buffer (pstr);
	115	else
	116	{
	117	pstr->valid_len = pstr->bufs_len;
	118	pstr->valid_raw_len = pstr->bufs_len;
	119	}
	120	}
	121	}
	122
	123	return REG_NOERROR;
	124	}
	125
	126	/* Helper functions for re_string_allocate, and re_string_construct. */
	127
	128	static reg_errcode_t
	129	internal_function __attribute_warn_unused_result__
	130	re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
	131	{
	132	#ifdef RE_ENABLE_I18N
	133	if (pstr->mb_cur_max > 1)
	134	{
	135	wint_t *new_wcs;
	136
	137	/* Avoid overflow in realloc. */
	138	const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
	139	if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_buf_len, 0))
	140	return REG_ESPACE;
	141
	142	new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
	143	if (BE (new_wcs == NULL, 0))
	144	return REG_ESPACE;
	145	pstr->wcs = new_wcs;
	146	if (pstr->offsets != NULL)
	147	{
	148	Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
	149	if (BE (new_offsets == NULL, 0))
	150	return REG_ESPACE;
	151	pstr->offsets = new_offsets;
	152	}
	153	}
	154	#endif /* RE_ENABLE_I18N */
	155	if (pstr->mbs_allocated)
	156	{
	157	unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
	158	new_buf_len);
	159	if (BE (new_mbs == NULL, 0))
	160	return REG_ESPACE;
	161	pstr->mbs = new_mbs;
	162	}
	163	pstr->bufs_len = new_buf_len;
	164	return REG_NOERROR;
	165	}
	166
	167
	168	static void
	169	internal_function
	170	re_string_construct_common (const char str, Idx len, re_string_t pstr,
	171	RE_TRANSLATE_TYPE trans, bool icase,
	172	const re_dfa_t *dfa)
	173	{
	174	pstr->raw_mbs = (const unsigned char *) str;
	175	pstr->len = len;
	176	pstr->raw_len = len;
	177	pstr->trans = trans;
	178	pstr->icase = icase;
	179	pstr->mbs_allocated = (trans != NULL \|\| icase);
	180	pstr->mb_cur_max = dfa->mb_cur_max;
	181	pstr->is_utf8 = dfa->is_utf8;
	182	pstr->map_notascii = dfa->map_notascii;
	183	pstr->stop = pstr->len;
	184	pstr->raw_stop = pstr->stop;
	185	}
	186
	187	#ifdef RE_ENABLE_I18N
	188
	189	/* Build wide character buffer PSTR->WCS.
	190	If the byte sequence of the string are:
	191	<mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
	192	Then wide character buffer will be:
	193	<wc1> , WEOF , <wc2> , WEOF , <wc3>
	194	We use WEOF for padding, they indicate that the position isn't
	195	a first byte of a multibyte character.
	196
	197	Note that this function assumes PSTR->VALID_LEN elements are already
	198	built and starts from PSTR->VALID_LEN. */
	199
	200	static void
	201	internal_function
	202	build_wcs_buffer (re_string_t *pstr)
	203	{
	204	#ifdef _LIBC
	205	unsigned char buf[MB_LEN_MAX];
	206	assert (MB_LEN_MAX >= pstr->mb_cur_max);
	207	#else
	208	unsigned char buf[64];
	209	#endif
	210	mbstate_t prev_st;
	211	Idx byte_idx, end_idx, remain_len;
	212	size_t mbclen;
	213
	214	/* Build the buffers from pstr->valid_len to either pstr->len or
	215	pstr->bufs_len. */
	216	end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
	217	for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
	218	{
	219	wchar_t wc;
	220	const char *p;
	221
	222	remain_len = end_idx - byte_idx;
	223	prev_st = pstr->cur_state;
	224	/* Apply the translation if we need. */
	225	if (BE (pstr->trans != NULL, 0))
	226	{
	227	int i, ch;
	228
	229	for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
	230	{
	231	ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
	232	buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
	233	}
	234	p = (const char *) buf;
	235	}
	236	else
	237	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
	238	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
	239	if (BE (mbclen == (size_t) -1 \|\| mbclen == 0
	240	\|\| (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
	241	{
	242	/* We treat these cases as a singlebyte character. */
	243	mbclen = 1;
	244	wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
	245	if (BE (pstr->trans != NULL, 0))
	246	wc = pstr->trans[wc];
	247	pstr->cur_state = prev_st;
	248	}
	249	else if (BE (mbclen == (size_t) -2, 0))
	250	{
	251	/* The buffer doesn't have enough space, finish to build. */
	252	pstr->cur_state = prev_st;
	253	break;
	254	}
	255
	256	/* Write wide character and padding. */
	257	pstr->wcs[byte_idx++] = wc;
	258	/* Write paddings. */
	259	for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
	260	pstr->wcs[byte_idx++] = WEOF;
	261	}
	262	pstr->valid_len = byte_idx;
	263	pstr->valid_raw_len = byte_idx;
	264	}
	265
	266	/* Build wide character buffer PSTR->WCS like build_wcs_buffer,
	267	but for REG_ICASE. */
	268
	269	static reg_errcode_t
	270	internal_function __attribute_warn_unused_result__
	271	build_wcs_upper_buffer (re_string_t *pstr)
	272	{
	273	mbstate_t prev_st;
	274	Idx src_idx, byte_idx, end_idx, remain_len;
	275	size_t mbclen;
	276	#ifdef _LIBC
	277	char buf[MB_LEN_MAX];
	278	assert (MB_LEN_MAX >= pstr->mb_cur_max);
	279	#else
	280	char buf[64];
	281	#endif
	282
	283	byte_idx = pstr->valid_len;
	284	end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
	285
	286	/* The following optimization assumes that ASCII characters can be
	287	mapped to wide characters with a simple cast. */
	288	if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
	289	{
	290	while (byte_idx < end_idx)
	291	{
	292	wchar_t wc;
	293
	294	if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
	295	&& mbsinit (&pstr->cur_state))
	296	{
	297	/* In case of a singlebyte character. */
	298	pstr->mbs[byte_idx]
	299	= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
	300	/* The next step uses the assumption that wchar_t is encoded
	301	ASCII-safe: all ASCII values can be converted like this. */
	302	pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
	303	++byte_idx;
	304	continue;
	305	}
	306
	307	remain_len = end_idx - byte_idx;
	308	prev_st = pstr->cur_state;
	309	mbclen = __mbrtowc (&wc,
	310	((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
	311	+ byte_idx), remain_len, &pstr->cur_state);
	312	if (BE (mbclen < (size_t) -2, 1))
	313	{
	314	wchar_t wcu = wc;
	315	if (iswlower (wc))
	316	{
	317	size_t mbcdlen;
	318
	319	wcu = towupper (wc);
	320	mbcdlen = wcrtomb (buf, wcu, &prev_st);
	321	if (BE (mbclen == mbcdlen, 1))
	322	memcpy (pstr->mbs + byte_idx, buf, mbclen);
	323	else
	324	{
	325	src_idx = byte_idx;
	326	goto offsets_needed;
	327	}
	328	}
	329	else
	330	memcpy (pstr->mbs + byte_idx,
	331	pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
	332	pstr->wcs[byte_idx++] = wcu;
	333	/* Write paddings. */
	334	for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
	335	pstr->wcs[byte_idx++] = WEOF;
	336	}
	337	else if (mbclen == (size_t) -1 \|\| mbclen == 0
	338	\|\| (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
	339	{
	340	/* It is an invalid character, an incomplete character
	341	at the end of the string, or '\0'. Just use the byte. */
	342	int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
	343	pstr->mbs[byte_idx] = ch;
	344	/* And also cast it to wide char. */
	345	pstr->wcs[byte_idx++] = (wchar_t) ch;
	346	if (BE (mbclen == (size_t) -1, 0))
	347	pstr->cur_state = prev_st;
	348	}
	349	else
	350	{
	351	/* The buffer doesn't have enough space, finish to build. */
	352	pstr->cur_state = prev_st;
	353	break;
	354	}
	355	}
	356	pstr->valid_len = byte_idx;
	357	pstr->valid_raw_len = byte_idx;
	358	return REG_NOERROR;
	359	}
	360	else
	361	for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
	362	{
	363	wchar_t wc;
	364	const char *p;
	365	offsets_needed:
	366	remain_len = end_idx - byte_idx;
	367	prev_st = pstr->cur_state;
	368	if (BE (pstr->trans != NULL, 0))
	369	{
	370	int i, ch;
	371
	372	for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
	373	{
	374	ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
	375	buf[i] = pstr->trans[ch];
	376	}
	377	p = (const char *) buf;
	378	}
	379	else
	380	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
	381	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
	382	if (BE (mbclen < (size_t) -2, 1))
	383	{
	384	wchar_t wcu = wc;
	385	if (iswlower (wc))
	386	{
	387	size_t mbcdlen;
	388
	389	wcu = towupper (wc);
	390	mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
	391	if (BE (mbclen == mbcdlen, 1))
	392	memcpy (pstr->mbs + byte_idx, buf, mbclen);
	393	else if (mbcdlen != (size_t) -1)
	394	{
	395	size_t i;
	396
	397	if (byte_idx + mbcdlen > pstr->bufs_len)
	398	{
	399	pstr->cur_state = prev_st;
	400	break;
	401	}
	402
	403	if (pstr->offsets == NULL)
	404	{
	405	pstr->offsets = re_malloc (Idx, pstr->bufs_len);
	406
	407	if (pstr->offsets == NULL)
	408	return REG_ESPACE;
	409	}
	410	if (!pstr->offsets_needed)
	411	{
	412	for (i = 0; i < (size_t) byte_idx; ++i)
	413	pstr->offsets[i] = i;
	414	pstr->offsets_needed = 1;
	415	}
	416
	417	memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
	418	pstr->wcs[byte_idx] = wcu;
	419	pstr->offsets[byte_idx] = src_idx;
	420	for (i = 1; i < mbcdlen; ++i)
	421	{
	422	pstr->offsets[byte_idx + i]
	423	= src_idx + (i < mbclen ? i : mbclen - 1);
	424	pstr->wcs[byte_idx + i] = WEOF;
	425	}
	426	pstr->len += mbcdlen - mbclen;
	427	if (pstr->raw_stop > src_idx)
	428	pstr->stop += mbcdlen - mbclen;
	429	end_idx = (pstr->bufs_len > pstr->len)
	430	? pstr->len : pstr->bufs_len;
	431	byte_idx += mbcdlen;
	432	src_idx += mbclen;
	433	continue;
	434	}
	435	else
	436	memcpy (pstr->mbs + byte_idx, p, mbclen);
	437	}
	438	else
	439	memcpy (pstr->mbs + byte_idx, p, mbclen);
	440
	441	if (BE (pstr->offsets_needed != 0, 0))
	442	{
	443	size_t i;
	444	for (i = 0; i < mbclen; ++i)
	445	pstr->offsets[byte_idx + i] = src_idx + i;
	446	}
	447	src_idx += mbclen;
	448
	449	pstr->wcs[byte_idx++] = wcu;
	450	/* Write paddings. */
	451	for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
	452	pstr->wcs[byte_idx++] = WEOF;
	453	}
	454	else if (mbclen == (size_t) -1 \|\| mbclen == 0
	455	\|\| (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
	456	{
	457	/* It is an invalid character or '\0'. Just use the byte. */
	458	int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
	459
	460	if (BE (pstr->trans != NULL, 0))
	461	ch = pstr->trans [ch];
	462	pstr->mbs[byte_idx] = ch;
	463
	464	if (BE (pstr->offsets_needed != 0, 0))
	465	pstr->offsets[byte_idx] = src_idx;
	466	++src_idx;
	467
	468	/* And also cast it to wide char. */
	469	pstr->wcs[byte_idx++] = (wchar_t) ch;
	470	if (BE (mbclen == (size_t) -1, 0))
	471	pstr->cur_state = prev_st;
	472	}
	473	else
	474	{
	475	/* The buffer doesn't have enough space, finish to build. */
	476	pstr->cur_state = prev_st;
	477	break;
	478	}
	479	}
	480	pstr->valid_len = byte_idx;
	481	pstr->valid_raw_len = src_idx;
	482	return REG_NOERROR;
	483	}
	484
	485	/* Skip characters until the index becomes greater than NEW_RAW_IDX.
	486	Return the index. */
	487
	488	static Idx
	489	internal_function
	490	re_string_skip_chars (re_string_t pstr, Idx new_raw_idx, wint_t last_wc)
	491	{
	492	mbstate_t prev_st;
	493	Idx rawbuf_idx;
	494	size_t mbclen;
	495	wint_t wc = WEOF;
	496
	497	/* Skip the characters which are not necessary to check. */
	498	for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
	499	rawbuf_idx < new_raw_idx;)
	500	{
	501	wchar_t wc2;
	502	Idx remain_len = pstr->raw_len - rawbuf_idx;
	503	prev_st = pstr->cur_state;
	504	mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
	505	remain_len, &pstr->cur_state);
	506	if (BE (mbclen == (size_t) -2 \|\| mbclen == (size_t) -1 \|\| mbclen == 0, 0))
	507	{
	508	/* We treat these cases as a single byte character. */
	509	if (mbclen == 0 \|\| remain_len == 0)
	510	wc = L'\0';
	511	else
	512	wc = (unsigned char ) (pstr->raw_mbs + rawbuf_idx);
	513	mbclen = 1;
	514	pstr->cur_state = prev_st;
	515	}
	516	else
	517	wc = wc2;
	518	/* Then proceed the next character. */
	519	rawbuf_idx += mbclen;
	520	}
	521	*last_wc = wc;
	522	return rawbuf_idx;
	523	}
	524	#endif /* RE_ENABLE_I18N */
	525
	526	/* Build the buffer PSTR->MBS, and apply the translation if we need.
	527	This function is used in case of REG_ICASE. */
	528
	529	static void
	530	internal_function
	531	build_upper_buffer (re_string_t *pstr)
	532	{
	533	Idx char_idx, end_idx;
	534	end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
	535
	536	for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
	537	{
	538	int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
	539	if (BE (pstr->trans != NULL, 0))
	540	ch = pstr->trans[ch];
	541	if (islower (ch))
	542	pstr->mbs[char_idx] = toupper (ch);
	543	else
	544	pstr->mbs[char_idx] = ch;
	545	}
	546	pstr->valid_len = char_idx;
	547	pstr->valid_raw_len = char_idx;
	548	}
	549
	550	/* Apply TRANS to the buffer in PSTR. */
	551
	552	static void
	553	internal_function
	554	re_string_translate_buffer (re_string_t *pstr)
	555	{
	556	Idx buf_idx, end_idx;
	557	end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
	558
	559	for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
	560	{
	561	int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
	562	pstr->mbs[buf_idx] = pstr->trans[ch];
	563	}
	564
	565	pstr->valid_len = buf_idx;
	566	pstr->valid_raw_len = buf_idx;
	567	}
	568
	569	/* This function re-construct the buffers.
	570	Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
	571	convert to upper case in case of REG_ICASE, apply translation. */
	572
	573	static reg_errcode_t
	574	internal_function __attribute_warn_unused_result__
	575	re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
	576	{
	577	Idx offset;
	578
	579	if (BE (pstr->raw_mbs_idx <= idx, 0))
	580	offset = idx - pstr->raw_mbs_idx;
	581	else
	582	{
	583	/* Reset buffer. */
	584	#ifdef RE_ENABLE_I18N
	585	if (pstr->mb_cur_max > 1)
	586	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
	587	#endif /* RE_ENABLE_I18N */
	588	pstr->len = pstr->raw_len;
	589	pstr->stop = pstr->raw_stop;
	590	pstr->valid_len = 0;
	591	pstr->raw_mbs_idx = 0;
	592	pstr->valid_raw_len = 0;
	593	pstr->offsets_needed = 0;
	594	pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
	595	: CONTEXT_NEWLINE \| CONTEXT_BEGBUF);
	596	if (!pstr->mbs_allocated)
	597	pstr->mbs = (unsigned char *) pstr->raw_mbs;
	598	offset = idx;
	599	}
	600
	601	if (BE (offset != 0, 1))
	602	{
	603	/* Should the already checked characters be kept? */
	604	if (BE (offset < pstr->valid_raw_len, 1))
	605	{
	606	/* Yes, move them to the front of the buffer. */
	607	#ifdef RE_ENABLE_I18N
	608	if (BE (pstr->offsets_needed, 0))
	609	{
	610	Idx low = 0, high = pstr->valid_len, mid;
	611	do
	612	{
	613	mid = (high + low) / 2;
	614	if (pstr->offsets[mid] > offset)
	615	high = mid;
	616	else if (pstr->offsets[mid] < offset)
	617	low = mid + 1;
	618	else
	619	break;
	620	}
	621	while (low < high);
	622	if (pstr->offsets[mid] < offset)
	623	++mid;
	624	pstr->tip_context = re_string_context_at (pstr, mid - 1,
	625	eflags);
	626	/* This can be quite complicated, so handle specially
	627	only the common and easy case where the character with
	628	different length representation of lower and upper
	629	case is present at or after offset. */
	630	if (pstr->valid_len > offset
	631	&& mid == offset && pstr->offsets[mid] == offset)
	632	{
	633	memmove (pstr->wcs, pstr->wcs + offset,
	634	(pstr->valid_len - offset) * sizeof (wint_t));
	635	memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
	636	pstr->valid_len -= offset;
	637	pstr->valid_raw_len -= offset;
	638	for (low = 0; low < pstr->valid_len; low++)
	639	pstr->offsets[low] = pstr->offsets[low + offset] - offset;
	640	}
	641	else
	642	{
	643	/* Otherwise, just find out how long the partial multibyte
	644	character at offset is and fill it with WEOF/255. */
	645	pstr->len = pstr->raw_len - idx + offset;
	646	pstr->stop = pstr->raw_stop - idx + offset;
	647	pstr->offsets_needed = 0;
	648	while (mid > 0 && pstr->offsets[mid - 1] == offset)
	649	--mid;
	650	while (mid < pstr->valid_len)
	651	if (pstr->wcs[mid] != WEOF)
	652	break;
	653	else
	654	++mid;
	655	if (mid == pstr->valid_len)
	656	pstr->valid_len = 0;
	657	else
	658	{
	659	pstr->valid_len = pstr->offsets[mid] - offset;
	660	if (pstr->valid_len)
	661	{
	662	for (low = 0; low < pstr->valid_len; ++low)
	663	pstr->wcs[low] = WEOF;
	664	memset (pstr->mbs, 255, pstr->valid_len);
	665	}
	666	}
	667	pstr->valid_raw_len = pstr->valid_len;
	668	}
	669	}
	670	else
	671	#endif
	672	{
	673	pstr->tip_context = re_string_context_at (pstr, offset - 1,
	674	eflags);
	675	#ifdef RE_ENABLE_I18N
	676	if (pstr->mb_cur_max > 1)
	677	memmove (pstr->wcs, pstr->wcs + offset,
	678	(pstr->valid_len - offset) * sizeof (wint_t));
	679	#endif /* RE_ENABLE_I18N */
	680	if (BE (pstr->mbs_allocated, 0))
	681	memmove (pstr->mbs, pstr->mbs + offset,
	682	pstr->valid_len - offset);
	683	pstr->valid_len -= offset;
	684	pstr->valid_raw_len -= offset;
	685	#if DEBUG
	686	assert (pstr->valid_len > 0);
	687	#endif
	688	}
	689	}
	690	else
	691	{
	692	#ifdef RE_ENABLE_I18N
	693	/* No, skip all characters until IDX. */
	694	Idx prev_valid_len = pstr->valid_len;
	695
	696	if (BE (pstr->offsets_needed, 0))
	697	{
	698	pstr->len = pstr->raw_len - idx + offset;
	699	pstr->stop = pstr->raw_stop - idx + offset;
	700	pstr->offsets_needed = 0;
	701	}
	702	#endif
	703	pstr->valid_len = 0;
	704	#ifdef RE_ENABLE_I18N
	705	if (pstr->mb_cur_max > 1)
	706	{
	707	Idx wcs_idx;
	708	wint_t wc = WEOF;
	709
	710	if (pstr->is_utf8)
	711	{
	712	const unsigned char raw, p, *end;
	713
	714	/* Special case UTF-8. Multi-byte chars start with any
	715	byte other than 0x80 - 0xbf. */
	716	raw = pstr->raw_mbs + pstr->raw_mbs_idx;
	717	end = raw + (offset - pstr->mb_cur_max);
	718	if (end < pstr->raw_mbs)
	719	end = pstr->raw_mbs;
	720	p = raw + offset - 1;
	721	#ifdef _LIBC
	722	/* We know the wchar_t encoding is UCS4, so for the simple
	723	case, ASCII characters, skip the conversion step. */
	724	if (isascii (*p) && BE (pstr->trans == NULL, 1))
	725	{
	726	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
	727	/* pstr->valid_len = 0; */
	728	wc = (wchar_t) *p;
	729	}
	730	else
	731	#endif
	732	for (; p >= end; --p)
	733	if ((*p & 0xc0) != 0x80)
	734	{
	735	mbstate_t cur_state;
	736	wchar_t wc2;
	737	Idx mlen = raw + pstr->len - p;
	738	unsigned char buf[6];
	739	size_t mbclen;
	740
	741	const unsigned char *pp = p;
	742	if (BE (pstr->trans != NULL, 0))
	743	{
	744	int i = mlen < 6 ? mlen : 6;
	745	while (--i >= 0)
	746	buf[i] = pstr->trans[p[i]];
	747	pp = buf;
	748	}
	749	/* XXX Don't use mbrtowc, we know which conversion
	750	to use (UTF-8 -> UCS4). */
	751	memset (&cur_state, 0, sizeof (cur_state));
	752	mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
	753	&cur_state);
	754	if (raw + offset - p <= mbclen
	755	&& mbclen < (size_t) -2)
	756	{
	757	memset (&pstr->cur_state, '\0',
	758	sizeof (mbstate_t));
	759	pstr->valid_len = mbclen - (raw + offset - p);
	760	wc = wc2;
	761	}
	762	break;
	763	}
	764	}
	765
	766	if (wc == WEOF)
	767	pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
	768	if (wc == WEOF)
	769	pstr->tip_context
	770	= re_string_context_at (pstr, prev_valid_len - 1, eflags);
	771	else
	772	pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
	773	&& IS_WIDE_WORD_CHAR (wc))
	774	? CONTEXT_WORD
	775	: ((IS_WIDE_NEWLINE (wc)
	776	&& pstr->newline_anchor)
	777	? CONTEXT_NEWLINE : 0));
	778	if (BE (pstr->valid_len, 0))
	779	{
	780	for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
	781	pstr->wcs[wcs_idx] = WEOF;
	782	if (pstr->mbs_allocated)
	783	memset (pstr->mbs, 255, pstr->valid_len);
	784	}
	785	pstr->valid_raw_len = pstr->valid_len;
	786	}
	787	else
	788	#endif /* RE_ENABLE_I18N */
	789	{
	790	int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
	791	pstr->valid_raw_len = 0;
	792	if (pstr->trans)
	793	c = pstr->trans[c];
	794	pstr->tip_context = (bitset_contain (pstr->word_char, c)
	795	? CONTEXT_WORD
	796	: ((IS_NEWLINE (c) && pstr->newline_anchor)
	797	? CONTEXT_NEWLINE : 0));
	798	}
	799	}
	800	if (!BE (pstr->mbs_allocated, 0))
	801	pstr->mbs += offset;
	802	}
	803	pstr->raw_mbs_idx = idx;
	804	pstr->len -= offset;
	805	pstr->stop -= offset;
	806
	807	/* Then build the buffers. */
	808	#ifdef RE_ENABLE_I18N
	809	if (pstr->mb_cur_max > 1)
	810	{
	811	if (pstr->icase)
	812	{
	813	reg_errcode_t ret = build_wcs_upper_buffer (pstr);
	814	if (BE (ret != REG_NOERROR, 0))
	815	return ret;
	816	}
	817	else
	818	build_wcs_buffer (pstr);
	819	}
	820	else
	821	#endif /* RE_ENABLE_I18N */
	822	if (BE (pstr->mbs_allocated, 0))
	823	{
	824	if (pstr->icase)
	825	build_upper_buffer (pstr);
	826	else if (pstr->trans != NULL)
	827	re_string_translate_buffer (pstr);
	828	}
	829	else
	830	pstr->valid_len = pstr->len;
	831
	832	pstr->cur_idx = 0;
	833	return REG_NOERROR;
	834	}
	835
	836	static unsigned char
	837	internal_function __attribute ((pure))
	838	re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
	839	{
	840	int ch;
	841	Idx off;
	842
	843	/* Handle the common (easiest) cases first. */
	844	if (BE (!pstr->mbs_allocated, 1))
	845	return re_string_peek_byte (pstr, idx);
	846
	847	#ifdef RE_ENABLE_I18N
	848	if (pstr->mb_cur_max > 1
	849	&& ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
	850	return re_string_peek_byte (pstr, idx);
	851	#endif
	852
	853	off = pstr->cur_idx + idx;
	854	#ifdef RE_ENABLE_I18N
	855	if (pstr->offsets_needed)
	856	off = pstr->offsets[off];
	857	#endif
	858
	859	ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
	860
	861	#ifdef RE_ENABLE_I18N
	862	/* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
	863	this function returns CAPITAL LETTER I instead of first byte of
	864	DOTLESS SMALL LETTER I. The latter would confuse the parser,
	865	since peek_byte_case doesn't advance cur_idx in any way. */
	866	if (pstr->offsets_needed && !isascii (ch))
	867	return re_string_peek_byte (pstr, idx);
	868	#endif
	869
	870	return ch;
	871	}
	872
	873	static unsigned char
	874	internal_function
	875	re_string_fetch_byte_case (re_string_t *pstr)
	876	{
	877	if (BE (!pstr->mbs_allocated, 1))
	878	return re_string_fetch_byte (pstr);
	879
	880	#ifdef RE_ENABLE_I18N
	881	if (pstr->offsets_needed)
	882	{
	883	Idx off;
	884	int ch;
	885
	886	/* For tr_TR.UTF-8 [[:islower:]] there is
	887	[[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
	888	in that case the whole multi-byte character and return
	889	the original letter. On the other side, with
	890	[[: DOTLESS SMALL LETTER I return [[:I, as doing
	891	anything else would complicate things too much. */
	892
	893	if (!re_string_first_byte (pstr, pstr->cur_idx))
	894	return re_string_fetch_byte (pstr);
	895
	896	off = pstr->offsets[pstr->cur_idx];
	897	ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
	898
	899	if (! isascii (ch))
	900	return re_string_fetch_byte (pstr);
	901
	902	re_string_skip_bytes (pstr,
	903	re_string_char_size_at (pstr, pstr->cur_idx));
	904	return ch;
	905	}
	906	#endif
	907
	908	return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
	909	}
	910
	911	static void
	912	internal_function
	913	re_string_destruct (re_string_t *pstr)
	914	{
	915	#ifdef RE_ENABLE_I18N
	916	re_free (pstr->wcs);
	917	re_free (pstr->offsets);
	918	#endif /* RE_ENABLE_I18N */
	919	if (pstr->mbs_allocated)
	920	re_free (pstr->mbs);
	921	}
	922
	923	/* Return the context at IDX in INPUT. */
	924
	925	static unsigned int
	926	internal_function
	927	re_string_context_at (const re_string_t *input, Idx idx, int eflags)
	928	{
	929	int c;
	930	if (BE (! REG_VALID_INDEX (idx), 0))
	931	/* In this case, we use the value stored in input->tip_context,
	932	since we can't know the character in input->mbs[-1] here. */
	933	return input->tip_context;
	934	if (BE (idx == input->len, 0))
	935	return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
	936	: CONTEXT_NEWLINE \| CONTEXT_ENDBUF);
	937	#ifdef RE_ENABLE_I18N
	938	if (input->mb_cur_max > 1)
	939	{
	940	wint_t wc;
	941	Idx wc_idx = idx;
	942	while(input->wcs[wc_idx] == WEOF)
	943	{
	944	#ifdef DEBUG
	945	/* It must not happen. */
	946	assert (REG_VALID_INDEX (wc_idx));
	947	#endif
	948	--wc_idx;
	949	if (! REG_VALID_INDEX (wc_idx))
	950	return input->tip_context;
	951	}
	952	wc = input->wcs[wc_idx];
	953	if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
	954	return CONTEXT_WORD;
	955	return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
	956	? CONTEXT_NEWLINE : 0);
	957	}
	958	else
	959	#endif
	960	{
	961	c = re_string_byte_at (input, idx);
	962	if (bitset_contain (input->word_char, c))
	963	return CONTEXT_WORD;
	964	return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
	965	}
	966	}
	967	\f
	968	/* Functions for set operation. */
	969
	970	static reg_errcode_t
	971	internal_function __attribute_warn_unused_result__
	972	re_node_set_alloc (re_node_set *set, Idx size)
	973	{
	974	set->alloc = size;
	975	set->nelem = 0;
	976	set->elems = re_malloc (Idx, size);
	977	if (BE (set->elems == NULL, 0) && (MALLOC_0_IS_NONNULL \|\| size != 0))
	978	return REG_ESPACE;
	979	return REG_NOERROR;
	980	}
	981
	982	static reg_errcode_t
	983	internal_function __attribute_warn_unused_result__
	984	re_node_set_init_1 (re_node_set *set, Idx elem)
	985	{
	986	set->alloc = 1;
	987	set->nelem = 1;
	988	set->elems = re_malloc (Idx, 1);
	989	if (BE (set->elems == NULL, 0))
	990	{
	991	set->alloc = set->nelem = 0;
	992	return REG_ESPACE;
	993	}
	994	set->elems[0] = elem;
	995	return REG_NOERROR;
	996	}
	997
	998	static reg_errcode_t
	999	internal_function __attribute_warn_unused_result__
	1000	re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
	1001	{
	1002	set->alloc = 2;
	1003	set->elems = re_malloc (Idx, 2);
	1004	if (BE (set->elems == NULL, 0))
	1005	return REG_ESPACE;
	1006	if (elem1 == elem2)
	1007	{
	1008	set->nelem = 1;
	1009	set->elems[0] = elem1;
	1010	}
	1011	else
	1012	{
	1013	set->nelem = 2;
	1014	if (elem1 < elem2)
	1015	{
	1016	set->elems[0] = elem1;
	1017	set->elems[1] = elem2;
	1018	}
	1019	else
	1020	{
	1021	set->elems[0] = elem2;
	1022	set->elems[1] = elem1;
	1023	}
	1024	}
	1025	return REG_NOERROR;
	1026	}
	1027
	1028	static reg_errcode_t
	1029	internal_function __attribute_warn_unused_result__
	1030	re_node_set_init_copy (re_node_set dest, const re_node_set src)
	1031	{
	1032	dest->nelem = src->nelem;
	1033	if (src->nelem > 0)
	1034	{
	1035	dest->alloc = dest->nelem;
	1036	dest->elems = re_malloc (Idx, dest->alloc);
	1037	if (BE (dest->elems == NULL, 0))
	1038	{
	1039	dest->alloc = dest->nelem = 0;
	1040	return REG_ESPACE;
	1041	}
	1042	memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
	1043	}
	1044	else
	1045	re_node_set_init_empty (dest);
	1046	return REG_NOERROR;
	1047	}
	1048
	1049	/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
	1050	DEST. Return value indicate the error code or REG_NOERROR if succeeded.
	1051	Note: We assume dest->elems is NULL, when dest->alloc is 0. */
	1052
	1053	static reg_errcode_t
	1054	internal_function __attribute_warn_unused_result__
	1055	re_node_set_add_intersect (re_node_set dest, const re_node_set src1,
	1056	const re_node_set *src2)
	1057	{
	1058	Idx i1, i2, is, id, delta, sbase;
	1059	if (src1->nelem == 0 \|\| src2->nelem == 0)
	1060	return REG_NOERROR;
	1061
	1062	/* We need dest->nelem + 2 * elems_in_intersection; this is a
	1063	conservative estimate. */
	1064	if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
	1065	{
	1066	Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
	1067	Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
	1068	if (BE (new_elems == NULL, 0))
	1069	return REG_ESPACE;
	1070	dest->elems = new_elems;
	1071	dest->alloc = new_alloc;
	1072	}
	1073
	1074	/* Find the items in the intersection of SRC1 and SRC2, and copy
	1075	into the top of DEST those that are not already in DEST itself. */
	1076	sbase = dest->nelem + src1->nelem + src2->nelem;
	1077	i1 = src1->nelem - 1;
	1078	i2 = src2->nelem - 1;
	1079	id = dest->nelem - 1;
	1080	for (;;)
	1081	{
	1082	if (src1->elems[i1] == src2->elems[i2])
	1083	{
	1084	/* Try to find the item in DEST. Maybe we could binary search? */
	1085	while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
	1086	--id;
	1087
	1088	if (! REG_VALID_INDEX (id) \|\| dest->elems[id] != src1->elems[i1])
	1089	dest->elems[--sbase] = src1->elems[i1];
	1090
	1091	if (! REG_VALID_INDEX (--i1) \|\| ! REG_VALID_INDEX (--i2))
	1092	break;
	1093	}
	1094
	1095	/* Lower the highest of the two items. */
	1096	else if (src1->elems[i1] < src2->elems[i2])
	1097	{
	1098	if (! REG_VALID_INDEX (--i2))
	1099	break;
	1100	}
	1101	else
	1102	{
	1103	if (! REG_VALID_INDEX (--i1))
	1104	break;
	1105	}
	1106	}
	1107
	1108	id = dest->nelem - 1;
	1109	is = dest->nelem + src1->nelem + src2->nelem - 1;
	1110	delta = is - sbase + 1;
	1111
	1112	/* Now copy. When DELTA becomes zero, the remaining
	1113	DEST elements are already in place; this is more or
	1114	less the same loop that is in re_node_set_merge. */
	1115	dest->nelem += delta;
	1116	if (delta > 0 && REG_VALID_INDEX (id))
	1117	for (;;)
	1118	{
	1119	if (dest->elems[is] > dest->elems[id])
	1120	{
	1121	/* Copy from the top. */
	1122	dest->elems[id + delta--] = dest->elems[is--];
	1123	if (delta == 0)
	1124	break;
	1125	}
	1126	else
	1127	{
	1128	/* Slide from the bottom. */
	1129	dest->elems[id + delta] = dest->elems[id];
	1130	if (! REG_VALID_INDEX (--id))
	1131	break;
	1132	}
	1133	}
	1134
	1135	/* Copy remaining SRC elements. */
	1136	memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
	1137
	1138	return REG_NOERROR;
	1139	}
	1140
	1141	/* Calculate the union set of the sets SRC1 and SRC2. And store it to
	1142	DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
	1143
	1144	static reg_errcode_t
	1145	internal_function __attribute_warn_unused_result__
	1146	re_node_set_init_union (re_node_set dest, const re_node_set src1,
	1147	const re_node_set *src2)
	1148	{
	1149	Idx i1, i2, id;
	1150	if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
	1151	{
	1152	dest->alloc = src1->nelem + src2->nelem;
	1153	dest->elems = re_malloc (Idx, dest->alloc);
	1154	if (BE (dest->elems == NULL, 0))
	1155	return REG_ESPACE;
	1156	}
	1157	else
	1158	{
	1159	if (src1 != NULL && src1->nelem > 0)
	1160	return re_node_set_init_copy (dest, src1);
	1161	else if (src2 != NULL && src2->nelem > 0)
	1162	return re_node_set_init_copy (dest, src2);
	1163	else
	1164	re_node_set_init_empty (dest);
	1165	return REG_NOERROR;
	1166	}
	1167	for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
	1168	{
	1169	if (src1->elems[i1] > src2->elems[i2])
	1170	{
	1171	dest->elems[id++] = src2->elems[i2++];
	1172	continue;
	1173	}
	1174	if (src1->elems[i1] == src2->elems[i2])
	1175	++i2;
	1176	dest->elems[id++] = src1->elems[i1++];
	1177	}
	1178	if (i1 < src1->nelem)
	1179	{
	1180	memcpy (dest->elems + id, src1->elems + i1,
	1181	(src1->nelem - i1) * sizeof (Idx));
	1182	id += src1->nelem - i1;
	1183	}
	1184	else if (i2 < src2->nelem)
	1185	{
	1186	memcpy (dest->elems + id, src2->elems + i2,
	1187	(src2->nelem - i2) * sizeof (Idx));
	1188	id += src2->nelem - i2;
	1189	}
	1190	dest->nelem = id;
	1191	return REG_NOERROR;
	1192	}
	1193
	1194	/* Calculate the union set of the sets DEST and SRC. And store it to
	1195	DEST. Return value indicate the error code or REG_NOERROR if succeeded. */
	1196
	1197	static reg_errcode_t
	1198	internal_function __attribute_warn_unused_result__
	1199	re_node_set_merge (re_node_set dest, const re_node_set src)
	1200	{
	1201	Idx is, id, sbase, delta;
	1202	if (src == NULL \|\| src->nelem == 0)
	1203	return REG_NOERROR;
	1204	if (dest->alloc < 2 * src->nelem + dest->nelem)
	1205	{
	1206	Idx new_alloc = 2 * (src->nelem + dest->alloc);
	1207	Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
	1208	if (BE (new_buffer == NULL, 0))
	1209	return REG_ESPACE;
	1210	dest->elems = new_buffer;
	1211	dest->alloc = new_alloc;
	1212	}
	1213
	1214	if (BE (dest->nelem == 0, 0))
	1215	{
	1216	dest->nelem = src->nelem;
	1217	memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
	1218	return REG_NOERROR;
	1219	}
	1220
	1221	/* Copy into the top of DEST the items of SRC that are not
	1222	found in DEST. Maybe we could binary search in DEST? */
	1223	for (sbase = dest->nelem + 2 * src->nelem,
	1224	is = src->nelem - 1, id = dest->nelem - 1;
	1225	REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
	1226	{
	1227	if (dest->elems[id] == src->elems[is])
	1228	is--, id--;
	1229	else if (dest->elems[id] < src->elems[is])
	1230	dest->elems[--sbase] = src->elems[is--];
	1231	else /* if (dest->elems[id] > src->elems[is]) */
	1232	--id;
	1233	}
	1234
	1235	if (REG_VALID_INDEX (is))
	1236	{
	1237	/* If DEST is exhausted, the remaining items of SRC must be unique. */
	1238	sbase -= is + 1;
	1239	memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
	1240	}
	1241
	1242	id = dest->nelem - 1;
	1243	is = dest->nelem + 2 * src->nelem - 1;
	1244	delta = is - sbase + 1;
	1245	if (delta == 0)
	1246	return REG_NOERROR;
	1247
	1248	/* Now copy. When DELTA becomes zero, the remaining
	1249	DEST elements are already in place. */
	1250	dest->nelem += delta;
	1251	for (;;)
	1252	{
	1253	if (dest->elems[is] > dest->elems[id])
	1254	{
	1255	/* Copy from the top. */
	1256	dest->elems[id + delta--] = dest->elems[is--];
	1257	if (delta == 0)
	1258	break;
	1259	}
	1260	else
	1261	{
	1262	/* Slide from the bottom. */
	1263	dest->elems[id + delta] = dest->elems[id];
	1264	if (! REG_VALID_INDEX (--id))
	1265	{
	1266	/* Copy remaining SRC elements. */
	1267	memcpy (dest->elems, dest->elems + sbase,
	1268	delta * sizeof (Idx));
	1269	break;
	1270	}
	1271	}
	1272	}
	1273
	1274	return REG_NOERROR;
	1275	}
	1276
	1277	/* Insert the new element ELEM to the re_node_set* SET.
	1278	SET should not already have ELEM.
	1279	Return true if successful. */
	1280
	1281	static bool
	1282	internal_function __attribute_warn_unused_result__
	1283	re_node_set_insert (re_node_set *set, Idx elem)
	1284	{
	1285	Idx idx;
	1286	/* In case the set is empty. */
	1287	if (set->alloc == 0)
	1288	return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
	1289
	1290	if (BE (set->nelem, 0) == 0)
	1291	{
	1292	/* We already guaranteed above that set->alloc != 0. */
	1293	set->elems[0] = elem;
	1294	++set->nelem;
	1295	return true;
	1296	}
	1297
	1298	/* Realloc if we need. */
	1299	if (set->alloc == set->nelem)
	1300	{
	1301	Idx *new_elems;
	1302	set->alloc = set->alloc * 2;
	1303	new_elems = re_realloc (set->elems, Idx, set->alloc);
	1304	if (BE (new_elems == NULL, 0))
	1305	return false;
	1306	set->elems = new_elems;
	1307	}
	1308
	1309	/* Move the elements which follows the new element. Test the
	1310	first element separately to skip a check in the inner loop. */
	1311	if (elem < set->elems[0])
	1312	{
	1313	idx = 0;
	1314	for (idx = set->nelem; idx > 0; idx--)
	1315	set->elems[idx] = set->elems[idx - 1];
	1316	}
	1317	else
	1318	{
	1319	for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
	1320	set->elems[idx] = set->elems[idx - 1];
	1321	}
	1322
	1323	/* Insert the new element. */
	1324	set->elems[idx] = elem;
	1325	++set->nelem;
	1326	return true;
	1327	}
	1328
	1329	/* Insert the new element ELEM to the re_node_set* SET.
	1330	SET should not already have any element greater than or equal to ELEM.
	1331	Return true if successful. */
	1332
	1333	static bool
	1334	internal_function __attribute_warn_unused_result__
	1335	re_node_set_insert_last (re_node_set *set, Idx elem)
	1336	{
	1337	/* Realloc if we need. */
	1338	if (set->alloc == set->nelem)
	1339	{
	1340	Idx *new_elems;
	1341	set->alloc = (set->alloc + 1) * 2;
	1342	new_elems = re_realloc (set->elems, Idx, set->alloc);
	1343	if (BE (new_elems == NULL, 0))
	1344	return false;
	1345	set->elems = new_elems;
	1346	}
	1347
	1348	/* Insert the new element. */
	1349	set->elems[set->nelem++] = elem;
	1350	return true;
	1351	}
	1352
	1353	/* Compare two node sets SET1 and SET2.
	1354	Return true if SET1 and SET2 are equivalent. */
	1355
	1356	static bool
	1357	internal_function __attribute ((pure))
	1358	re_node_set_compare (const re_node_set set1, const re_node_set set2)
	1359	{
	1360	Idx i;
	1361	if (set1 == NULL \|\| set2 == NULL \|\| set1->nelem != set2->nelem)
	1362	return false;
	1363	for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
	1364	if (set1->elems[i] != set2->elems[i])
	1365	return false;
	1366	return true;
	1367	}
	1368
	1369	/* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. */
	1370
	1371	static Idx
	1372	internal_function __attribute ((pure))
	1373	re_node_set_contains (const re_node_set *set, Idx elem)
	1374	{
	1375	__re_size_t idx, right, mid;
	1376	if (! REG_VALID_NONZERO_INDEX (set->nelem))
	1377	return 0;
	1378
	1379	/* Binary search the element. */
	1380	idx = 0;
	1381	right = set->nelem - 1;
	1382	while (idx < right)
	1383	{
	1384	mid = (idx + right) / 2;
	1385	if (set->elems[mid] < elem)
	1386	idx = mid + 1;
	1387	else
	1388	right = mid;
	1389	}
	1390	return set->elems[idx] == elem ? idx + 1 : 0;
	1391	}
	1392
	1393	static void
	1394	internal_function
	1395	re_node_set_remove_at (re_node_set *set, Idx idx)
	1396	{
	1397	if (idx < 0 \|\| idx >= set->nelem)
	1398	return;
	1399	--set->nelem;
	1400	for (; idx < set->nelem; idx++)
	1401	set->elems[idx] = set->elems[idx + 1];
	1402	}
	1403	\f
	1404
	1405	/* Add the token TOKEN to dfa->nodes, and return the index of the token.
	1406	Or return REG_MISSING if an error occurred. */
	1407
	1408	static Idx
	1409	internal_function
	1410	re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
	1411	{
	1412	if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
	1413	{
	1414	size_t new_nodes_alloc = dfa->nodes_alloc * 2;
	1415	Idx new_nexts, new_indices;
	1416	re_node_set new_edests, new_eclosures;
	1417	re_token_t *new_nodes;
	1418
	1419	/* Avoid overflows in realloc. */
	1420	const size_t max_object_size = MAX (sizeof (re_token_t),
	1421	MAX (sizeof (re_node_set),
	1422	sizeof (Idx)));
	1423	if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < new_nodes_alloc, 0))
	1424	return REG_MISSING;
	1425
	1426	new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
	1427	if (BE (new_nodes == NULL, 0))
	1428	return REG_MISSING;
	1429	dfa->nodes = new_nodes;
	1430	new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
	1431	new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
	1432	new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
	1433	new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
	1434	if (BE (new_nexts == NULL \|\| new_indices == NULL
	1435	\|\| new_edests == NULL \|\| new_eclosures == NULL, 0))
	1436	return REG_MISSING;
	1437	dfa->nexts = new_nexts;
	1438	dfa->org_indices = new_indices;
	1439	dfa->edests = new_edests;
	1440	dfa->eclosures = new_eclosures;
	1441	dfa->nodes_alloc = new_nodes_alloc;
	1442	}
	1443	dfa->nodes[dfa->nodes_len] = token;
	1444	dfa->nodes[dfa->nodes_len].constraint = 0;
	1445	#ifdef RE_ENABLE_I18N
	1446	dfa->nodes[dfa->nodes_len].accept_mb =
	1447	((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
	1448	\|\| token.type == COMPLEX_BRACKET);
	1449	#endif
	1450	dfa->nexts[dfa->nodes_len] = REG_MISSING;
	1451	re_node_set_init_empty (dfa->edests + dfa->nodes_len);
	1452	re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
	1453	return dfa->nodes_len++;
	1454	}
	1455
	1456	static re_hashval_t
	1457	internal_function
	1458	calc_state_hash (const re_node_set *nodes, unsigned int context)
	1459	{
	1460	re_hashval_t hash = nodes->nelem + context;
	1461	Idx i;
	1462	for (i = 0 ; i < nodes->nelem ; i++)
	1463	hash += nodes->elems[i];
	1464	return hash;
	1465	}
	1466
	1467	/* Search for the state whose node_set is equivalent to NODES.
	1468	Return the pointer to the state, if we found it in the DFA.
	1469	Otherwise create the new one and return it. In case of an error
	1470	return NULL and set the error code in ERR.
	1471	Note: - We assume NULL as the invalid state, then it is possible that
	1472	return value is NULL and ERR is REG_NOERROR.
	1473	- We never return non-NULL value in case of any errors, it is for
	1474	optimization. */
	1475
	1476	static re_dfastate_t *
	1477	internal_function __attribute_warn_unused_result__
	1478	re_acquire_state (reg_errcode_t err, const re_dfa_t dfa,
	1479	const re_node_set *nodes)
	1480	{
	1481	re_hashval_t hash;
	1482	re_dfastate_t *new_state;
	1483	struct re_state_table_entry *spot;
	1484	Idx i;
	1485	#ifdef lint
	1486	/* Suppress bogus uninitialized-variable warnings. */
	1487	*err = REG_NOERROR;
	1488	#endif
	1489	if (BE (nodes->nelem == 0, 0))
	1490	{
	1491	*err = REG_NOERROR;
	1492	return NULL;
	1493	}
	1494	hash = calc_state_hash (nodes, 0);
	1495	spot = dfa->state_table + (hash & dfa->state_hash_mask);
	1496
	1497	for (i = 0 ; i < spot->num ; i++)
	1498	{
	1499	re_dfastate_t *state = spot->array[i];
	1500	if (hash != state->hash)
	1501	continue;
	1502	if (re_node_set_compare (&state->nodes, nodes))
	1503	return state;
	1504	}
	1505
	1506	/* There are no appropriate state in the dfa, create the new one. */
	1507	new_state = create_ci_newstate (dfa, nodes, hash);
	1508	if (BE (new_state == NULL, 0))
	1509	*err = REG_ESPACE;
	1510
	1511	return new_state;
	1512	}
	1513
	1514	/* Search for the state whose node_set is equivalent to NODES and
	1515	whose context is equivalent to CONTEXT.
	1516	Return the pointer to the state, if we found it in the DFA.
	1517	Otherwise create the new one and return it. In case of an error
	1518	return NULL and set the error code in ERR.
	1519	Note: - We assume NULL as the invalid state, then it is possible that
	1520	return value is NULL and ERR is REG_NOERROR.
	1521	- We never return non-NULL value in case of any errors, it is for
	1522	optimization. */
	1523
	1524	static re_dfastate_t *
	1525	internal_function __attribute_warn_unused_result__
	1526	re_acquire_state_context (reg_errcode_t err, const re_dfa_t dfa,
	1527	const re_node_set *nodes, unsigned int context)
	1528	{
	1529	re_hashval_t hash;
	1530	re_dfastate_t *new_state;
	1531	struct re_state_table_entry *spot;
	1532	Idx i;
	1533	#ifdef lint
	1534	/* Suppress bogus uninitialized-variable warnings. */
	1535	*err = REG_NOERROR;
	1536	#endif
	1537	if (nodes->nelem == 0)
	1538	{
	1539	*err = REG_NOERROR;
	1540	return NULL;
	1541	}
	1542	hash = calc_state_hash (nodes, context);
	1543	spot = dfa->state_table + (hash & dfa->state_hash_mask);
	1544
	1545	for (i = 0 ; i < spot->num ; i++)
	1546	{
	1547	re_dfastate_t *state = spot->array[i];
	1548	if (state->hash == hash
	1549	&& state->context == context
	1550	&& re_node_set_compare (state->entrance_nodes, nodes))
	1551	return state;
	1552	}
	1553	/* There are no appropriate state in 'dfa', create the new one. */
	1554	new_state = create_cd_newstate (dfa, nodes, context, hash);
	1555	if (BE (new_state == NULL, 0))
	1556	*err = REG_ESPACE;
	1557
	1558	return new_state;
	1559	}
	1560
	1561	/* Finish initialization of the new state NEWSTATE, and using its hash value
	1562	HASH put in the appropriate bucket of DFA's state table. Return value
	1563	indicates the error code if failed. */
	1564
	1565	static reg_errcode_t
	1566	__attribute_warn_unused_result__
	1567	register_state (const re_dfa_t dfa, re_dfastate_t newstate,
	1568	re_hashval_t hash)
	1569	{
	1570	struct re_state_table_entry *spot;
	1571	reg_errcode_t err;
	1572	Idx i;
	1573
	1574	newstate->hash = hash;
	1575	err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
	1576	if (BE (err != REG_NOERROR, 0))
	1577	return REG_ESPACE;
	1578	for (i = 0; i < newstate->nodes.nelem; i++)
	1579	{
	1580	Idx elem = newstate->nodes.elems[i];
	1581	if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
	1582	if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem))
	1583	return REG_ESPACE;
	1584	}
	1585
	1586	spot = dfa->state_table + (hash & dfa->state_hash_mask);
	1587	if (BE (spot->alloc <= spot->num, 0))
	1588	{
	1589	Idx new_alloc = 2 * spot->num + 2;
	1590	re_dfastate_t *new_array = re_realloc (spot->array, re_dfastate_t ,
	1591	new_alloc);
	1592	if (BE (new_array == NULL, 0))
	1593	return REG_ESPACE;
	1594	spot->array = new_array;
	1595	spot->alloc = new_alloc;
	1596	}
	1597	spot->array[spot->num++] = newstate;
	1598	return REG_NOERROR;
	1599	}
	1600
	1601	static void
	1602	free_state (re_dfastate_t *state)
	1603	{
	1604	re_node_set_free (&state->non_eps_nodes);
	1605	re_node_set_free (&state->inveclosure);
	1606	if (state->entrance_nodes != &state->nodes)
	1607	{
	1608	re_node_set_free (state->entrance_nodes);
	1609	re_free (state->entrance_nodes);
	1610	}
	1611	re_node_set_free (&state->nodes);
	1612	re_free (state->word_trtable);
	1613	re_free (state->trtable);
	1614	re_free (state);
	1615	}
	1616
	1617	/* Create the new state which is independent of contexts.
	1618	Return the new state if succeeded, otherwise return NULL. */
	1619
	1620	static re_dfastate_t *
	1621	internal_function __attribute_warn_unused_result__
	1622	create_ci_newstate (const re_dfa_t dfa, const re_node_set nodes,
	1623	re_hashval_t hash)
	1624	{
	1625	Idx i;
	1626	reg_errcode_t err;
	1627	re_dfastate_t *newstate;
	1628
	1629	newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
	1630	if (BE (newstate == NULL, 0))
	1631	return NULL;
	1632	err = re_node_set_init_copy (&newstate->nodes, nodes);
	1633	if (BE (err != REG_NOERROR, 0))
	1634	{
	1635	re_free (newstate);
	1636	return NULL;
	1637	}
	1638
	1639	newstate->entrance_nodes = &newstate->nodes;
	1640	for (i = 0 ; i < nodes->nelem ; i++)
	1641	{
	1642	re_token_t *node = dfa->nodes + nodes->elems[i];
	1643	re_token_type_t type = node->type;
	1644	if (type == CHARACTER && !node->constraint)
	1645	continue;
	1646	#ifdef RE_ENABLE_I18N
	1647	newstate->accept_mb \|= node->accept_mb;
	1648	#endif /* RE_ENABLE_I18N */
	1649
	1650	/* If the state has the halt node, the state is a halt state. */
	1651	if (type == END_OF_RE)
	1652	newstate->halt = 1;
	1653	else if (type == OP_BACK_REF)
	1654	newstate->has_backref = 1;
	1655	else if (type == ANCHOR \|\| node->constraint)
	1656	newstate->has_constraint = 1;
	1657	}
	1658	err = register_state (dfa, newstate, hash);
	1659	if (BE (err != REG_NOERROR, 0))
	1660	{
	1661	free_state (newstate);
	1662	newstate = NULL;
	1663	}
	1664	return newstate;
	1665	}
	1666
	1667	/* Create the new state which is depend on the context CONTEXT.
	1668	Return the new state if succeeded, otherwise return NULL. */
	1669
	1670	static re_dfastate_t *
	1671	internal_function __attribute_warn_unused_result__
	1672	create_cd_newstate (const re_dfa_t dfa, const re_node_set nodes,
	1673	unsigned int context, re_hashval_t hash)
	1674	{
	1675	Idx i, nctx_nodes = 0;
	1676	reg_errcode_t err;
	1677	re_dfastate_t *newstate;
	1678
	1679	newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
	1680	if (BE (newstate == NULL, 0))
	1681	return NULL;
	1682	err = re_node_set_init_copy (&newstate->nodes, nodes);
	1683	if (BE (err != REG_NOERROR, 0))
	1684	{
	1685	re_free (newstate);
	1686	return NULL;
	1687	}
	1688
	1689	newstate->context = context;
	1690	newstate->entrance_nodes = &newstate->nodes;
	1691
	1692	for (i = 0 ; i < nodes->nelem ; i++)
	1693	{
	1694	re_token_t *node = dfa->nodes + nodes->elems[i];
	1695	re_token_type_t type = node->type;
	1696	unsigned int constraint = node->constraint;
	1697
	1698	if (type == CHARACTER && !constraint)
	1699	continue;
	1700	#ifdef RE_ENABLE_I18N
	1701	newstate->accept_mb \|= node->accept_mb;
	1702	#endif /* RE_ENABLE_I18N */
	1703
	1704	/* If the state has the halt node, the state is a halt state. */
	1705	if (type == END_OF_RE)
	1706	newstate->halt = 1;
	1707	else if (type == OP_BACK_REF)
	1708	newstate->has_backref = 1;
	1709
	1710	if (constraint)
	1711	{
	1712	if (newstate->entrance_nodes == &newstate->nodes)
	1713	{
	1714	newstate->entrance_nodes = re_malloc (re_node_set, 1);
	1715	if (BE (newstate->entrance_nodes == NULL, 0))
	1716	{
	1717	free_state (newstate);
	1718	return NULL;
	1719	}
	1720	if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
	1721	!= REG_NOERROR)
	1722	return NULL;
	1723	nctx_nodes = 0;
	1724	newstate->has_constraint = 1;
	1725	}
	1726
	1727	if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
	1728	{
	1729	re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
	1730	++nctx_nodes;
	1731	}
	1732	}
	1733	}
	1734	err = register_state (dfa, newstate, hash);
	1735	if (BE (err != REG_NOERROR, 0))
	1736	{
	1737	free_state (newstate);
	1738	newstate = NULL;
	1739	}
	1740	return newstate;
	1741	}