WebSVN – planix.SVN – Blame – /ports/trunk/editors/vimp9src/vim71/src/hashtab.c

Rev	Author	Line No.	Line
100	7u83	1	`/* vi:set ts=8 sts=4 sw=4:`
		2	`*`
		3	`* VIM - Vi IMproved by Bram Moolenaar`
		4	`*`
		5	`* Do ":help uganda" in Vim to read copying and usage conditions.`
		6	`* Do ":help credits" in Vim to see a list of people who contributed.`
		7	`* See README.txt for an overview of the Vim source code.`
		8	`*/`
		9
		10	`/*`
		11	`* hashtab.c: Handling of a hashtable with Vim-specific properties.`
		12	`*`
		13	`* Each item in a hashtable has a NUL terminated string key. A key can appear`
		14	`* only once in the table.`
		15	`*`
		16	`* A hash number is computed from the key for quick lookup. When the hashes`
		17	`* of two different keys point to the same entry an algorithm is used to`
		18	`* iterate over other entries in the table until the right one is found.`
		19	`* To make the iteration work removed keys are different from entries where a`
		20	`* key was never present.`
		21	`*`
		22	`* The mechanism has been partly based on how Python Dictionaries are`
		23	`* implemented. The algorithm is from Knuth Vol. 3, Sec. 6.4.`
		24	`*`
		25	`* The hashtable grows to accommodate more entries when needed. At least 1/3`
		26	`* of the entries is empty to keep the lookup efficient (at the cost of extra`
		27	`* memory).`
		28	`*/`
		29
		30	`#include "vim.h"`
		31
		32	`#if defined(FEAT_EVAL) || defined(FEAT_SYN_HL) || defined(PROTO)`
		33
		34	`#if 0`
		35	`# define HT_DEBUG /* extra checks for table consistency and statistics */`
		36
		37	`static long hash_count_lookup = 0; /* count number of hashtab lookups */`
		38	`static long hash_count_perturb = 0; /* count number of "misses" */`
		39	`#endif`
		40
		41	`/* Magic value for algorithm that walks through the array. */`
		42	`#define PERTURB_SHIFT 5`
		43
		44	`static int hash_may_resize __ARGS((hashtab_T *ht, int minitems));`
		45
		46	`#if 0 /* currently not used */`
		47	`/*`
		48	`* Create an empty hash table.`
		49	`* Returns NULL when out of memory.`
		50	`*/`
		51	`hashtab_T *`
		52	`hash_create()`
		53	`{`
		54	`hashtab_T *ht;`
		55
		56	`ht = (hashtab_T *)alloc(sizeof(hashtab_T));`
		57	`if (ht != NULL)`
		58	`hash_init(ht);`
		59	`return ht;`
		60	`}`
		61	`#endif`
		62
		63	`/*`
		64	`* Initialize an empty hash table.`
		65	`*/`
		66	`void`
		67	`hash_init(ht)`
		68	`hashtab_T *ht;`
		69	`{`
		70	`/* This zeroes all "ht_" entries and all the "hi_key" in "ht_smallarray". */`
		71	`vim_memset(ht, 0, sizeof(hashtab_T));`
		72	`ht->ht_array = ht->ht_smallarray;`
		73	`ht->ht_mask = HT_INIT_SIZE - 1;`
		74	`}`
		75
		76	`/*`
		77	`* Free the array of a hash table. Does not free the items it contains!`
		78	`* If "ht" is not freed then you should call hash_init() next!`
		79	`*/`
		80	`void`
		81	`hash_clear(ht)`
		82	`hashtab_T *ht;`
		83	`{`
		84	`if (ht->ht_array != ht->ht_smallarray)`
		85	`vim_free(ht->ht_array);`
		86	`}`
		87
		88	`/*`
		89	`* Free the array of a hash table and all the keys it contains. The keys must`
		90	`* have been allocated. "off" is the offset from the start of the allocate`
		91	`* memory to the location of the key (it's always positive).`
		92	`*/`
		93	`void`
		94	`hash_clear_all(ht, off)`
		95	`hashtab_T *ht;`
		96	`int off;`
		97	`{`
		98	`long todo;`
		99	`hashitem_T *hi;`
		100
		101	`todo = (long)ht->ht_used;`
		102	`for (hi = ht->ht_array; todo > 0; ++hi)`
		103	`{`
		104	`if (!HASHITEM_EMPTY(hi))`
		105	`{`
		106	`vim_free(hi->hi_key - off);`
		107	`--todo;`
		108	`}`
		109	`}`
		110	`hash_clear(ht);`
		111	`}`
		112
		113	`/*`
		114	`* Find "key" in hashtable "ht". "key" must not be NULL.`
		115	`* Always returns a pointer to a hashitem. If the item was not found then`
		116	`* HASHITEM_EMPTY() is TRUE. The pointer is then the place where the key`
		117	`* would be added.`
		118	`* WARNING: The returned pointer becomes invalid when the hashtable is changed`
		119	`* (adding, setting or removing an item)!`
		120	`*/`
		121	`hashitem_T *`
		122	`hash_find(ht, key)`
		123	`hashtab_T *ht;`
		124	`char_u *key;`
		125	`{`
		126	`return hash_lookup(ht, key, hash_hash(key));`
		127	`}`
		128
		129	`/*`
		130	`* Like hash_find(), but caller computes "hash".`
		131	`*/`
		132	`hashitem_T *`
		133	`hash_lookup(ht, key, hash)`
		134	`hashtab_T *ht;`
		135	`char_u *key;`
		136	`hash_T hash;`
		137	`{`
		138	`hash_T perturb;`
		139	`hashitem_T *freeitem;`
		140	`hashitem_T *hi;`
		141	`int idx;`
		142
		143	`#ifdef HT_DEBUG`
		144	`++hash_count_lookup;`
		145	`#endif`
		146
		147	`/*`
		148	`* Quickly handle the most common situations:`
		149	`* - return if there is no item at all`
		150	`* - skip over a removed item`
		151	`* - return if the item matches`
		152	`*/`
		153	`idx = (int)(hash & ht->ht_mask);`
		154	`hi = &ht->ht_array[idx];`
		155
		156	`if (hi->hi_key == NULL)`
		157	`return hi;`
		158	`if (hi->hi_key == HI_KEY_REMOVED)`
		159	`freeitem = hi;`
		160	`else if (hi->hi_hash == hash && STRCMP(hi->hi_key, key) == 0)`
		161	`return hi;`
		162	`else`
		163	`freeitem = NULL;`
		164
		165	`/*`
		166	`* Need to search through the table to find the key. The algorithm`
		167	`* to step through the table starts with large steps, gradually becoming`
		168	`* smaller down to (1/4 table size + 1). This means it goes through all`
		169	`* table entries in the end.`
		170	`* When we run into a NULL key it's clear that the key isn't there.`
		171	`* Return the first available slot found (can be a slot of a removed`
		172	`* item).`
		173	`*/`
		174	`for (perturb = hash; ; perturb >>= PERTURB_SHIFT)`
		175	`{`
		176	`#ifdef HT_DEBUG`
		177	`++hash_count_perturb; /* count a "miss" for hashtab lookup */`
		178	`#endif`
		179	`idx = (int)((idx << 2) + idx + perturb + 1);`
		180	`hi = &ht->ht_array[idx & ht->ht_mask];`
		181	`if (hi->hi_key == NULL)`
		182	`return freeitem == NULL ? hi : freeitem;`
		183	`if (hi->hi_hash == hash`
		184	`&& hi->hi_key != HI_KEY_REMOVED`
		185	`&& STRCMP(hi->hi_key, key) == 0)`
		186	`return hi;`
		187	`if (hi->hi_key == HI_KEY_REMOVED && freeitem == NULL)`
		188	`freeitem = hi;`
		189	`}`
		190	`}`
		191
		192	`/*`
		193	`* Print the efficiency of hashtable lookups.`
		194	`* Useful when trying different hash algorithms.`
		195	`* Called when exiting.`
		196	`*/`
		197	`void`
		198	`hash_debug_results()`
		199	`{`
		200	`#ifdef HT_DEBUG`
		201	`fprintf(stderr, "\r\n\r\n\r\n\r\n");`
		202	`fprintf(stderr, "Number of hashtable lookups: %ld\r\n", hash_count_lookup);`
		203	`fprintf(stderr, "Number of perturb loops: %ld\r\n", hash_count_perturb);`
		204	`fprintf(stderr, "Percentage of perturb loops: %ld%%\r\n",`
		205	`hash_count_perturb * 100 / hash_count_lookup);`
		206	`#endif`
		207	`}`
		208
		209	`/*`
		210	`* Add item with key "key" to hashtable "ht".`
		211	`* Returns FAIL when out of memory or the key is already present.`
		212	`*/`
		213	`int`
		214	`hash_add(ht, key)`
		215	`hashtab_T *ht;`
		216	`char_u *key;`
		217	`{`
		218	`hash_T hash = hash_hash(key);`
		219	`hashitem_T *hi;`
		220
		221	`hi = hash_lookup(ht, key, hash);`
		222	`if (!HASHITEM_EMPTY(hi))`
		223	`{`
		224	`EMSG2(_(e_intern2), "hash_add()");`
		225	`return FAIL;`
		226	`}`
		227	`return hash_add_item(ht, hi, key, hash);`
		228	`}`
		229
		230	`/*`
		231	`* Add item "hi" with "key" to hashtable "ht". "key" must not be NULL and`
		232	`* "hi" must have been obtained with hash_lookup() and point to an empty item.`
		233	`* "hi" is invalid after this!`
		234	`* Returns OK or FAIL (out of memory).`
		235	`*/`
		236	`int`
		237	`hash_add_item(ht, hi, key, hash)`
		238	`hashtab_T *ht;`
		239	`hashitem_T *hi;`
		240	`char_u *key;`
		241	`hash_T hash;`
		242	`{`
		243	`/* If resizing failed before and it fails again we can't add an item. */`
		244	`if (ht->ht_error && hash_may_resize(ht, 0) == FAIL)`
		245	`return FAIL;`
		246
		247	`++ht->ht_used;`
		248	`if (hi->hi_key == NULL)`
		249	`++ht->ht_filled;`
		250	`hi->hi_key = key;`
		251	`hi->hi_hash = hash;`
		252
		253	`/* When the space gets low may resize the array. */`
		254	`return hash_may_resize(ht, 0);`
		255	`}`
		256
		257	`#if 0 /* not used */`
		258	`/*`
		259	`* Overwrite hashtable item "hi" with "key". "hi" must point to the item that`
		260	`* is to be overwritten. Thus the number of items in the hashtable doesn't`
		261	`* change.`
		262	`* Although the key must be identical, the pointer may be different, thus it's`
		263	`* set anyway (the key is part of an item with that key).`
		264	`* The caller must take care of freeing the old item.`
		265	`* "hi" is invalid after this!`
		266	`*/`
		267	`void`
		268	`hash_set(hi, key)`
		269	`hashitem_T *hi;`
		270	`char_u *key;`
		271	`{`
		272	`hi->hi_key = key;`
		273	`}`
		274	`#endif`
		275
		276	`/*`
		277	`* Remove item "hi" from hashtable "ht". "hi" must have been obtained with`
		278	`* hash_lookup().`
		279	`* The caller must take care of freeing the item itself.`
		280	`*/`
		281	`void`
		282	`hash_remove(ht, hi)`
		283	`hashtab_T *ht;`
		284	`hashitem_T *hi;`
		285	`{`
		286	`--ht->ht_used;`
		287	`hi->hi_key = HI_KEY_REMOVED;`
		288	`hash_may_resize(ht, 0);`
		289	`}`
		290
		291	`/*`
		292	`* Lock a hashtable: prevent that ht_array changes.`
		293	`* Don't use this when items are to be added!`
		294	`* Must call hash_unlock() later.`
		295	`*/`
		296	`void`
		297	`hash_lock(ht)`
		298	`hashtab_T *ht;`
		299	`{`
		300	`++ht->ht_locked;`
		301	`}`
		302
		303	`#if 0 /* currently not used */`
		304	`/*`
		305	`* Lock a hashtable at the specified number of entries.`
		306	`* Caller must make sure no more than "size" entries will be added.`
		307	`* Must call hash_unlock() later.`
		308	`*/`
		309	`void`
		310	`hash_lock_size(ht, size)`
		311	`hashtab_T *ht;`
		312	`int size;`
		313	`{`
		314	`(void)hash_may_resize(ht, size);`
		315	`++ht->ht_locked;`
		316	`}`
		317	`#endif`
		318
		319	`/*`
		320	`* Unlock a hashtable: allow ht_array changes again.`
		321	`* Table will be resized (shrink) when necessary.`
		322	`* This must balance a call to hash_lock().`
		323	`*/`
		324	`void`
		325	`hash_unlock(ht)`
		326	`hashtab_T *ht;`
		327	`{`
		328	`--ht->ht_locked;`
		329	`(void)hash_may_resize(ht, 0);`
		330	`}`
		331
		332	`/*`
		333	`* Shrink a hashtable when there is too much empty space.`
		334	`* Grow a hashtable when there is not enough empty space.`
		335	`* Returns OK or FAIL (out of memory).`
		336	`*/`
		337	`static int`
		338	`hash_may_resize(ht, minitems)`
		339	`hashtab_T *ht;`
		340	`int minitems; /* minimal number of items */`
		341	`{`
		342	`hashitem_T temparray[HT_INIT_SIZE];`
		343	`hashitem_T oldarray, newarray;`
		344	`hashitem_T olditem, newitem;`
		345	`int newi;`
		346	`int todo;`
		347	`long_u oldsize, newsize;`
		348	`long_u minsize;`
		349	`long_u newmask;`
		350	`hash_T perturb;`
		351
		352	`/* Don't resize a locked table. */`
		353	`if (ht->ht_locked > 0)`
		354	`return OK;`
		355
		356	`#ifdef HT_DEBUG`
		357	`if (ht->ht_used > ht->ht_filled)`
		358	`EMSG("hash_may_resize(): more used than filled");`
		359	`if (ht->ht_filled >= ht->ht_mask + 1)`
		360	`EMSG("hash_may_resize(): table completely filled");`
		361	`#endif`
		362
		363	`if (minitems == 0)`
		364	`{`
		365	`/* Return quickly for small tables with at least two NULL items. NULL`
		366	`* items are required for the lookup to decide a key isn't there. */`
		367	`if (ht->ht_filled < HT_INIT_SIZE - 1`
		368	`&& ht->ht_array == ht->ht_smallarray)`
		369	`return OK;`
		370
		371	`/*`
		372	`* Grow or refill the array when it's more than 2/3 full (including`
		373	`* removed items, so that they get cleaned up).`
		374	`* Shrink the array when it's less than 1/5 full. When growing it is`
		375	`* at least 1/4 full (avoids repeated grow-shrink operations)`
		376	`*/`
		377	`oldsize = ht->ht_mask + 1;`
		378	`if (ht->ht_filled * 3 < oldsize * 2 && ht->ht_used > oldsize / 5)`
		379	`return OK;`
		380
		381	`if (ht->ht_used > 1000)`
		382	`minsize = ht->ht_used * 2; /* it's big, don't make too much room */`
		383	`else`
		384	`minsize = ht->ht_used * 4; /* make plenty of room */`
		385	`}`
		386	`else`
		387	`{`
		388	`/* Use specified size. */`
		389	`if ((long_u)minitems < ht->ht_used) /* just in case... */`
		390	`minitems = (int)ht->ht_used;`
		391	`minsize = minitems * 3 / 2; /* array is up to 2/3 full */`
		392	`}`
		393
		394	`newsize = HT_INIT_SIZE;`
		395	`while (newsize < minsize)`
		396	`{`
		397	`newsize <<= 1; /* make sure it's always a power of 2 */`
		398	`if (newsize == 0)`
		399	`return FAIL; /* overflow */`
		400	`}`
		401
		402	`if (newsize == HT_INIT_SIZE)`
		403	`{`
		404	`/* Use the small array inside the hashdict structure. */`
		405	`newarray = ht->ht_smallarray;`
		406	`if (ht->ht_array == newarray)`
		407	`{`
		408	`/* Moving from ht_smallarray to ht_smallarray! Happens when there`
		409	`* are many removed items. Copy the items to be able to clean up`
		410	`* removed items. */`
		411	`mch_memmove(temparray, newarray, sizeof(temparray));`
		412	`oldarray = temparray;`
		413	`}`
		414	`else`
		415	`oldarray = ht->ht_array;`
		416	`}`
		417	`else`
		418	`{`
		419	`/* Allocate an array. */`
		420	`newarray = (hashitem_T *)alloc((unsigned)`
		421	`(sizeof(hashitem_T) * newsize));`
		422	`if (newarray == NULL)`
		423	`{`
		424	`/* Out of memory. When there are NULL items still return OK.`
		425	`* Otherwise set ht_error, because lookup may result in a hang if`
		426	`* we add another item. */`
		427	`if (ht->ht_filled < ht->ht_mask)`
		428	`return OK;`
		429	`ht->ht_error = TRUE;`
		430	`return FAIL;`
		431	`}`
		432	`oldarray = ht->ht_array;`
		433	`}`
		434	`vim_memset(newarray, 0, (size_t)(sizeof(hashitem_T) * newsize));`
		435
		436	`/*`
		437	`* Move all the items from the old array to the new one, placing them in`
		438	`* the right spot. The new array won't have any removed items, thus this`
		439	`* is also a cleanup action.`
		440	`*/`
		441	`newmask = newsize - 1;`
		442	`todo = (int)ht->ht_used;`
		443	`for (olditem = oldarray; todo > 0; ++olditem)`
		444	`if (!HASHITEM_EMPTY(olditem))`
		445	`{`
		446	`/*`
		447	`* The algorithm to find the spot to add the item is identical to`
		448	`* the algorithm to find an item in hash_lookup(). But we only`
		449	`* need to search for a NULL key, thus it's simpler.`
		450	`*/`
		451	`newi = (int)(olditem->hi_hash & newmask);`
		452	`newitem = &newarray[newi];`
		453
		454	`if (newitem->hi_key != NULL)`
		455	`for (perturb = olditem->hi_hash; ; perturb >>= PERTURB_SHIFT)`
		456	`{`
		457	`newi = (int)((newi << 2) + newi + perturb + 1);`
		458	`newitem = &newarray[newi & newmask];`
		459	`if (newitem->hi_key == NULL)`
		460	`break;`
		461	`}`
		462	`newitem = olditem;`
		463	`--todo;`
		464	`}`
		465
		466	`if (ht->ht_array != ht->ht_smallarray)`
		467	`vim_free(ht->ht_array);`
		468	`ht->ht_array = newarray;`
		469	`ht->ht_mask = newmask;`
		470	`ht->ht_filled = ht->ht_used;`
		471	`ht->ht_error = FALSE;`
		472
		473	`return OK;`
		474	`}`
		475
		476	`/*`
		477	`* Get the hash number for a key.`
		478	`* If you think you know a better hash function: Compile with HT_DEBUG set and`
		479	`* run a script that uses hashtables a lot. Vim will then print statistics`
		480	`* when exiting. Try that with the current hash algorithm and yours. The`
		481	`* lower the percentage the better.`
		482	`*/`
		483	`hash_T`
		484	`hash_hash(key)`
		485	`char_u *key;`
		486	`{`
		487	`hash_T hash;`
		488	`char_u *p;`
		489
		490	`if ((hash = *key) == 0)`
		491	`return (hash_T)0; /* Empty keys are not allowed, but we don't`
		492	`want to crash if we get one. */`
		493	`p = key + 1;`
		494
		495	`#if 0`
		496	`/* ElfHash algorithm, which is supposed to have an even distribution.`
		497	`* Suggested by Charles Campbell. */`
		498	`hash_T g;`
		499
		500	`while (*p != NUL)`
		501	`{`
		502	`hash = (hash << 4) + p++; / clear low 4 bits of hash, add char */`
		503	`g = hash & 0xf0000000L; /* g has high 4 bits of hash only */`
		504	`if (g != 0)`
		505	`hash ^= g >> 24; /* xor g's high 4 bits into hash */`
		506	`}`
		507	`#else`
		508
		509	`/* A simplistic algorithm that appears to do very well.`
		510	`* Suggested by George Reilly. */`
		511	`while (*p != NUL)`
		512	`hash = hash * 101 + *p++;`
		513	`#endif`
		514
		515	`return hash;`
		516	`}`
		517
		518	`#endif`

Subversion Repositories planix.SVN

(root)/ports/trunk/editors/vimp9src/vim71/src/hashtab.c – Rev 100