From: Linus Torvalds Date: Fri, 21 Mar 2008 20:16:24 +0000 (-0700) Subject: Move name hashing functions into a file of its own X-Git-Tag: v1.5.6-rc0~68^2~3^2~7 X-Git-Url: http://git.tremily.us/?a=commitdiff_plain;h=96872bc200c41407607019c1f0fb005840f576a2;p=git.git Move name hashing functions into a file of its own It's really totally separate functionality, and if we want to start doing case-insensitive hash lookups, I'd rather do it when it's separated out. It also renames "remove_index_entry()" to "remove_name_hash()", because that really describes the thing better. It doesn't actually remove the index entry, that's done by "remove_index_entry_at()", which is something very different, despite the similarity in names. Signed-off-by: Linus Torvalds Signed-off-by: Junio C Hamano --- diff --git a/Makefile b/Makefile index 78b773862..390b37b94 100644 --- a/Makefile +++ b/Makefile @@ -422,6 +422,7 @@ LIB_OBJS += log-tree.o LIB_OBJS += mailmap.o LIB_OBJS += match-trees.o LIB_OBJS += merge-file.o +LIB_OBJS += name-hash.o LIB_OBJS += object.o LIB_OBJS += pack-check.o LIB_OBJS += pack-revindex.o diff --git a/builtin-read-tree.c b/builtin-read-tree.c index e9cfd2bbc..7ac30883b 100644 --- a/builtin-read-tree.c +++ b/builtin-read-tree.c @@ -40,7 +40,7 @@ static int read_cache_unmerged(void) for (i = 0; i < active_nr; i++) { struct cache_entry *ce = active_cache[i]; if (ce_stage(ce)) { - remove_index_entry(ce); + remove_name_hash(ce); if (last && !strcmp(ce->name, last->name)) continue; cache_tree_invalidate_path(active_cache_tree, ce->name); diff --git a/cache.h b/cache.h index 2a1e7ec6b..2afc788f7 100644 --- a/cache.h +++ b/cache.h @@ -153,20 +153,6 @@ static inline void copy_cache_entry(struct cache_entry *dst, struct cache_entry dst->ce_flags = (dst->ce_flags & ~CE_STATE_MASK) | state; } -/* - * We don't actually *remove* it, we can just mark it invalid so that - * we won't find it in lookups. - * - * Not only would we have to search the lists (simple enough), but - * we'd also have to rehash other hash buckets in case this makes the - * hash bucket empty (common). So it's much better to just mark - * it. - */ -static inline void remove_index_entry(struct cache_entry *ce) -{ - ce->ce_flags |= CE_UNHASHED; -} - static inline unsigned create_ce_flags(size_t len, unsigned stage) { if (len >= CE_NAMEMASK) @@ -241,6 +227,23 @@ struct index_state { extern struct index_state the_index; +/* Name hashing */ +extern void add_name_hash(struct index_state *istate, struct cache_entry *ce); +/* + * We don't actually *remove* it, we can just mark it invalid so that + * we won't find it in lookups. + * + * Not only would we have to search the lists (simple enough), but + * we'd also have to rehash other hash buckets in case this makes the + * hash bucket empty (common). So it's much better to just mark + * it. + */ +static inline void remove_name_hash(struct cache_entry *ce) +{ + ce->ce_flags |= CE_UNHASHED; +} + + #ifndef NO_THE_INDEX_COMPATIBILITY_MACROS #define active_cache (the_index.cache) #define active_nr (the_index.cache_nr) diff --git a/name-hash.c b/name-hash.c new file mode 100644 index 000000000..e56eb16c2 --- /dev/null +++ b/name-hash.c @@ -0,0 +1,73 @@ +/* + * name-hash.c + * + * Hashing names in the index state + * + * Copyright (C) 2008 Linus Torvalds + */ +#define NO_THE_INDEX_COMPATIBILITY_MACROS +#include "cache.h" + +static unsigned int hash_name(const char *name, int namelen) +{ + unsigned int hash = 0x123; + + do { + unsigned char c = *name++; + hash = hash*101 + c; + } while (--namelen); + return hash; +} + +static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) +{ + void **pos; + unsigned int hash; + + if (ce->ce_flags & CE_HASHED) + return; + ce->ce_flags |= CE_HASHED; + ce->next = NULL; + hash = hash_name(ce->name, ce_namelen(ce)); + pos = insert_hash(hash, ce, &istate->name_hash); + if (pos) { + ce->next = *pos; + *pos = ce; + } +} + +static void lazy_init_name_hash(struct index_state *istate) +{ + int nr; + + if (istate->name_hash_initialized) + return; + for (nr = 0; nr < istate->cache_nr; nr++) + hash_index_entry(istate, istate->cache[nr]); + istate->name_hash_initialized = 1; +} + +void add_name_hash(struct index_state *istate, struct cache_entry *ce) +{ + ce->ce_flags &= ~CE_UNHASHED; + if (istate->name_hash_initialized) + hash_index_entry(istate, ce); +} + +int index_name_exists(struct index_state *istate, const char *name, int namelen) +{ + unsigned int hash = hash_name(name, namelen); + struct cache_entry *ce; + + lazy_init_name_hash(istate); + ce = lookup_hash(hash, &istate->name_hash); + + while (ce) { + if (!(ce->ce_flags & CE_UNHASHED)) { + if (!cache_name_compare(name, namelen, ce->name, ce->ce_flags)) + return 1; + } + ce = ce->next; + } + return 0; +} diff --git a/read-cache.c b/read-cache.c index a92b25b59..5dc998d21 100644 --- a/read-cache.c +++ b/read-cache.c @@ -23,80 +23,21 @@ struct index_state the_index; -static unsigned int hash_name(const char *name, int namelen) -{ - unsigned int hash = 0x123; - - do { - unsigned char c = *name++; - hash = hash*101 + c; - } while (--namelen); - return hash; -} - -static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) -{ - void **pos; - unsigned int hash; - - if (ce->ce_flags & CE_HASHED) - return; - ce->ce_flags |= CE_HASHED; - ce->next = NULL; - hash = hash_name(ce->name, ce_namelen(ce)); - pos = insert_hash(hash, ce, &istate->name_hash); - if (pos) { - ce->next = *pos; - *pos = ce; - } -} - -static void lazy_init_name_hash(struct index_state *istate) -{ - int nr; - - if (istate->name_hash_initialized) - return; - for (nr = 0; nr < istate->cache_nr; nr++) - hash_index_entry(istate, istate->cache[nr]); - istate->name_hash_initialized = 1; -} - static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce) { - ce->ce_flags &= ~CE_UNHASHED; istate->cache[nr] = ce; - if (istate->name_hash_initialized) - hash_index_entry(istate, ce); + add_name_hash(istate, ce); } static void replace_index_entry(struct index_state *istate, int nr, struct cache_entry *ce) { struct cache_entry *old = istate->cache[nr]; - remove_index_entry(old); + remove_name_hash(old); set_index_entry(istate, nr, ce); istate->cache_changed = 1; } -int index_name_exists(struct index_state *istate, const char *name, int namelen) -{ - unsigned int hash = hash_name(name, namelen); - struct cache_entry *ce; - - lazy_init_name_hash(istate); - ce = lookup_hash(hash, &istate->name_hash); - - while (ce) { - if (!(ce->ce_flags & CE_UNHASHED)) { - if (!cache_name_compare(name, namelen, ce->name, ce->ce_flags)) - return 1; - } - ce = ce->next; - } - return 0; -} - /* * This only updates the "non-critical" parts of the directory * cache, ie the parts that aren't tracked by GIT, and only used @@ -438,7 +379,7 @@ int remove_index_entry_at(struct index_state *istate, int pos) { struct cache_entry *ce = istate->cache[pos]; - remove_index_entry(ce); + remove_name_hash(ce); istate->cache_changed = 1; istate->cache_nr--; if (pos >= istate->cache_nr)