/*---------------------------------------------------------------------------
 * String Management
 *
 *---------------------------------------------------------------------------
 * TODO:: Optimize for short strings to reduce overhead?
 * To reduce the memory used for string storage, the driver implements
 * string sharing: for every string the driver keeps track in a refcount
 * how many users it has. If the refcount falls back to 0, the string can
 * be safely deallocated again. On the other hand, if a refcount overflows
 * to 0, the string is considered a constant.
 *
 * To reduce memory usage even further, strings can be entered into a table.
 * On the creation of a new string the driver can look up the table for
 * an already existing copy and return a reference to a string held therein.
 * This is used mainly for function names in programs, but also for
 * mapping keys. The table is organized as a hash table
 * with HTABLE_SIZE entries.
 *
 * Strings are sequences of chars, stored in an array of known size. The
 * size itself is stored separately, allowing the string to contain every
 * possible character. Internally the module appends a '\0' character
 * to the string data to make it somewhat compatible with C system
 * functions; however, this character itself is not counted in the size
 * of the string, and the module itself doesn't rely on it.
 *
 * Strings are managed using a single structure: string_t.
 *
 *   struct string_s
 *   {
 *       struct {
 *           Bool tabled      :  1;
 *           unsigned int ref : 31;
 *       } info;
 *       string_t * next;    String table pointer.
 *       size_t     size;    Length of the string
 *       whash_t    hash;    0, or the hash of the string
 *       char       txt[1.. .size];
 *       char       null     Gratuitous terminator
 *   }
 *
 * The hash of the string is computed on-demand. Should the string hash
 * to value 0, the value with only the highest bit of the hash type set
 * (0x8000 for a 16-bit hash) is used instead - this way the usual
 * calculation (hash % tablesize) won't be affected.
 *
 * This string_t value is the one referenced by svalues and the like.
* It allows the following string types: * * Untabled, freely allocated strings: * .tabled is FALSE * * Tabled (shared) strings: * .tabled is TRUE * .next is the hash chain pointer, the reference from the * table is not counted. * * TODO: Make functions mstr_add() resilient to receiving NULL * TODO:: pointers as args. This way stuff like rc = * TODO:: mstr_add(rc,...) will always work and we need to check * TODO:: for rc != NULL only at the end. * TODO: Distinguish between the allocated size of a string and the * TODO:: used size. To use this efficiently, functions like mstr_insert()... * TODO:: might become necessary. *--------------------------------------------------------------------------- */ #include "driver.h" #include #include "mstrings.h" #include "gcollect.h" #include "hash.h" #include "main.h" #include "simulate.h" #include "stdstrings.h" #include "strfuns.h" #include "svalue.h" #include "xalloc.h" #include "../mudlib/sys/debug_info.h" /*-------------------------------------------------------------------------*/ /* Adapt a hash value to our table size. */ #if !( (HTABLE_SIZE) & (HTABLE_SIZE)-1 ) # define HashToIndex(h) ((h) & ((HTABLE_SIZE)-1)) #else # define HashToIndex(h) ((h) % HTABLE_SIZE) #endif /*-------------------------------------------------------------------------*/ static string_t ** stringtable = NULL; /* The hashed string table: an array of pointers to the heads of * the string chains. */ /* Statistics */ mp_uint mstr_used = 0; /* Number of virtually allocated strings - every reference counts * as separate copy. */ mp_uint mstr_used_size = 0; /* Total virtual size of allocated strings counted * - every reference counts as separate copy. * This does include the memory by the string management structures. */ static mp_uint mstr_tabled_count = 0; /* Number of distinct strings in the string table. */ static mp_uint mstr_tabled_size = 0; /* Total memory held in the string table. 
*/

static mp_uint mstr_chains = 0;
  /* Number of hash chains in the string table.
   */

static mp_uint mstr_added = 0;
  /* Number of distinct strings added to the string table.
   */

static mp_uint mstr_deleted = 0;
  /* Number of distinct strings deleted from the string table.
   */

static mp_uint mstr_collisions = 0;
  /* Number of collisions when adding a new distinct string.
   */

static mp_uint mstr_untabled_count = 0;
  /* Number of distinct untabled strings.
   */

static mp_uint mstr_untabled_size = 0;
  /* Total memory held in untabled strings.
   */

static mp_uint mstr_searchlen_byvalue = 0;
  /* Number of search steps along hash chains with content comparisons.
   */

static mp_uint mstr_searches_byvalue = 0;
  /* Number of searches in the string table with content comparison.
   */

static mp_uint mstr_found_byvalue = 0;
  /* Number of successful searches in the string table with content
   * comparison.
   */

static mp_uint mstr_searchlen = 0;
  /* Number of search steps along hash chains without content comparisons.
   */

static mp_uint mstr_searches = 0;
  /* Number of searches in the string table without content comparisons.
   */

static mp_uint mstr_found = 0;
  /* Number of successful searches in the string table without content
   * comparison (counted by move_to_head()).
   */

#ifdef EXT_STRING_STATS
/* Extended statistics, only compiled in on request. */
unsigned long stNumEqual = 0;
  /* Calls of mstring_equal(). */
unsigned long stNumHashEqual = 0;
  /* mstring_equal() calls decided by the hashes of the two strings;
   * despite the name it is incremented when the hashes differ.
   */
unsigned long stNumTabledEqual = 0;
  /* mstring_equal() calls decided by pointer identity where the first
   * string is tabled.
   */
unsigned long stNumComp = 0;
  /* Calls of mstring_compare() and mstring_order(). */
unsigned long stNumTabledComp = 0;
  /* Comparisons decided by pointer identity where the first string is
   * tabled.
   */
unsigned long stNumTabledChecked = 0;
  /* Calls of mstring_find_tabled(). */
unsigned long stNumTabledCheckedTable = 0;
  /* ... of which the argument was already tabled. */
unsigned long stNumTabledCheckedSearch = 0;
  /* ... of which the string table had to be searched. */
#endif /* EXT_STRING_STATS */

/*-------------------------------------------------------------------------*/
static INLINE whash_t
hash_string_inl (const char * const s, size_t size)

/* Compute the hash for the 'size' bytes of string 's' and return it.
 * The result will always be non-zero.
*/ { whash_t hash; hash = whashmem(s, size, MSTRING_HASH_LENGTH); if (!hash) hash = 1 << (sizeof (hash) * CHAR_BIT - 1); return hash; } /* hash_string_inl() */ whash_t hash_string (const char * const s, size_t size) { return hash_string_inl(s, size); } /*-------------------------------------------------------------------------*/ static INLINE whash_t get_hash (string_t * pStr) /* Return the hash of string , computing it if necessary. */ { if (!pStr->hash) pStr->hash = hash_string_inl(pStr->txt, pStr->size); return pStr->hash; } /* get_hash() */ /*-------------------------------------------------------------------------*/ whash_t mstring_get_hash (string_t * pStr) /* Aliased to: mstr_get_hash() * * Return the hash value of , computing it if necessary. */ { return get_hash(pStr); } /* mstring_get_hash() */ /*-------------------------------------------------------------------------*/ static INLINE string_t * find_and_move (const char * const s, size_t size, whash_t hash) /* If is a tabled string of length and in the related * stringtable chain: find it, move it to the head of the chain and return its * string_t*. * * If is not tabled, return NULL. */ { string_t *prev, *rover; int idx = HashToIndex(hash); mstr_searches_byvalue++; /* Find the string in the table */ mstr_searchlen_byvalue++; for ( prev = NULL, rover = stringtable[idx] ; rover != NULL && get_txt(rover) != s && !( size == mstrsize(rover) && hash == get_hash(rover) && 0 == memcmp(get_txt(rover), s, size) ) ; prev = rover, rover = rover->next ) mstr_searchlen_byvalue++; /* If the string is in the table (rover != NULL), but not at the beginning * of the chain, move it there. 
*/ if (rover && prev) { prev->next = rover->next; rover->next = stringtable[idx]; stringtable[idx] = rover; } if (rover) mstr_found_byvalue++; return rover; } /* find_and_move() */ /*-------------------------------------------------------------------------*/ static INLINE string_t * move_to_head (string_t *s, int idx) /* If is a tabled string in the stringtable[] chain: move it to * the head of the chain and return its pointer. * If is not found in that chain, return NULL. */ { string_t *prev, *rover; mstr_searches++; /* Find the string in the table */ mstr_searchlen++; for ( prev = NULL, rover = stringtable[idx] ; rover != NULL && rover != s ; prev = rover, rover = rover->next ) { mstr_searchlen++; } /* If s is found (rover != NULL), but not at the beginning of the chain, * move it there */ if (rover && prev) { prev->next = rover->next; rover->next = stringtable[idx]; stringtable[idx] = rover; } if (rover) mstr_found++; return rover; } /* move_to_head() */ /*-------------------------------------------------------------------------*/ static INLINE string_t * make_new_tabled (const char * const pTxt, size_t size, whash_t hash MTRACE_DECL) /* Helper function for mstring_new_tabled() and mstring_new_n_tabled(). * * Create a new tabled string by copying the data string of length * and and return it counting the result as one reference. The * string MUST NOT yet exist in the table. * * If memory runs out, NULL is returned. */ { string_t * string; int idx = HashToIndex(hash); /* Get the memory for a new one */ string = xalloc_pass(size + sizeof(*string)); /* sizeof(*string) includes the extra data byte */ if (!string) return NULL; /* Set up the structures and table the string */ string->size = size; string->hash = hash; memcpy(string->txt, pTxt, size); string->txt[size] = '\0'; string->info.tabled = MY_TRUE; string->info.ref = 1; /* An uninitialized memory read at this point is ok: it's because * the bitfield is initialized in parts. 
*/ mstr_added++; if (NULL == stringtable[idx]) mstr_chains++; else mstr_collisions++; string->next = stringtable[idx]; stringtable[idx] = string; { size_t msize; msize = mstr_mem_size(string); mstr_used++; mstr_used_size += msize; mstr_tabled_count++; mstr_tabled_size += msize; } return string; } /* make_new_tabled() */ /*-------------------------------------------------------------------------*/ string_t * mstring_alloc_string (size_t iSize MTRACE_DECL) /* Aliased to: alloc_mstring(iSize) * Also called by mstring_new_string(). * * Create a new untabled string with space for characters and * return it, counting the result as one reference. * * If memory runs out, NULL is returned. */ { string_t *string; /* Get the memory */ string = xalloc_pass(iSize + sizeof(*string)); if (!string) return NULL; /* Set up the structures */ string->size = iSize; string->next = NULL; string->hash = 0; string->txt[iSize] = '\0'; string->info.tabled = MY_FALSE; string->info.ref = 1; /* An uninitialized memory read at this point is ok: it's because * the bitfield is initialized in parts. */ { size_t msize; msize = mstr_mem_size(string); mstr_used++; mstr_used_size += msize; mstr_untabled_count++; mstr_untabled_size += msize; } return string; } /* mstring_alloc_string() */ /*-------------------------------------------------------------------------*/ string_t * mstring_new_string (const char * const pTxt MTRACE_DECL) /* Aliased to: new_mstring(pTxt) * * Create a new untabled string by copying the C string and * return it, counting the result as one reference. * * If memory runs out, NULL is returned. 
*/ { string_t *string; size_t size; size = strlen(pTxt); string = mstring_alloc_string(size MTRACE_PASS); if (string && size) { memcpy(string->txt, pTxt, size); } return string; } /* mstring_new_string() */ /*-------------------------------------------------------------------------*/ string_t * mstring_new_n_string (const char * const pTxt, size_t len MTRACE_DECL) /* Aliased to: new_n_mstring(pTxt, len) * * Create a new untabled string by copying the characters at * and return it, counting the result as one reference. * * If memory runs out, NULL is returned. */ { string_t *string; string = mstring_alloc_string(len MTRACE_PASS); if (string && len) { memcpy(string->txt, pTxt, len); } return string; } /* mstring_new_n_string() */ /*-------------------------------------------------------------------------*/ string_t * mstring_new_tabled (const char * const pTxt MTRACE_DECL) /* Aliased to: new_tabled(pTxt) * * Create a new tabled string by copying the C string and * return it counting the result as one reference. If a tabled string * for the same already exists, a reference to that one is returned. * * If memory runs out, NULL is returned. */ { whash_t hash; size_t size; string_t * string; size = strlen(pTxt); hash = hash_string_inl(pTxt, size); /* Check if the string has already been tabled */ string = find_and_move(pTxt, size, hash); if (string) { return ref_mstring(string); } /* No: create a new one */ return make_new_tabled(pTxt, size, hash MTRACE_PASS); } /* mstring_new_tabled() */ /*-------------------------------------------------------------------------*/ string_t * mstring_new_n_tabled (const char * const pTxt, size_t size MTRACE_DECL) /* Aliased to: new_n_tabled(pTxt, len) * * Create a new tabled string by copying the C string of length * and return it counting the result as one reference. If a tabled string * for the same already exists, a reference to that one is returned. * * If memory runs out, NULL is returned. 
*/ { whash_t hash; string_t * string; hash = hash_string_inl(pTxt, size); /* Check if the string has already been tabled */ string = find_and_move(pTxt, size, hash); if (string) { return ref_mstring(string); } /* No: create a new one */ return make_new_tabled(pTxt, size, hash MTRACE_PASS); } /* mstring_new_n_tabled() */ /*-------------------------------------------------------------------------*/ static string_t * table_string (string_t * pStr MTRACE_DECL) /* Called by: mstring_make_tabled() * * Table the string and return a pointer to the tabled string. * If is already tabled, it will also be the result. * If is not tabled, but a string of this content already exist, * the reference to the tabled string will be the result. * Otherwise, is added to the table and returned. * * Return NULL when out of memory. */ { string_t *string; whash_t hash; int idx; size_t size; size_t msize; /* If the string is already tabled, our work is done */ if (pStr->info.tabled) return pStr; msize = mstr_mem_size(pStr); /* Get or create the tabled string for this untabled one */ size = pStr->size; hash = get_hash(pStr); idx = HashToIndex(hash); /* Check if the string has already been tabled */ string = find_and_move(pStr->txt, size, hash); if (!string) { /* No: add the string into the table. */ pStr->info.tabled = MY_TRUE; mstr_added++; if (NULL == stringtable[idx]) mstr_chains++; else mstr_collisions++; pStr->next = stringtable[idx]; stringtable[idx] = pStr; mstr_tabled_count++; mstr_tabled_size += msize; mstr_untabled_count--; mstr_untabled_size -= msize; string = pStr; } /* That's all */ return string; } /* table_string() */ /*-------------------------------------------------------------------------*/ string_t * mstring_make_tabled (string_t * pStr, Bool deref_arg MTRACE_DECL) /* Aliased to: make_tabled(pStr) : deref_arg = MY_TRUE * make_tabled_from(pStr) : deref_arg = MY_FALSE * * Take the string and convert it into an tabled string if not already * tabled. 
* Return the counted reference to the tabled instance, and, if is * TRUE, dereference the once. * * Return NULL when out of memory. */ { string_t *string; /* Table the string one way or the other (always succeeds) */ string = table_string(pStr MTRACE_PASS); if (!string) return NULL; (void)ref_mstring(string); if (deref_arg) free_mstring(pStr); return string; } /* mstring_make_tabled() */ /*-------------------------------------------------------------------------*/ string_t * mstring_dup (string_t * pStr MTRACE_DECL) /* Aliased to: dup_mstring(pStr) * * Create and return a new untabled string with the same text as but * just one reference. * If memory runs out, NULL is returned. * * Purpose is to create an instance of a string which an be freely modified * (which is why .hash is cleared). * * See also: mstring_unshare(). */ { string_t *string; /* Create a new untabled string from the tabled one */ string = mstring_alloc_string(pStr->size MTRACE_PASS); if (string) { memcpy(string->txt, pStr->txt, pStr->size); } return string; } /* mstring_dup() */ /*-------------------------------------------------------------------------*/ string_t * mstring_unshare (string_t * pStr MTRACE_DECL) /* Aliased to: unshare_mstring(pStr) * * Like mstring_dup(), this function creates and returns an untabled string * with the same text as , and with just one reference. In contrast * to mstring_dup(), this function also dereferences on success (which * allows it to optimize certain cases). * If memory runs out, NULL is returned. * * Purpose is to create an instance of a string which an be freely modified * (which is why .hash is cleared). */ { string_t *string; /* Check for the easy cases where the argument string can be * the result: untabled and just one reference. 
*/ if (!pStr->info.tabled && pStr->info.ref == 1) { pStr->hash = 0; return pStr; } /* Otherwise create a new untabled string from the tabled one */ string = mstring_alloc_string(pStr->size MTRACE_PASS); if (string) { memcpy(string->txt, pStr->txt, pStr->size); free_mstring(pStr); } return string; } /* mstring_unshare() */ /*-------------------------------------------------------------------------*/ string_t * mstring_resize (string_t * pStr, size_t newlen MTRACE_DECL) /* Aliased to: resize_mstring(pStr,newlen) * * Create an untabled copy of with just one reference and space * for bytes, remove one reference from , and then return * the new string. * If memory runs out, NULL is returned, but the original string is still * dereferenced. */ { string_t *string; /* Check for the easy case */ if (!pStr->info.tabled && pStr->info.ref == 1 && pStr->size == newlen) { pStr->hash = 0; return pStr; } /* Otherwise create a new untabled string from the tabled one */ string = mstring_alloc_string(newlen MTRACE_PASS); if (string) { if (newlen > pStr->size) memcpy(string->txt, pStr->txt, pStr->size); else memcpy(string->txt, pStr->txt, newlen); } free_mstring(pStr); return string; } /* mstring_resize() */ /*-------------------------------------------------------------------------*/ string_t * mstring_find_tabled (string_t * pStr) /* Aliased to: find_tabled(pStr) * * Find the tabled string with the same content as and return it. * If is a tabled string, it will be the result itself. * If there is no such tabled string, NULL is returned. * * The function does not change refcounts. 
*/ { whash_t hash; size_t size; #ifdef EXT_STRING_STATS stNumTabledChecked++; #endif /* EXT_STRING_STATS */ /* If pStr is tabled, our work is done */ if (pStr->info.tabled) { #ifdef EXT_STRING_STATS stNumTabledCheckedTable++; #endif /* EXT_STRING_STATS */ return (string_t *)pStr; } /* Worst case: an untabled string we have to look for */ #ifdef EXT_STRING_STATS stNumTabledCheckedSearch++; #endif /* EXT_STRING_STATS */ size = mstrsize(pStr); hash = get_hash(pStr); return find_and_move(pStr->txt, size, hash); } /* mstring_find_tabled() */ /*-------------------------------------------------------------------------*/ string_t * mstring_find_tabled_str (const char * const pTxt, size_t size) /* Aliased to: find_tabled_str(pTxt), find_tabled_str_n(pTxt) * * Find the tabled string with the same content as the C string and * return it. * If there is no such tabled string, NULL is returned. * * The function does not change refcounts. */ { whash_t hash; hash = hash_string_inl(pTxt, size); return find_and_move(pTxt, size, hash); } /* mstring_find_tabled_str() */ /*-------------------------------------------------------------------------*/ void mstring_free (string_t *s) /* Aliased to: free_mstring(pStr) * * Decrement the refcount of string . If it reaches 0, deallocate it * altogether. */ { size_t msize; if (!s || !s->info.ref) return; msize = mstr_mem_size(s); mstr_used--; mstr_used_size -= msize; if (--(s->info.ref)) { return; } /* String has no refs left - deallocate it */ if (s->info.tabled) { /* A tabled string */ int idx; mstr_tabled_count--; mstr_tabled_size -= msize; idx = HashToIndex(get_hash(s)); if (NULL == move_to_head(s, idx)) { fatal("String %p (%s) doesn't hash to the same spot.\n" , s, s->txt ); } stringtable[idx] = s->next; if (NULL == stringtable[idx]) mstr_chains--; mstr_deleted++; } else { /* An untabled string */ mstr_untabled_count--; mstr_untabled_size -= msize; } /* The deallocation of the string itself is the same in either case. 
*/ xfree(s); } /* mstring_free() */ /*-------------------------------------------------------------------------*/ Bool mstring_equal(string_t * const pStr1, string_t * const pStr2) /* Aliased to: mstreq(pStr1, pStr2) * * Compare the two strings and and return TRUE if they * have the same content, FALSE otherwise. */ { #ifdef EXT_STRING_STATS stNumEqual++; #endif /* EXT_STRING_STATS */ if (pStr1 == pStr2 || get_txt(pStr1) == get_txt(pStr2)) { #ifdef EXT_STRING_STATS if (mstr_tabled(pStr1)) stNumTabledEqual++; #endif /* EXT_STRING_STATS */ return MY_TRUE; } if (mstrsize(pStr1) != mstrsize(pStr2)) return MY_FALSE; if (get_hash(pStr1) != get_hash(pStr2)) { #ifdef EXT_STRING_STATS stNumHashEqual++; #endif /* EXT_STRING_STATS */ return MY_FALSE; } return (memcmp(get_txt(pStr1), get_txt(pStr2), mstrsize(pStr1)) == 0); } /* mstring_equal() */ /*-------------------------------------------------------------------------*/ int mstring_compare (string_t * const pStr1, string_t * const pStr2) /* Aliased to: mstrcmp(pStr1, pStr2) * * Compare the two strings and and return * -1 if < * 0 if == * +1 if > */ { int rc; #ifdef EXT_STRING_STATS stNumComp++; #endif /* EXT_STRING_STATS */ /* Compare for direct equality */ if (pStr1 == pStr2 || get_txt(pStr1) == get_txt(pStr2)) { #ifdef EXT_STRING_STATS if (mstr_tabled(pStr1)) stNumTabledComp++; #endif /* EXT_STRING_STATS */ return 0; } /* We have to compare two strings by byte. * Remember to take the difference in length into account when the * leading parts match. */ if (mstrsize(pStr1) == mstrsize(pStr2)) { rc = memcmp(get_txt(pStr1), get_txt(pStr2), mstrsize(pStr1)); return rc; } if (mstrsize(pStr1) < mstrsize(pStr2)) { rc = memcmp(get_txt(pStr1), get_txt(pStr2), mstrsize(pStr1)); return rc != 0 ? rc : -1; } rc = memcmp(get_txt(pStr1), get_txt(pStr2), mstrsize(pStr2)); return rc != 0 ? 
rc : 1; } /* mstring_compare() */ /*-------------------------------------------------------------------------*/ int mstring_order (string_t * const pStr1, string_t * const pStr2) /* Aliased to: mstr_order(pStr1, pStr2) * * Compare the two strings and and return * -1 if < * 0 if == * +1 if > * * Other than mstring_compare() this function does not implement * a lexicographic order, but instead a faster hash-centric order. * It is thus more useful for sorted arrays and mapping indices. */ { int rc; #ifdef EXT_STRING_STATS stNumComp++; #endif /* EXT_STRING_STATS */ /* Compare for direct equality */ if (pStr1 == pStr2 || get_txt(pStr1) == get_txt(pStr2)) { #ifdef EXT_STRING_STATS if (mstr_tabled(pStr1)) stNumTabledComp++; #endif /* EXT_STRING_STATS */ return 0; } /* Shorter strings are 'less' than longer strings */ { size_t size1 = mstrsize(pStr1); size_t size2 = mstrsize(pStr2); if (size1 != size2) return size1 < size2 ? -1 : 1; } /* Strings with a smaller hash also count as 'less'. */ { whash_t hash1 = get_hash(pStr1); whash_t hash2 = get_hash(pStr2); if (hash1 != hash2) return hash1 < hash2 ? -1 : 1; } /* Length and hash are identical - we have to compare byte by byte. */ rc = memcmp(get_txt(pStr1), get_txt(pStr2), mstrsize(pStr1)); return rc; } /* mstring_order() */ /*-------------------------------------------------------------------------*/ const char * mstring_mstr_n_str ( const string_t * const pStr, size_t start , const char * const pTxt, size_t len) /* Aliased to: mstrstr(pStr, pTxt) * * Find the partial string of bytes (which may contain '\0' as * part of the data to be found) inside of starting at position * and return a pointer to the location found. * If not found, return NULL. */ { const char * cp; size_t left; char first; if (start > mstrsize(pStr)) return NULL; /* Initialize 'characters remaining' and 'current position' */ left = mstrsize(pStr) - start; /* remove the const qualifier temporarily when calling get_txt(). 
*/ cp = get_txt((string_t *const)pStr)+start; /* Special case: strstr("text", "") */ if (len == 0) return cp; first = *pTxt; while (left >= len) { const char * next; next = memchr(cp, first, left); if (NULL == next) break; left -= next - cp; if (left >= len && 0 == memcmp(next, pTxt, len)) return next; if (left > 0) { cp = next+1; left--; } } return NULL; } /* mstring_mstr_n_str() */ /*-------------------------------------------------------------------------*/ const char * mstring_mstr_rn_str ( const string_t * const pStr, size_t start , const char * const pTxt, size_t len) /* Aliased to: mstrrstr(pStr, pTxt) * * Find the partial string of bytes (which may contain '\0' as * part of the data to be found) inside of up to position * and return a pointer to the location found. * If not found, return NULL. */ { const char * cp; char first; if (start > mstrsize(pStr)) return NULL; /* Initialize 'current position' */ cp = get_txt((string_t *const)pStr)+start; /* Special case: strrstr("text", "") */ if (len == 0) return cp; first = *pTxt; cp++; /* Offset the first decrement */ do { cp--; if (*cp == first && 0 == memcmp(cp, pTxt, len) ) return cp; } while (cp != get_txt((string_t *const)pStr)); return NULL; } /* mstring_mstr_n_str() */ /*-------------------------------------------------------------------------*/ string_t * mstring_add_slash (const string_t *str MTRACE_DECL) /* Aliased to: add_slash(str) * * Create and return a new string with the data of prepended * by a slash ('/'). The result string is untabled and has one reference, * the old string is not changed. * * If memory runs out, NULL is returned. 
*/ { string_t *tmp; char * txt; tmp = mstring_alloc_string(mstrsize(str)+1 MTRACE_PASS); if (tmp) { txt = get_txt(tmp); *txt = '/'; memcpy(txt+1, get_txt((string_t *const)str), mstrsize(str)); } return tmp; } /* mstring_add_slash() */ /*-------------------------------------------------------------------------*/ string_t * mstring_del_slash (string_t *str MTRACE_DECL) /* Aliased to: del_slash(str) * * Remove any given leading slash from the string and return the * resulting string. If has no slashed to begin with, the result * is a new reference to . * * If memory runs out, NULL is returned. */ { char * txt; txt = get_txt(str); while (*txt == '/') txt++; if (txt == get_txt(str)) return ref_mstring(str); return mstring_new_string(txt MTRACE_PASS); } /* mstring_del_slash() */ /*-------------------------------------------------------------------------*/ string_t * mstring_del_dotc (string_t *str MTRACE_DECL) /* Aliased to: del_dotc(str) * * If ends in a trailing ".c", create a new untabled string without * the suffix and return it. Otherwise return a new reference to . * * If memory runs out, NULL is returned. */ { string_t *tmp; size_t len; char * txt, *p; txt = get_txt(str); len = mstrsize(str); p = strrchr(txt, '.'); if (p && (size_t)(p - txt) + 2 == len && p[1] == 'c') len = (size_t)(p - txt); else return ref_mstring(str); tmp = mstring_alloc_string(len MTRACE_PASS); if (tmp) { memcpy(get_txt(tmp), txt, len); } return tmp; } /* mstring_del_dotc() */ /*-------------------------------------------------------------------------*/ string_t * mstring_cvt_progname (const string_t *str MTRACE_DECL) /* Aliased to: cvt_progname(str) * * is a program name: no leading slash, but a trailing '.c'. * Create and return a new string with the '.c' removed, and a leading slash * added if compat_mode is not set. * * The result string is untabled and has one reference, the old string * is not changed. * * If memory runs out, NULL is returned. 
*/ { string_t *tmp; size_t len; const char * txt, *p; char *txt2; txt = get_txt((string_t *const)str); len = mstrsize(str); p = strrchr(txt, '.'); if (p) len = (size_t)(p - txt); if (!compat_mode) len++; tmp = mstring_alloc_string(len MTRACE_PASS); if (tmp) { txt2 = get_txt(tmp); if (!compat_mode) { *txt2 = '/'; txt2++; len--; } memcpy(txt2, txt, len); } return tmp; } /* mstring_cvt_progname() */ /*-------------------------------------------------------------------------*/ string_t * mstring_add (const string_t *left, const string_t *right MTRACE_DECL) /* Aliased to: mstr_add(left,right) * * Create and return a new string with the data of concatenated * with the data of . * The result string is untabled and has one reference, * the old strings and are not changed. * * If memory runs out, NULL is returned. */ { size_t lleft, lright; string_t *tmp; lleft = mstrsize(left); lright = mstrsize(right); tmp = mstring_alloc_string(lleft+lright MTRACE_PASS); if (tmp) { char * txt; txt = get_txt(tmp); memcpy(txt, get_txt((string_t *const)left), lleft); memcpy(txt+lleft, get_txt((string_t *const)right), lright); } return tmp; } /* mstring_add() */ /*-------------------------------------------------------------------------*/ string_t * mstring_add_txt (const string_t *left, const char *right, size_t len MTRACE_DECL) /* Aliased to: mstr_add_txt(left,right,len) * * Create and return a new string with the data of concatenated * with the bytes of data in buffer . * The result string is untabled and has one reference, * the old string is not changed. * * If memory runs out, NULL is returned. 
*/ { size_t lleft; string_t *tmp; char * txt; lleft = mstrsize(left); tmp = mstring_alloc_string(lleft+len MTRACE_PASS); if (tmp) { txt = get_txt(tmp); memcpy(txt, get_txt((string_t *const)left), lleft); memcpy(txt+lleft, right, len); } return tmp; } /* mstring_add_txt() */ /*-------------------------------------------------------------------------*/ string_t * mstring_add_to_txt (const char *left, size_t len, const string_t *right MTRACE_DECL) /* Aliased to: mstr_add_to_txt(left,len,right) * * Create and return a new string with the bytes of data in buffer * concatenated with the string . * The result string is untabled and has one reference, * the old string is not changed. * * If memory runs out, NULL is returned. */ { size_t lright; string_t *tmp; char * txt; lright = mstrsize(right); tmp = mstring_alloc_string(lright+len MTRACE_PASS); if (tmp) { txt = get_txt(tmp); memcpy(txt, left, len); memcpy(txt+len, get_txt((string_t *const)right), lright); } return tmp; } /* mstring_add_to_txt() */ /*-------------------------------------------------------------------------*/ string_t * mstring_append (string_t *left, const string_t *right MTRACE_DECL) /* Aliased to: mstr_append(left,right) * * Create and return a new string with the data of concatenated * with the data of . * The result string is untabled and has one reference, * is dereferenced once (if not NULL). * the old strings is not changed. * * If memory runs out or if is already NULL, NULL is returned. */ { string_t *tmp; if (left == NULL) return NULL; tmp = mstring_add(left, right MTRACE_PASS); free_mstring(left); return tmp; } /* mstring_append() */ /*-------------------------------------------------------------------------*/ string_t * mstring_append_txt (string_t *left, const char *right, size_t len MTRACE_DECL) /* Aliased to: mstr_append_txt(left,right,len) * * Create and return a new string with the data of concatenated * with the bytes of data in buffer . 
* The result string is untabled and has one reference, * is dereferenced once (if not NULL). * * If memory runs out or if is already NULL, NULL is returned. */ { string_t *tmp; if (left == NULL) return NULL; tmp = mstring_add_txt(left, right, len MTRACE_PASS); free_mstring(left); return tmp; } /* mstring_append_txt() */ /*-------------------------------------------------------------------------*/ string_t * mstring_repeat (const string_t *base, size_t num MTRACE_DECL) /* Aliased to: mstr_repeat(base,num) * * Create and return a new string which is the string repeated * times. * The result string is untabled and has one reference, * the old string is not changed. * * If memory runs out, NULL is returned. */ { size_t len, reslen; string_t *result; len = mstrsize(base); reslen = len * num; result = mstring_alloc_string(reslen MTRACE_PASS); if (result && reslen) { size_t curlen; char * txt = get_txt(result); /* Seed result[] with one copy of the string */ memcpy(txt, get_txt((string_t *const)base), len); /* Repeatedly double the string in result */ curlen = len; while (2*curlen < reslen) { memcpy(txt+curlen, txt, curlen); curlen *= 2; } /* Fill up result to the full length */ if (reslen > curlen) memcpy(txt+curlen, txt, reslen-curlen); } return result; } /* mstring_repeat() */ /*-------------------------------------------------------------------------*/ string_t * mstring_extract (const string_t *str, size_t start, long end MTRACE_DECL) /* Aliased to: mstr_extract(str,start,end) * * Create and return a new string made of [..]. * If is negative, the result is made of [..]. * The result string is untabled and has one reference, * the old string is not changed. * * If memory runs out, NULL is returned. 
*/ { size_t len, reslen; string_t *result; len = mstrsize(str); if (!len) { errorf("(mstring_extract) Can't extract from empty string.\n"); /* NOTREACHED */ return NULL; } if (end < 0) end = (long)len-1; if (end >= (long)len) { errorf("(mstring_extract) end %ld >= len %lu\n" , end, (unsigned long) len); /* NOTREACHED */ return NULL; } if (end < (long)start) { errorf("(mstring_extract) end %ld < start %lu\n" , end, (unsigned long) start); /* NOTREACHED */ return NULL; } if (start >= len) { errorf("(mstring_extract) start %lu >= string length %lu\n" , (unsigned long) start, (unsigned long)len); /* NOTREACHED */ return NULL; } reslen = (size_t)end - start + 1; result = mstring_alloc_string(reslen MTRACE_PASS); if (result && reslen) { memcpy(get_txt(result), get_txt((string_t *const)str)+start, reslen); } return result; } /* mstring_extract() */ /*-------------------------------------------------------------------------*/ Bool mstring_prefixed (const string_t *p, const string_t *s) /* Aliased to: mstrprefixed(p,s) * * Return TRUE if string begins with string

, FALSE if not. */ { const char *pp, *ps; size_t lp, ls; lp = mstrsize(p); pp = get_txt((string_t *const)p); ls = mstrsize(s); ps = get_txt((string_t *const)s); for (; lp > 0 && ls > 0; lp--, ls--) { if (*pp++ != *ps++) return MY_FALSE; } return (lp == 0) ? MY_TRUE : MY_FALSE; } /* mstring_prefixed() */ /*-------------------------------------------------------------------------*/ long mstring_chr (const string_t *p, char c) /* Aliased to: mstrchr(p,c) * * Search character in string and return its position. * Return -1 if not found. */ { char *pp; pp = memchr(get_txt((string_t *const)p), c, mstrsize(p)); if (pp != NULL) return pp - get_txt((string_t *const)p); return -1; } /* mstring_chr() */ /*-------------------------------------------------------------------------*/ void mstring_init (void) /* Initialize all datastructures and the common strings. */ { int x; stringtable = xalloc(sizeof(*stringtable) * HTABLE_SIZE); if (!stringtable) fatal("(mstring_init) Out of memory (%lu bytes) for string table\n" , (unsigned long) sizeof(*stringtable)*HTABLE_SIZE); for (x = 0; x < HTABLE_SIZE; x++) stringtable[x] = NULL; init_standard_strings(); } /* mstring_init() */ /*=========================================================================*/ #ifdef GC_SUPPORT void mstring_clear_refs (void) /* GC support: clear all refs of memory in the string table. */ { int x; for (x = 0; x < HTABLE_SIZE; x++) { string_t *p; for (p = stringtable[x]; p; p = p->next ) { p->info.ref = 0; } } } /* mstring_clear_refs() */ /*-------------------------------------------------------------------------*/ void mstring_note_refs (void) /* GC support: note all refs of memory in the string table. 
*/ { int x; note_malloced_block_ref(stringtable); for (x = 0; x < SHSTR_NOSTRINGS; x++) { count_ref_from_string(shstring[x]); } } /* mstring_note_refs() */ /*-------------------------------------------------------------------------*/ void mstring_walk_table (void (*func) (string_t *)) /* GC support: Call (*func)(str) for all tabled strings in the string table. * * Usually the function is "mark_unreferenced_string()" which marks * unref'd strings in the table, followed by a call to mstring_gc_table(). */ { int x; for (x = 0; x < HTABLE_SIZE; x++) { string_t * p; for (p = stringtable[x]; NULL != p; p = p->next) { (*func)(p); } } } /* mstring_walk_table() */ /*-------------------------------------------------------------------------*/ void mstring_gc_table (void) /* GC support: Remove all strings from the table which have a refcount * of 0. * * This can only happen in the last stage of a GC. */ { int x; for (x = 0; x < HTABLE_SIZE; x++) { string_t * prev, * next; for (prev = NULL, next = stringtable[x]; next != NULL; ) { if (next->info.ref == 0) { string_t * this = next; /* Unlink the string from the table, then free it. */ if (prev == NULL) { stringtable[x] = this->next; next = this->next; } else { prev->next = this->next; next = this->next; } mstr_untabled_count++; mstr_untabled_size += mstr_mem_size(this); mstr_tabled_count--; mstr_tabled_size += mstr_mem_size(this); mstr_deleted++; this->info.ref = 1; this->info.tabled = MY_FALSE; free_mstring(this); } else { /* Step to next string */ prev = next; next = next->next; } } } /* for (x) */ } /* mstring_gc_table() */ #endif /* GC_SUPPORT */ /*-------------------------------------------------------------------------*/ mp_int add_string_status (strbuf_t *sbuf, Bool verbose) /* Add the string handler status suitable for printing to . * Result is the amount of memory held by the string handler. 
*/ { # define STR_OVERHEAD (sizeof(string_t)+1) mp_uint stringtable_size; mp_uint distinct_strings; mp_uint distinct_size; mp_uint distinct_overhead; stringtable_size = HTABLE_SIZE * sizeof(string_t *); distinct_strings = mstr_tabled_count + mstr_untabled_count; distinct_size = mstr_tabled_size + mstr_untabled_size; distinct_overhead = mstr_tabled_count * STR_OVERHEAD + mstr_untabled_count * STR_OVERHEAD; if (!verbose) { strbuf_addf(sbuf , "Strings alloced\t\t\t%8lu %9lu (%lu + %lu overhead)\n" , distinct_strings, distinct_size + stringtable_size , distinct_size - distinct_overhead , distinct_overhead + stringtable_size ); } else { strbuf_add(sbuf, "\nString handler:\n"); strbuf_add(sbuf, "---------------\t Strings Bytes (Data+Overhead)\n"); strbuf_addf(sbuf, "Total asked for\t%9lu %9lu (%9lu+%9lu)\n" , mstr_used , mstr_used_size , mstr_used_size ? mstr_used_size - mstr_used * STR_OVERHEAD : 0 , mstr_used * STR_OVERHEAD ); strbuf_addf(sbuf, "Total allocated\t%9lu %9lu (%9lu+%9lu)\n" , distinct_strings , distinct_size + stringtable_size , distinct_size - distinct_overhead , distinct_overhead + stringtable_size ); strbuf_addf(sbuf, " - tabled\t%9lu %9lu (%9lu+%9lu)\n" , mstr_tabled_count , mstr_tabled_size + stringtable_size , mstr_tabled_size ? mstr_tabled_size - mstr_tabled_count * STR_OVERHEAD : 0 , mstr_tabled_count * STR_OVERHEAD + stringtable_size ); strbuf_addf(sbuf, " - untabled\t%9lu %9lu (%9lu+%9lu)\n" , mstr_untabled_count , mstr_untabled_size , mstr_untabled_size ? mstr_untabled_size - mstr_untabled_count * STR_OVERHEAD : 0 , mstr_untabled_count * STR_OVERHEAD ); strbuf_addf(sbuf, "\nSpace required vs. 
'regular C' string implementation: " "%lu%% with, %lu%% without overhead.\n" , ((distinct_size + stringtable_size) * 100L) / (mstr_used_size - mstr_used * sizeof(string_t)) , ((distinct_size + stringtable_size - distinct_overhead) * 100L) / (mstr_used_size - mstr_used * STR_OVERHEAD) ); strbuf_addf(sbuf, "Searches by address: %lu - found: %lu (%.1f%%) - avg length: %7.3f\n" , mstr_searches , mstr_found, 100.0 * (float)mstr_found / (float)mstr_searches , (float)mstr_searchlen / (float)mstr_searches ); strbuf_addf(sbuf, "Searches by content: %lu - found: %lu (%.1f%%) - avg length: %7.3f\n" , mstr_searches_byvalue , mstr_found_byvalue, 100.0 * (float)mstr_found_byvalue / (float)mstr_searches_byvalue , (float)mstr_searchlen_byvalue / (float)mstr_searches_byvalue ); strbuf_addf(sbuf, "Hash chains used: %lu of %lu (%.1f%%)\n" , mstr_chains, (unsigned long)HTABLE_SIZE , 100.0 * (float)mstr_chains / (float)HTABLE_SIZE ); strbuf_addf(sbuf, "Distinct strings added: %lu " "- deleted: %lu\n" , mstr_added, mstr_deleted ); strbuf_addf(sbuf, "Collisions: %lu (%.1f%% added)\n" , mstr_collisions , 100.0 * (float)mstr_collisions / (float)mstr_added ); #ifdef EXT_STRING_STATS strbuf_addf(sbuf, "Equality tests: %lu total, %lu by table (%.1f%%), %lu by hash (%.1lf%%)\n" , stNumEqual, stNumTabledEqual , stNumEqual ? 100.0 * ((float)stNumTabledEqual/stNumEqual) : 0.0 , stNumHashEqual , stNumEqual ? 100.0 * ((float)stNumHashEqual/stNumEqual) : 0.0 ); strbuf_addf(sbuf, "Comparisons: %lu total, %lu by table (%.1f%%)\n" , stNumComp, stNumTabledComp , stNumComp ? 100.0 * ((float)stNumTabledComp/stNumComp) : 0.0 ); strbuf_addf(sbuf, "Table lookups for existence: %lu," " %lu by table (%.1f%%)," " %lu by content (%.1f%%)\n" , stNumTabledChecked , stNumTabledCheckedTable , stNumTabledChecked ? 100.0 * ((float)stNumTabledCheckedTable/stNumTabledChecked) : 0.0 , stNumTabledCheckedSearch , stNumTabledChecked ? 
100.0 * ((float)stNumTabledCheckedSearch/stNumTabledChecked) : 0.0 ); #endif /* EXT_STRING_STATS */ } return stringtable_size + distinct_size; # undef STR_OVERHEAD } /* add_string_status() */ /*-------------------------------------------------------------------------*/ void string_dinfo_status (svalue_t *svp, int value) /* Return the string table information for debug_info(DINFO_DATA, DID_STATUS). * points to the svalue block for the result, this function fills in * the spots for the object table. * If is -1, points indeed to a value block; other it is * the index of the desired value and points to a single svalue. */ { #define ST_NUMBER(which,code) \ if (value == -1) svp[which].u.number = code; \ else if (value == which) svp->u.number = code ST_NUMBER(DID_ST_STRINGS, mstr_used); ST_NUMBER(DID_ST_STRING_SIZE, mstr_used_size); ST_NUMBER(DID_ST_STR_TABLE_SIZE, HTABLE_SIZE * sizeof(string_t *)); ST_NUMBER(DID_ST_STR_OVERHEAD, sizeof(string_t)-1); ST_NUMBER(DID_ST_STR_CHAINS, mstr_chains); ST_NUMBER(DID_ST_STR_ADDED, mstr_added); ST_NUMBER(DID_ST_STR_DELETED, mstr_deleted); ST_NUMBER(DID_ST_STR_COLLISIONS, mstr_collisions); ST_NUMBER(DID_ST_UNTABLED, mstr_untabled_count); ST_NUMBER(DID_ST_UNTABLED_SIZE, mstr_untabled_size); ST_NUMBER(DID_ST_TABLED, mstr_tabled_count); ST_NUMBER(DID_ST_TABLED_SIZE, mstr_tabled_size); ST_NUMBER(DID_ST_STR_SEARCHES, mstr_searches); ST_NUMBER(DID_ST_STR_SEARCHLEN, mstr_searchlen); ST_NUMBER(DID_ST_STR_SEARCHES_BYVALUE, mstr_searches_byvalue); ST_NUMBER(DID_ST_STR_SEARCHLEN_BYVALUE, mstr_searchlen_byvalue); ST_NUMBER(DID_ST_STR_FOUND, mstr_found); ST_NUMBER(DID_ST_STR_FOUND_BYVALUE, mstr_found_byvalue); #undef ST_NUMBER } /* string_dinfo_status() */ /***************************************************************************/