Bench strcpy
Last updated on
I had a need to parse a huge JSON file, and I was curious how much performance speed up could be had just checking the first character, and hashing everything instead of using strcmp.
Full code can be found here
Results
Platform | strcmp | strcmp with prefix | hash runtime | hash ahead of time |
---|---|---|---|---|
1(GCC) | 1318 | 359 | 1838 | 140 |
2(Clang) | 32715 | 475 | 2640 | 243 |
3(GCC) | 4666 | 2151 | 12923 | 542 |
3(Clang) | 4592 | 1994 | 13449 | 509 |
Times are in cycles using rdtsc counter
The platforms tested are
- Linux Ubuntu 18 | Intel(R) Core(TM) i7-8550U CPU @ 1.8GHz
- MacOSX 10.14 | Intel(R) Core(TM) i5-5257U @ 2.70GHz
- Linux Ubuntu 16 | Intel(R) Core(TM) i7-6700K CPU @ 4.00GHz
Strcpy
A basic strcpy benchmark, sets the baseline.
uint64_t
bench_strcmp() {
const char **str_it = &strings[0];
uint64_t start = get_time_rdtsc();
while(*str_it) {
if(strcmp(*str_it, search_for) == 0) {
break;
}
++str_it;
}
uint64_t end = get_time_rdtsc();
printf("Found %s\n", *str_it);
return end - start;
}
Strcpy Prefix Check
Before running strcpy check the first character, as a runtime solution this is the winner.
int
pre_check(uint8_t *a, uint8_t *b) {
return *a == *b;
}
uint64_t
bench_strcmp_prefix() {
const char **str_it = &strings[0];
uint64_t start = get_time_rdtsc();
uint8_t *search_int = (uint8_t*)search_for;
const char *str;
while(str = *str_it++, str) {
/* check prefix first */
if(!pre_check((uint8_t*)*str_it, search_int)) {
continue;
}
/* compare whole string */
if(strcmp(*str_it, search_for) == 0){
break;
}
}
uint64_t end = get_time_rdtsc();
printf("Found %s\n", *str_it);
return end - start;
}
Runtme Hashing
Hashing strings and then comparing the hashes, this doesn’t handle collisions.
uint64_t
hash_str(const char *str) {
uint64_t hash = 5381;
int c;
while(c = *str++, c) {
hash = ((hash << 5) + hash) + c;
}
return hash;
}
uint64_t
bench_hash_rt() {
const char **str_it = &strings[0];
uint64_t start = get_time_rdtsc();
uint64_t search_hash = hash_str(search_for);
while(*str_it) {
uint64_t hash = hash_str(*str_it);
if(hash == search_hash) {
break;
}
++str_it;
}
uint64_t end = get_time_rdtsc();
printf("Found %s\n", *str_it);
return end - start;
}
Ahead Of Time Hashing
Hashing strings upfront. Predictably this is the fastests.
uint64_t
hash_str(const char *str) {
uint64_t hash = 5381;
int c;
while(c = *str++, c) {
hash = ((hash << 5) + hash) + c;
}
return hash;
}
uint64_t
bench_hash_at() {
/* build hash table */
uint64_t hash_arr[(sizeof(strings) / sizeof(strings[0]))];
int count = (sizeof(strings) / sizeof(strings[0]));
int i;
for(i = 0; i < count; ++i) {
if(strings[i] == NULL) {
hash_arr[i] = (uint64_t)-1;
}
else {
hash_arr[i] = hash_str(strings[i]);
}
}
/* search */
uint64_t *hash_it = &hash_arr[0];
uint64_t search_hash = hash_str(search_for);
uint64_t start = get_time_rdtsc();
while(*hash_it != (uint64_t)-1) {
if(*hash_it == search_hash) {
break;
}
++hash_it;
}
uint64_t end = get_time_rdtsc();
printf("Found %s\n", strings[hash_it - &hash_arr[0]]);
return end - start;
}