diff --git a/support/logresolve.c b/support/logresolve.c
index 1a36a18..612893a 100644
--- a/support/logresolve.c
+++ b/support/logresolve.c
@@ -15,12 +15,13 @@
  */
 
 /*
- * logresolve 1.1
+ * logresolve 2.0
  *
  * Tom Rathborne - tomr uunet.ca - http://www.uunet.ca/~tomr/
  * UUNET Canada, April 16, 1995
  *
  * Rewritten by David Robinson. (drtr ast.cam.ac.uk)
+ * Rewritten again, and ported to APR by Colm MacCarthaigh
  *
  * Usage: logresolve [-s filename] [-c] < access_log > new_log
  *
@@ -28,7 +29,7 @@
  * -s filename name of a file to record statistics
  * -c check the DNS for a matching A record for the host.
  *
- * Notes:
+ * Notes: (For historical interest)
  *
  * To generate meaningful statistics from an HTTPD log file, it's good
  * to have the domain name of each machine that accessed your site, but
@@ -55,333 +56,288 @@
  * that one of these matches the original address.
  */
 
+#include "apr.h"
 #include "apr_lib.h"
-#if APR_HAVE_STDIO_H
-#include <stdio.h>
-#endif
+#include "apr_hash.h"
+#include "apr_getopt.h"
+#include "apr_strings.h"
+#include "apr_file_io.h"
+#include "apr_network_io.h"
+
 #if APR_HAVE_STDLIB_H
 #include <stdlib.h>
 #endif
-#if APR_HAVE_CTYPE_H
-#include <ctype.h>
-#endif
-#if APR_HAVE_NETDB_H
-#include <netdb.h>
-#endif
-#if APR_HAVE_NETINET_IN_H
-#include <netinet/in.h>
-#endif
 #if APR_HAVE_STRING_H
 #include <string.h>
 #endif
-#if APR_HAVE_SYS_SOCKET_H
-#include <sys/socket.h>
-#endif
-#if APR_HAVE_ARPA_INET_H
-#include <arpa/inet.h>
-#endif
-
-static void cgethost(struct in_addr ipnum, char *string, int check);
-static int get_line(char *s, int n);
-static void stats(FILE *output);
-
-#ifdef BEOS
-#define NO_ADDRESS NO_DATA
-#endif
-
-
-/* maximum line length */
-#ifndef MAXLINE
-#define MAXLINE 1024
-#endif
-
-/* maximum length of a domain name */
-#ifndef MAXDNAME
-#define MAXDNAME 256
-#endif
-
-/* number of buckets in cache hash apr_table_t */
-#define BUCKETS 256
-
-/*
- * struct nsrec - record of nameservice for cache linked list
- *
- * ipnum - IP number hostname - hostname noname - nonzero if IP number has no
- * hostname, i.e. hostname=IP number
- */
-
-struct nsrec {
-    struct in_addr ipnum;
-    char *hostname;
-    int noname;
-    struct nsrec *next;
-} *nscache[BUCKETS];
-
-/*
- * statistics - obvious
- */
-
-#ifndef h_errno
-#ifdef __CYGWIN__
-extern __declspec(dllimport) int h_errno;
-#else
-extern int h_errno; /* some machines don't have this in their headers */
-#endif
-#endif
-
-/* largest value for h_errno */
-
-#define MAX_ERR (NO_ADDRESS)
-#define UNKNOWN_ERR (MAX_ERR+1)
-#define NO_REVERSE (MAX_ERR+2)
 
+static apr_file_t *errfile;
+static const char *shortname = "logresolve";
+static apr_hash_t *cache;
+
+/* Statistics */
 static int cachehits = 0;
 static int cachesize = 0;
 static int entries = 0;
 static int resolves = 0;
 static int withname = 0;
-static int errors[MAX_ERR + 3];
+static int doublefailed = 0;
+static int noreverse = 0;
 
 /*
- * cgethost - gets hostname by IP address, caching, and adding unresolvable
- * IP numbers with their IP number as hostname, setting noname flag
- */
-
-static void cgethost (struct in_addr ipnum, char *string, int check)
-{
-    struct nsrec **current, *new;
-    struct hostent *hostdata;
-    char *name;
-
-    current = &nscache[((ipnum.s_addr + (ipnum.s_addr >> 8) +
-                         (ipnum.s_addr >> 16) + (ipnum.s_addr >> 24)) % BUCKETS)];
-
-    while (*current != NULL && ipnum.s_addr != (*current)->ipnum.s_addr)
-        current = &(*current)->next;
-
-    if (*current == NULL) {
-        cachesize++;
-        new = (struct nsrec *) malloc(sizeof(struct nsrec));
-        if (new == NULL) {
-            perror("malloc");
-            fprintf(stderr, "Insufficient memory\n");
-            exit(1);
-        }
-        *current = new;
-        new->next = NULL;
-
-        new->ipnum = ipnum;
-
-        hostdata = gethostbyaddr((const char *) &ipnum, sizeof(struct in_addr),
-                                 AF_INET);
-        if (hostdata == NULL) {
-            if (h_errno > MAX_ERR)
-                errors[UNKNOWN_ERR]++;
-            else
-                errors[h_errno]++;
-            new->noname = h_errno;
-            name = strdup(inet_ntoa(ipnum));
-        }
-        else {
-            new->noname = 0;
-            name = strdup(hostdata->h_name);
-            if (check) {
-                if (name == NULL) {
-                    perror("strdup");
-                    fprintf(stderr, "Insufficient memory\n");
-                    exit(1);
-                }
-                hostdata = gethostbyname(name);
-                if (hostdata != NULL) {
-                    char **hptr;
-
-                    for (hptr = hostdata->h_addr_list; *hptr != NULL; hptr++)
-                        if (((struct in_addr *) (*hptr))->s_addr == ipnum.s_addr)
-                            break;
-                    if (*hptr == NULL)
-                        hostdata = NULL;
-                }
-                if (hostdata == NULL) {
-                    fprintf(stderr, "Bad host: %s != %s\n", name,
-                            inet_ntoa(ipnum));
-                    new->noname = NO_REVERSE;
-                    free(name);
-                    name = strdup(inet_ntoa(ipnum));
-                    errors[NO_REVERSE]++;
-                }
-            }
-        }
-        new->hostname = name;
-        if (new->hostname == NULL) {
-            perror("strdup");
-            fprintf(stderr, "Insufficient memory\n");
-            exit(1);
-        }
-    }
-    else
-        cachehits++;
-
-    /* size of string == MAXDNAME +1 */
-    strncpy(string, (*current)->hostname, MAXDNAME);
-    string[MAXDNAME] = '\0';
-}
-
-/*
  * prints various statistics to output
  */
 
-static void stats (FILE *output)
+#define NL APR_EOL_STR
+static void print_statistics (apr_file_t *output)
 {
-    int i;
-    char *ipstring;
-    struct nsrec *current;
-    char *errstring[MAX_ERR + 3];
-
-    for (i = 0; i < MAX_ERR + 3; i++)
-        errstring[i] = "Unknown error";
-    errstring[HOST_NOT_FOUND] = "Host not found";
-    errstring[TRY_AGAIN] = "Try again";
-    errstring[NO_RECOVERY] = "Non recoverable error";
-    errstring[NO_DATA] = "No data record";
-    errstring[NO_ADDRESS] = "No address";
-    errstring[NO_REVERSE] = "No reverse entry";
-
-    fprintf(output, "logresolve Statistics:\n");
-
-    fprintf(output, "Entries: %d\n", entries);
-    fprintf(output, " With name : %d\n", withname);
-    fprintf(output, " Resolves : %d\n", resolves);
-    if (errors[HOST_NOT_FOUND])
-        fprintf(output, " - Not found : %d\n", errors[HOST_NOT_FOUND]);
-    if (errors[TRY_AGAIN])
-        fprintf(output, " - Try again : %d\n", errors[TRY_AGAIN]);
-    if (errors[NO_DATA])
-        fprintf(output, " - No data : %d\n", errors[NO_DATA]);
-    if (errors[NO_ADDRESS])
-        fprintf(output, " - No address: %d\n", errors[NO_ADDRESS]);
-    if (errors[NO_REVERSE])
-        fprintf(output, " - No reverse: %d\n", errors[NO_REVERSE]);
-    fprintf(output, "Cache hits : %d\n", cachehits);
-    fprintf(output, "Cache size : %d\n", cachesize);
-    fprintf(output, "Cache buckets : IP number * hostname\n");
-
-    for (i = 0; i < BUCKETS; i++)
-        for (current = nscache[i]; current != NULL; current = current->next) {
-            ipstring = inet_ntoa(current->ipnum);
-            if (current->noname == 0)
-                fprintf(output, " %3d %15s - %s\n", i, ipstring,
-                        current->hostname);
-            else {
-                if (current->noname > MAX_ERR + 2)
-                    fprintf(output, " %3d %15s : Unknown error\n", i,
-                            ipstring);
-                else
-                    fprintf(output, " %3d %15s : %s\n", i, ipstring,
-                            errstring[current->noname]);
-            }
-        }
+    apr_file_printf(output, "logresolve Statistics:" NL);
+    apr_file_printf(output, "Entries: %d" NL, entries);
+    apr_file_printf(output, " With name : %d" NL, withname);
+    apr_file_printf(output, " Resolves : %d" NL, resolves);
+
+    if (noreverse) {
+        apr_file_printf(output, " - No reverse : %d" NL,
+                        noreverse);
+    }
+
+    if (doublefailed) {
+        apr_file_printf(output, " - Double lookup failed : %d" NL,
+                        doublefailed);
+    }
+    apr_file_printf(output, "Cache hits : %d" NL, cachehits);
+    apr_file_printf(output, "Cache size : %d" NL, cachesize);
 }
 
 /*
- * gets a line from stdin
+ * usage info
  */
 
-static int get_line (char *s, int n)
+static void usage(void)
 {
-    char *cp;
-
-    if (!fgets(s, n, stdin))
-        return (0);
-    cp = strchr(s, '\n');
-    if (cp)
-        *cp = '\0';
-    return (1);
+    apr_file_printf(errfile,
+    "%s -- Resolve IP-addresses to hostnames in Apache log files." NL
+    "Usage: %s [-s STATFILE] [-c]" NL
+    NL
+    "Options:" NL
+    " -s Record statistics to STATFILE when finished." NL
+    NL
+    " -c Perform double lookups when resolving IP addresses." NL,
+    shortname, shortname);
+    exit(1);
 }
 
-int main (int argc, char *argv[])
+#undef NL
+
+/*
+ * Copies the log on stdin to stdout, replacing the leading address of each
+ * line with its hostname when the reverse lookup succeeds. Lookup results,
+ * failures included, are cached by address text.
+ */
+int main(int argc, const char * const argv[])
 {
-    struct in_addr ipnum;
-    char *bar, hoststring[MAXDNAME + 1], line[MAXLINE], *statfile;
-    int i, check;
-
-#if defined(WIN32) || (defined(NETWARE) && defined(USE_WINSOCK))
-    /* If we apr'ify this code, apr_pool_create/apr_pool_destroy
-     * should perform the WSAStartup/WSACleanup for us.
-     */
-    WSADATA wsaData;
-    WSAStartup(MAKEWORD(2, 0), &wsaData);
+    apr_file_t * outfile;
+    apr_file_t * infile;
+    apr_file_t * statsfile;
+    apr_sockaddr_t * ip;
+    apr_sockaddr_t * ipdouble;
+    apr_getopt_t * o;
+    apr_pool_t * pool;
+    apr_status_t status;
+    const char * arg;
+    char opt;
+    char * stats = NULL;
+    char * space;
+    char * hostname;
+#if APR_MAJOR_VERSION > 1 || (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION >= 3)
+    char * inbuffer;
+    char * outbuffer;
 #endif
+    char line[2048];
+    int doublelookups = 0;
+
+    if (apr_app_initialize(&argc, &argv, NULL) != APR_SUCCESS) {
+        return 1;
+    }
 
-    check = 0;
-    statfile = NULL;
-    for (i = 1; i < argc; i++) {
-        if (strcmp(argv[i], "-c") == 0)
-            check = 1;
-        else if (strcmp(argv[i], "-s") == 0) {
-            if (i == argc - 1) {
-                fprintf(stderr, "logresolve: missing filename to -s\n");
-                exit(1);
-            }
-            i++;
-            statfile = argv[i];
+    atexit(apr_terminate);
+
+    if (argc) {
+        shortname = apr_filepath_name_get(argv[0]);
+    }
+
+    if (apr_pool_create(&pool, NULL) != APR_SUCCESS) {
+        return 1;
+    }
+    apr_file_open_stderr(&errfile, pool);
+    apr_getopt_init(&o, pool, argc, argv);
+
+    while (1) {
+        status = apr_getopt(o, "s:c", &opt, &arg);
+        if (status == APR_EOF) {
+            break;
         }
-        else {
-            fprintf(stderr, "Usage: logresolve [-s statfile] [-c] < input > output\n");
-            exit(0);
+        else if (status != APR_SUCCESS) {
+            usage();
         }
+        else {
+            switch (opt) {
+            case 'c':
+                if (doublelookups) {
+                    usage();
+                }
+                doublelookups = 1;
+                break;
+            case 's':
+                if (stats) {
+                    usage();
+                }
+                stats = apr_pstrdup(pool, arg);
+                break;
+            } /* switch */
+        } /* else */
+    } /* while */
+
+    apr_file_open_stdout(&outfile, pool);
+    apr_file_open_stdin(&infile, pool);
+
+#if APR_MAJOR_VERSION > 1 || (APR_MAJOR_VERSION == 1 && APR_MINOR_VERSION >= 3)
+    /* Allocate two new 10k file buffers */
+    if ((outbuffer = apr_palloc(pool, 10240)) == NULL ||
+        (inbuffer = apr_palloc(pool, 10240)) == NULL) {
+        return 1;
     }
+
+    /* Set the buffers */
+    apr_file_buffer_set(infile, inbuffer, 10240);
+    apr_file_buffer_set(outfile, outbuffer, 10240);
+#endif
+
+    cache = apr_hash_make(pool);
 
-    for (i = 0; i < BUCKETS; i++)
-        nscache[i] = NULL;
-    for (i = 0; i < MAX_ERR + 2; i++)
-        errors[i] = 0;
-
-    while (get_line(line, MAXLINE)) {
-        if (line[0] == '\0')
+    while(apr_file_gets(line, 2048, infile) == APR_SUCCESS) {
+        if (line[0] == '\0') {
             continue;
+        }
+
+        /* Count our log entries */
         entries++;
-        if (!apr_isdigit(line[0])) { /* short cut */
-            puts(line);
-            withname++;
+
+        /* Check if this could even be an IP address */
+        if (!apr_isxdigit(line[0]) && line[0] != ':') {
+            withname++;
+            apr_file_puts(line, outfile);
+            continue;
+        }
+
+        /* Terminate the line at the next space. A line without one cannot
+         * be split into "address rest-of-entry", and every later step
+         * dereferences space, so pass such a line through untouched.
+         */
+        space = strchr(line, ' ');
+        if (space == NULL) {
+            withname++;
+            apr_file_puts(line, outfile);
+            continue;
+        }
+        *space = '\0';
+
+        /* See if we have it in our cache */
+        hostname = (char *) apr_hash_get(cache, (const void *)line,
+                                         strlen(line));
+        if (hostname) {
+            apr_file_printf(outfile, "%s %s", hostname, space + 1);
+            cachehits++;
+            continue;
+        }
+
+        /* Parse the IP address */
+        status = apr_sockaddr_info_get(&ip, line, APR_UNSPEC ,0, 0, pool);
+        if (status != APR_SUCCESS) {
+            /* Not an IP address */
+            withname++;
+            *space = ' ';
+            apr_file_puts(line, outfile);
             continue;
         }
-        bar = strchr(line, ' ');
-        if (bar != NULL)
-            *bar = '\0';
-        ipnum.s_addr = inet_addr(line);
-        if (ipnum.s_addr == 0xffffffffu) {
-            if (bar != NULL)
-                *bar = ' ';
-            puts(line);
-            withname++;
+
+        /* This does not make much sense, but historically "resolves" means
+         * "parsed as an IP address". It does not mean we actually resolved
+         * the IP address into a hostname.
+         */
+        resolves++;
+
+        /* From here on out we cache each result, even if it was not
+         * successful
+         */
+        cachesize++;
+
+        /* Try and perform a reverse lookup */
+        status = apr_getnameinfo(&hostname, ip, 0) != APR_SUCCESS;
+        if (status || hostname == NULL) {
+            /* Could not perform a reverse lookup */
+            *space = ' ';
+            apr_file_puts(line, outfile);
+            noreverse++;
+
+            /* Add to cache; the key must outlive line[], so copy it */
+            *space = '\0';
+            apr_hash_set(cache, apr_pstrdup(pool, line),
+                         APR_HASH_KEY_STRING, apr_pstrdup(pool, line));
             continue;
         }
 
-        resolves++;
+        /* Perform a double lookup */
+        if (doublelookups) {
+            /* Do a forward lookup on our hostname, and see if that matches our
+             * original IP address.
+             */
+            status = apr_sockaddr_info_get(&ipdouble, hostname, ip->family, 0,
+                                           0, pool);
+            /* The name may map to several addresses; any one may match */
+            while (status == APR_SUCCESS && ipdouble != NULL &&
+                   memcmp(ipdouble->ipaddr_ptr, ip->ipaddr_ptr,
+                          ip->ipaddr_len) != 0) {
+                ipdouble = ipdouble->next;
+            }
+            if (status != APR_SUCCESS || ipdouble == NULL) {
+                /* Double-lookup failed */
+                *space = ' ';
+                apr_file_puts(line, outfile);
+                doublefailed++;
+
+                /* Add to cache; the key must outlive line[], so copy it */
+                *space = '\0';
+                apr_hash_set(cache, apr_pstrdup(pool, line),
+                             APR_HASH_KEY_STRING, apr_pstrdup(pool, line));
+                continue;
+            }
+        }
 
-        cgethost(ipnum, hoststring, check);
-        if (bar != NULL)
-            printf("%s %s\n", hoststring, bar + 1);
-        else
-            puts(hoststring);
-    }
+        /* Output the resolved name */
+        apr_file_printf(outfile, "%s %s", hostname, space + 1);
 
-#if defined(WIN32) || (defined(NETWARE) && defined(USE_WINSOCK))
-    WSACleanup();
-#endif
+        /* Store it in the cache; the key must outlive line[], so copy it */
+        apr_hash_set(cache, apr_pstrdup(pool, line),
+                     APR_HASH_KEY_STRING, apr_pstrdup(pool, hostname));
+    }
 
-    if (statfile != NULL) {
-        FILE *fp;
-        fp = fopen(statfile, "w");
-        if (fp == NULL) {
-            fprintf(stderr, "logresolve: could not open statistics file '%s'\n"
-                    ,statfile);
-            exit(1);
+    /* Flush any remaining output */
+    apr_file_flush(outfile);
+
+    if (stats) {
+        if (apr_file_open(&statsfile, stats,
+                          APR_FOPEN_WRITE | APR_FOPEN_CREATE | APR_FOPEN_TRUNCATE,
+                          APR_OS_DEFAULT, pool) != APR_SUCCESS) {
+            apr_file_printf(errfile,
+                            "%s: Could not open %s for writing." APR_EOL_STR,
+                            shortname, stats);
+            return 1;
         }
-        stats(fp);
-        fclose(fp);
+        print_statistics(statsfile);
+        apr_file_close(statsfile);
     }
 
-    return (0);
+    return 0;
 }