Peter's Blog

Redefining the Impossible

Items filed under awstats


Extracts from awstats logs, bandwidth used by various visitors to this site:

crawler.bloglines.com 353.02 MB
MSIECrawler 567.41 MB
Inktomi Slurp 135.79 MB
Googlebot 59.86 MB

Apparently MSIECrawler is IE sucking the entire contents of the site. A couple of people seem to have done this, what is the point? Is this site that interesting? Are the spam blogs (copies of legitimate blogs full of links to p0ker sites) using IE for their scraping technology? My attitude to the spammers turns from annoyance to pity.

Bloglines is getting a bit carried away, 353M just downloading RSS feeds.

InkTomi Slurp is still slurping and not returning any visitors from search results.

Googlebot drives 95% of my traffic so 59M is acceptable.

Here is my latest crack at apache log file analysis in python:

   1  #
   2  # Apache log file analysis.
   3  #
   4  import re
   5  import datetime
   6  
   7  #
   8  # Regular expression for parsing apache log file.
   9  #
  10  oLogRE = re.compile( r'''([\d.]+).*\s+                # host
  11                    [^\s]+\s+                        # ?
  12                    [^\s]+\s+                        # ?
  13                    \[(.*?)\]\s+                     # when
  14                    "(.*?)\s+(.*)\s+(.*)"\s+         # method, path, protocol
  15                    (\d+)\s+                         # Error code
  16                    ([^\s]+)\s+                      # Size ?
  17                    "(.*?)"\s+                       # Referrer
  18                    "(.*?)"                          # Agent
  19  ''', re.VERBOSE)
  20  
  21  LOG_Who = 0
  22  LOG_When = 1
  23  LOG_How = 2
  24  LOG_What = 3
  25  LOG_Protocol = 4
  26  LOG_Error = 5
  27  LOG_Size = 6
  28  LOG_Referrer = 7
  29  LOG_Agent = 8
  30  
  31  def ScanFile( strFile):
  32      """
  33      Scan apache log file and return hits.
  34      """
  35      for strLine in open( 'c:\\Desktop\\access.log').readlines():
  36          oMatch = oLogRE.match( strLine)
  37          if oMatch:
  38              yield( oMatch.groups())
  39          else:
  40              print 'Reject: %s' % strLine
  41  
  42  def GatherBy( oHits, nField):
  43      """
  44      Gather hits from list of hits into a dictionary keyed
  45      by unique values of a specific field.
  46      """
  47      oDict = {}
  48  
  49      for oHit in oHits:
  50          oKey = oHit[nField]
  51          if oKey in oDict:
  52              oDict[oKey].append( oHit)
  53          else:
  54              oDict[oKey] = [oHit]
  55  
  56      return oDict
  57  
  58  def FilterBy( oHits, nField, strFilter):
  59      """
  60      Filter hits from list of hits by unique values of a specific field.
  61      """
  62      oRE = re.compile( strFilter)
  63  
  64      for oHit in oHits:
  65          if oRE.search( oHit[nField]):
  66              yield( oHit)
  67  
  68  def FilterByDate( oHits,
  69                    oStartDate,
  70                    oEndDate = datetime.date.today() + datetime.timedelta(1)):
  71      """
  72      Filter hits >= Start Date and < End Date
  73      """
  74      oRE = re.compile( r'(\d+)/(\w+)/(\d+).*')
  75  
  76      for oHit in oHits:
  77          strDate = oHit[LOG_When]
  78          strDay, strMonth, strYear = oRE.match( strDate).groups()
  79  
  80          nDay = int( strDay)
  81          nMonth = ['Jan', 'Feb', 'Mar',
  82                    'Apr', 'May', 'Jun',
  83                    'Jul', 'Aug', 'Sep',
  84                    'Oct', 'Nov', 'Dec'].index( strMonth) + 1
  85          nYear = int( strYear)
  86  
  87          oDate = datetime.date( nYear, nMonth, nDay)
  88  
  89          if oDate >= oStartDate and oDate < oEndDate:
  90              yield( oHit)
  91  
  92  def AnalyseBy( oHits, nField, bJustSummary = False):
  93      """
  94      Print hits by unique values of a specific field
  95      and generate counts and bytes for each unique value.
  96      """
  97      oDict = GatherBy( oHits, nField)
  98  
  99      oKeys = oDict.keys()
 100  
 101      oKeys.sort()
 102  
 103      nGrandTotalCounts = 0
 104      nGrandTotalBytes = 0
 105  
 106      for oKey in oKeys:
 107          nCount = len( oDict[oKey])
 108  
 109          nTotal = 0
 110  
 111          for oHit in oDict[oKey]:
 112              strSize = oHit[LOG_Size]
 113              if strSize != '-':
 114                  nTotal += int(strSize)
 115  
 116          if not bJustSummary:
 117              print oKey, nCount, nTotal
 118  
 119          nGrandTotalCounts += nCount
 120          nGrandTotalBytes += nTotal
 121  
 122      print "Unique items: %d, Total Hits: %d, Total Bytes: %d" % (len(oKeys),
 123                                                                   nGrandTotalCounts,
 124                                                                   nGrandTotalBytes)
 125  
 126  oStartDate = datetime.date.today() - datetime.timedelta( 8 ) # week yesterday
 127  oEndDate = datetime.date.today() - datetime.timedelta( 1 )  # yesterday
 128  
 129  oAllHits = list( FilterByDate( ScanFile( 'c:\\Desktop\\access.log'),
 130                                 oStartDate, oEndDate))
 131  oAllHits.extend( list( FilterByDate( ScanFile( 'c:\\Desktop\\access.log.1'),
 132                                       oStartDate, oEndDate)))
 133  
 134  print "User Agents"
 135  AnalyseBy( oAllHits, LOG_Agent, True)
 136  
 137  print "All Hosts (hence all usage)"
 138  AnalyseBy( oAllHits, LOG_Who, True)
 139  
 140  print "Hits from bloglines"
 141  #
 142  # Determine different bloglines feeds and analyse each one
 143  #
 144  for strFeed, oFeedHits in GatherBy( FilterBy( oAllHits,
 145                                                LOG_Agent,
 146                                                'Bloglines'),
 147                                      LOG_What).items():
 148      #
 149      # Now analyse by agent:  agent includes number of subscribers
 150      # so we see subscribers per feed.
 151      print "Bloglines feed %s" % strFeed
 152      AnalyseBy( oFeedHits, LOG_Agent)
 153  
 154  print "Hits from MSIECrawler by host"
 155  AnalyseBy( FilterBy( oAllHits, LOG_Agent, 'MSIECrawler'), LOG_Who)
 156  
 157  print "Hits from Inktomi/yahoo slurp"
 158  AnalyseBy( FilterBy( oAllHits, LOG_Agent, 'Slurp'), LOG_Agent)
Toggle Line Numbers

Example output for week ending yesterday. Notes:

  • 659,628,604 bytes served in a week!
  • Slurp took 45,786,926 bytes
  • MSIECrawl user took 49,980,154 bytes
  • I've got more bloglines subscribers than I thought.
  • Just how many rss feed urls does drupal provide?
User Agents
Unique items: 497, Total Hits: 76052, Total Bytes: 659628604

All Hosts (hence all usage)
Unique items: 3301, Total Hits: 76052, Total Bytes: 659628604

Hits from bloglines
Bloglines feed /blog/1/feed
Bloglines/3.0-rho (http://www.bloglines.com; 1 subscriber) 252 13730570
Bloglines/3.0-rho (http://www.bloglines.com; 3 subscribers) 256 13947306
Bloglines/3.0-rho (http://www.bloglines.com; 5 subscribers) 252 13730570
Bloglines/3.0-rho (http://www.bloglines.com; 7 subscribers) 256 13947306
Unique items: 4, Total Hits: 1016, Total Bytes: 55355752
Bloglines feed /blog/feed
Bloglines/3.0-rho (http://www.bloglines.com; 1 subscriber) 242 13189698
Unique items: 1, Total Hits: 242, Total Bytes: 13189698
Bloglines feed /atom/feed
Bloglines/3.0-rho (http://www.bloglines.com; 3 subscribers) 256 1449434
Unique items: 1, Total Hits: 256, Total Bytes: 1449434
Bloglines feed /tags/18/feed
Bloglines/3.0-rho (http://www.bloglines.com; 1 subscriber) 256 12427520
Unique items: 1, Total Hits: 256, Total Bytes: 12427520
Bloglines feed /blog/feed/1
Bloglines/3.0-rho (http://www.bloglines.com; 5 subscribers) 252 0
Unique items: 1, Total Hits: 252, Total Bytes: 0
Bloglines feed /taxonomy/term/5/0/feed
Bloglines/3.0-rho (http://www.bloglines.com; 1 subscriber) 242 3832796
Unique items: 1, Total Hits: 242, Total Bytes: 3832796
Bloglines feed /node/feed
Bloglines/3.0-rho (http://www.bloglines.com; 3 subscribers) 256 13959338
Unique items: 1, Total Hits: 256, Total Bytes: 13959338
Bloglines feed /rss.xml
Bloglines/3.0-rho (http://www.bloglines.com; 3 subscribers) 256 0
Bloglines/3.0-rho (http://www.bloglines.com; 7 subscribers) 256 0
Unique items: 2, Total Hits: 512, Total Bytes: 0
Bloglines feed /tags/3/feed
Bloglines/3.0-rho (http://www.bloglines.com; 1 subscriber) 250 12999000
Unique items: 1, Total Hits: 250, Total Bytes: 12999000

Hits from MSIECrawler by host
81.159.46.223 1784 49980154
Unique items: 1, Total Hits: 1784, Total Bytes: 49980154

Hits from Inktomi/yahoo slurp
Mozilla/5.0 (compatible; Yahoo! Slurp China;) 58 800642
Mozilla/5.0 (compatible; Yahoo! Slurp;) 2066 44986284
Unique items: 2, Total Hits: 2124, Total Bytes: 45786926

4 Comments

Since I added the awstats GeoIP module I have had visitors from:

  • Aero/Travel domains
  • Albania
  • Antigua and Barbuda
  • Argentina
  • Ascension Island
  • Australia
  • Austria
  • Belgium
  • Bolivia
  • Bosnia-Herzegovina
  • Brazil
  • Brunei Darussalam
  • Bulgaria
  • Canada
  • Chile
  • China
  • Christmas Island
  • Cocos (Keeling) Islands
  • Colombia
  • Commercial
  • Costa Rica
  • Croatia
  • Cyprus
  • Czech Republic
  • Denmark
  • Dominican Republic
  • Ecuador
  • Egypt
  • Estonia
  • European Union
  • Finland
  • France
  • Germany
  • Great Britain
  • Greece
  • Guatemala
  • Honduras
  • Hong Kong
  • Hungary
  • Iceland
  • India
  • Indonesia
  • Info domains
  • International
  • Ireland
  • Israel
  • Italy
  • Japan
  • Jordan
  • Kenya
  • Latvia
  • Lithuania
  • Luxembourg
  • Macedonia
  • Malaysia
  • Malta
  • Mexico
  • Moldova
  • Morocco
  • Nepal
  • Netherlands
  • Network
  • New Zealand
  • Niue
  • Non-Profit Organizations
  • Norway
  • Old style Arpanet
  • Pakistan
  • Paraguay
  • Peru
  • Philippines
  • Poland
  • Portugal
  • Puerto Rico
  • Romania
  • Russian Federation
  • Satellite access host
  • Saudi Arabia
  • Singapore
  • Slovak Republic
  • Slovenia
  • South Africa
  • South Korea
  • Spain
  • Sri Lanka
  • Sweden
  • Switzerland
  • Taiwan
  • Thailand
  • Trinidad and Tobago
  • Turkey
  • Ukraine
  • United Kingdom
  • United States
  • Unknown
  • Uruguay
  • USA Educational
  • USA Government
  • USA Military
  • Venezuela
  • Vietnam
  • Yugoslavia

Filed under: awstats


I was studying the statcounter logs for this site and lamenting how the country info only lists the last 100 page views. Because one visitor can look at 50 pages, their country will appear to take 50% of traffic, giving a distorted view of proceedings.

So I decided to update my awstats config with GeoIP to give long term country information. GeoIP is a library from MaxMind that converts IP addresses to countries. Country lookup is free, you can pay for higher resolution lookup (city etc) if you have the $$$.

I installed it as follows:

  • Installed the MaxMind GeoIP C Library and perl library from here
  • I downloaded and installed the country database.
  • I let these configure and install themselves. This may lead to an unholy mix with my debian apt setup but it is more likely to work than if I start messing with it.
  • I went into webmin and enabled the GeoIP plugin. A word of recommendation here, webmin does a great job of managing multiple sites with awstats. In fact webmin does a great job of most things.

There is an old version of GeoIP that comes as a debian package but it installs in a debian way and was not picked up by the MaxMind Perl module so I installed the MaxMind stuff by the book.

It seems to be running, it is showing visitors from germany and stuff but I need to let it run for longer and see if the number of visitors from unknown countries goes down (it probably doesn't add country info to old records only new ones).

This may end up simply telling me where the bots that make up a large number of my visitors come from: so far this month this site has used 1.15G of bandwidth. Accursed Inktomi Slurp (which I think is yahoo) has taken 177Mb of this. Googlebot has taken 77M but given me 7073 visitors against 182. Slurp is a good name. It has probably uploaded all the text on the site 100 times. I don't change it that much.


3 Comments

I set up awstats on my dedicated server. Awstats is a very comprehensive apache log file analyser that lets me see what has been going on at my site. I mainly use statcounter for visitor analysis as it allows me to see precisely what they have been doing, where they came from, which pages they looked at etc. Awstats is more statistics based, giving overall averages and summaries. Also, Awstats tells me about bots and crawlers which statcounter filters out.

Setting it up amounted to:

  • install awstats package using dselect
  • edit /etc/awstats/conf.local to customise, using settings from /etc/awstats/awstats.conf
    • point it at my log file
    • give it site name
    • set log format 1 which appears to be bog standard apache
    • exclude me/my ip addresses from stats
    • enable reverse DNS to see who is accessing me, not just ip addresses
  • edit /etc/logrotate.d/apache2 and add:
    # pcw: from awstats faq: run awstats before log file is lost
    prerotate
    /usr/lib/cgi-bin/awstats.pl -update -config=petersblog.org
    endscript
    
    so log files get processed before logrotate renames/deletes them
  • set up cron job to update stats every three hours. This is to keep awstats database updated and spread out the time it takes
    10 0,3,6,9,12,15,18,21 * * * /usr/local/awstats/wwwroot/cgi-bin/awstats.pl -config=petersblog.org -update >/dev/null
    
  • set up apache to deny access to awstats from anyone but me. This is for two reasons:
    1. privacy
    2. awstats has had at least one bad vulnerability in the past that allowed sites to be hacked

This gives me a better awstats setup than site5 gave me as I have enabled the reverse DNS lookup, meaning I see originating site names rather than IP addresses.


Filed under: awstats debian oneandone


The number of visitors to my site has increased by about 40% in the last two days. Yesterday I got an all-time record of 509 unique visitors (according to statcounter). It's about a level that I can quote without feeling embarrassed. According to statcounter's 'came from' thing they mostly come from google.

I must admit I don't totally trust statcounter. Today it shows this in 'keyword analysis':

4  7.41%	onenote firefox
3  5.56%	thunderbird taskbar notifier
3  5.56%	vnc ubuntu
2  3.70%	ubuntu mp3

This would imply that four different people came from google searching for 'onenote firefox'. If I drill down into 'onenote firefox' it only shows one ip address with eight entries. What gives ? How many 'unique user visitors' is this supposed to be? Why would someone come here from the same search four times?

I still trust statcounter not to show me hits from bots, crawlers, comment spammers and whatever. According to awstats I got 649 visitors. Also, in the awstats referrer list out of the top 25 entries this month only 4 are referrer spam links with obscene URLs, the others come from techy sites or searches from other than google/msn/yahoo (I am on the statcounter free program which only shows details for the last 100 hits, awstats is unlimited).

This is almost credibility. Thanks for reading.


Filed under: awstats google statcounter


awstats is a program for displaying web server access statistics.


Filed under: awstats


Since I mentioned awstats on this blog I've been getting attempts to access the awstats.pl script on this site. awstats.pl is not accessible through this domain, it is provided by Site5 but I have to log in to netadmin to get to them.

Anyway, I had a quick search to see if there was a way to hack in via awstats and sure enough there is. The trick mentioned in this article is the one they are trying to get in with:

200.223.55.134 - - [11/Feb/2005:14:44:54 -0500] "GET
/stats/cgi-bin/awstats.pl?configdir=|echo%20;echo%20;id;echo%20;echo%20|
HTTP/1.0" 404 6186 "-" "Mozilla/4.0 (compatible; MSIE 6.0b; Windows NT 5.0)"

this is trying to execute the command id which shows the uid, gid and groups of the account it runs in. I guess this is probing for this vulnerability and seeing whether it gives root access.

The break-in attempts are coming from a variety of IPs, as is usual they are using proxies so there is no point trying to block them. They are getting 403s anyway, they aren't consuming much bandwidth.

Moral: keep an eye on your access logs, see what folk are up to.


1 Comment

I never did get around to trying to install awstats. I've been using Statcounter but I fancied trying awstats with reverse DNS turned on. I can't do this on my Site5 host as they don't like reverse DNS. I didn't install it on Gentoo as that looked like big time hassle.

I realised today that installing awstats under Ubuntu should be as simple as installing the awstats package and it almost is. I can install it on my home server, download my Site5 access logs there and let awstats format them up.

Here are the steps I had to take to install it:

  • Install awstats package
  • Edit a file called /etc/awstats/awstats.hostname.conf where hostname is the hostname. Put something like this in it:
    LogFile="/var/log/apache/access.log"
    LogFormat=1
    DNSLookup=1
    DirData="/var/cache/awstats/"
    DirCgi="/cgi-bin"
    DirIcons="/icon"
    SiteDomain="hostname"
    AllowToUpdateStatsFromBrowser=1
    AllowFullYearView=3
    
  • Make a directory called /var/cache/awstats and chmod it 777 so it can be used from the web server
  • Copy icons to web directory:
    cp -r /usr/share/awstats/icon /var/www/icon
    
  • Run this to update databases:
    /usr/lib/cgi-bin/awstats.pl -config=hostname -update
    
  • In your web browser, go to the url:
    http://hostname/cgi-bin/awstats.pl?config=hostname
    
  • Study the stats in quiet awe
  • Edit crontab to update stats automatically every night:
    crontab -e
    0 1 * * * /usr/lib/cgi-bin/awstats.pl -config=hostname -update
    

5 Comments

I've been keeping an eye on my visitor logs to see how much my domain name problems have affected my traffic. According to Statcounter they had been climbing but yesterday there is a sudden dip. The Awstats logs provided by Site5 show no such dip.

I've seen a number of such dips in the Statcounter logs: their servers do not appear to be the most reliable. This is not a big complaint, I use them for free, more of a lamentation. Their professional service is too expensive for my simple ego brushing needs, $9 a month, but if I was paying that I would not want drop-outs approximately once a week.

The main advantage of Statcounter for me is that it counts visitors who have javascript enabled so it is essentially counting human beings rather than crawlers and referrer spam bots. It is also easy to set it up to ignore my own IP address. The Drupal statistics module does not have this feature but I could simply use phpmyadmin or another generic mysql database report generation tool to filter the drupal logs in any way I desire. The statistics module does list external referrers in reverse chronological order so it is useful for updating .htaccess referrer exclusion lists.


1 Comment

I've subscribed to StatCounter which is a service for gathering statistics for web traffic. It's a hit counter with very nice tracking facilities. You don't have to have a hit counter displayed on your page: I've hidden mine, it's for me to know how few/many visitors I have had. It shows things like:

  • Where in the world visitors are connecting from (hey there, how are things in China?), their ISP's etc.
  • Which pages they have looked at (how they are going around the site)
  • How long they spend looking at each page
  • Easily set it up to ignore my own accesses

It does the kind of things that awstats, webstats, Drupal statistics etc cannot do without reverse DNS lookups etc.

It relies on the visitor's browser downloading a little bitmap and running some javascript. I think this is good as it means it will not record the 300 odd bots (including damn referrer spam) that suck my bandwidth every month, I can tell which visitors are humans. If I want to look at bot activity I can analyse the raw logs. Awstats and Drupal statistics are still there for me.

Best of all, it's free if you have less than 1000 visitors a day.


Filed under: awstats drupal statcounter

3 Comments

My Site5 hosting provider gives me access to Awstats and it's interesting to see what people have been looking at on this site. So far we are half way through 4th November and I've already had 74 hits from google searches. The most interesting search phrase has to be 'raw nose' which turned up this old post from earlier blogging days when I wasn't so tech focused and more chit-chatty. I hope the posting answered their question.

This post has proved the most popular.

I am going to install Awstats locally so I can enable the reverse DNS functions and start watching the watchers.


Filed under: awstats google hosting site5


My Site5 hosting service allows me to download access logs which I find endlessly fascinating. The netadmin administration tool offers AwStats which shows incredibly detailed statistics but it is slightly skewed by showing my own access.

So I wrote a python script to parse the log and dump out anything interesting. It filters out IP addresses I am likely to connect from. This is crude in that I have hard wired the log file name. Note that the log file I download is gzipped but that is no problem for python.

This dumps out:

  • suspicious looking attempts to hack in (extremely long strings etc)
  • a list of various user agents and the IP addresses they are coming from
  • a list of referrer strings

Things I find interesting in the dumps:

  • There are 171 different types of user agents listed. Most claim to be mozilla type browsers which is probably rarely true but even so, there are a lot of things crawling around out there. Someone out there is using lynx. Hi there.
  • I get at least one known spam email address harvester visiting (DTS Agent). Be warned. This particular one does not really bother to hide itself.
  • Referrers from drupal.org seem to arrive from random pages on that site. I think folk are browsing around, see something from me in the 'Drupal Talk' block and come here for a read. Drupal generates a misleading referrer string.
  • The referrer strings from google give the search terms. I get a number of people looking for r-s-y-n-c w-i-n-2-k (obscured to hide from google) and when I do that search this post comes in at #7 with its enticing title. Moral: give postings enticing titles.
  • Yahoo Slurp crawls the site about as much as google but gave me one referral compared to 81 from google.

These statistics are for a 7 day period.

import gzip
import re

#
# Open log file. Crude but effective: the path is hard-wired, and
# gzip.GzipFile reads directly from the gzipped log file so it does not
# have to be unpacked first.
#
oFile = gzip.GzipFile( 'C:\\Tmp\\accesslog-bisiand.me.uk-9-28-2004.gz')

def Sorted( oArray):
    """
    Return a new, sorted list of the items in oArray.

    oArray may be any iterable (a list, a tuple, a dict's keys, ...);
    it is left unmodified.
    """
    return sorted( oArray)

#
# Scan through the log file.
# Use regular expression to split the entries up.
#
# Pattern is thus (one log line, wrapped here for width):
#
# 56.98.204.40 - - [09/Sep/2004:03:50:01 -0400] "GET / HTTP/1.0" 200 643 "-"
#     "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7) Gecko/20040803 Firefox/0.9.3"
#
# Groups: 1 = IP address, 2 = [timestamp] including the brackets,
#         3 = request method, 4 = rest of the request (path and protocol),
#         5 and 6 = the two numeric (or '-') columns after the request,
#         7 = referrer, 8 = user agent.
#
# The pattern is a single string; it is written as two adjacent raw
# literals only to keep the source lines short.
#
oRE = re.compile(
    r'(\d+\.\d+\.\d+\.\d+).*(\[.*\])\s+"(GET|POST|HEAD|SEARCH|PUT)\s+([^"]+)'
    r'"\s+([\d-]+)\s+([\d-]+)\s+"([^"]+)"\s+"([^"]+)"')

#
# Here build map of IP addresses to the log file entries:
# IP address -> list of (access, referrer, agent) tuples, one per hit.
#
oHits = {}

#
# Build map of unique referrers and how many folk they sent my way:
# referrer string -> number of hits that carried it.
#
oReferrers = {}

#
# Go through the file.
#
for strLine in oFile:
    # Echo every raw line, minus its final character (the newline).
    print(strLine[:-1])

    oMatch = oRE.search( strLine)
    if not oMatch:
        #
        # Did not match the regular expression. Just dump the line.
        #
        print("Miss:" + strLine)
        continue

    #
    # These things seem to be used by hackers trying to break in.
    # Show the line and leave it out of the statistics.
    #
    if oMatch.group(3) in ("PUT", "SEARCH"):
        print(strLine)
        continue

    #
    # Get the IP address.
    #
    strIP = oMatch.group(1)

    #
    # Ignore the entry if it is me.
    #
    if strIP in ('76.54.32.10', '12.3.45.67'):
        continue

    #
    # Get interesting fields from log file.
    # Note the method and the rest of the request are joined with no
    # separator between them.
    #
    strAccess = oMatch.group(3) + oMatch.group(4)
    strReferrer = oMatch.group(7)
    strAgent = oMatch.group(8)

    #
    # Build up hit map.
    #
    oHits.setdefault( strIP, []).append( (strAccess, strReferrer, strAgent))

    #
    # Build up referrer map.
    #
    oReferrers[strReferrer] = oReferrers.get( strReferrer, 0) + 1

#
# Determine which user agents originate from which IP:
# agent string -> list of IP addresses, in sorted IP order.
# Only the agent recorded on an IP's first hit is considered.
#
strAgents = {}

for strIP in Sorted( list( oHits)):
    strFirstAgent = oHits[strIP][0][2]
    strAgents.setdefault( strFirstAgent, []).append( strIP)

#
# Display the unique User Agents and the IPs using them, with the
# number of hits seen from each IP.
# This shows things like googlebot.
#
for strAgent in Sorted( list( strAgents)):
    print(strAgent)
    for strIP in strAgents[strAgent]:
        print("   %s %d" % (strIP.ljust( 15), len( oHits[strIP])))

#
# How did they get here? Show each referrer and its hit count.
# Referrers mentioning this site's own address or domain are skipped.
#
for strReferrer in Sorted( list( oReferrers)):
    if '209.59.159.21' in strReferrer or 'bisiand.me.uk' in strReferrer:
        continue

    strCount = str( oReferrers[strReferrer])

    if len( strReferrer) < 60:
        # Short referrer: pad so the counts line up in one column.
        print(strReferrer.ljust( 60) + strCount)
    else:
        # Long referrer: put the count on its own line in that column.
        print(strReferrer + "\n" + (' ' * 60) + strCount)