Difference between revisions of "HostIP for geolocation"

From Noah.org
Jump to navigationJump to search
Line 27: Line 27:
 
is stored in a different table. It was pretty easy to write a Python  
 
is stored in a different table. It was pretty easy to write a Python  
 
script that would decode an IP address into {city, state, country, country_code lat, lon}.
 
script that would decode an IP address into {city, state, country, country_code lat, lon}.
 +
This is a short script that does the bare essentials as an example.
 +
I have included a longer script at the end which has more features.
  
 
<pre>
 
<pre>
Line 75: Line 77:
 
         lat: 37.8133
 
         lat: 37.8133
 
         lon: -122.50
 
         lon: -122.50
 +
</pre>
 +
 +
 +
== Full script ==
 +
 +
<pre>
 +
#!/usr/bin/env python
 +
"""HostIP.info geolocation lookup.
 +
Run this script with an IP address as an argument and it will return
 +
the following information: {ip, city, state, country, country_code lat, lon}.
 +
 +
Options:
 +
    -h : Print this help
 +
    -v : Print verbose data (multi line). Default is to print result on a single line.
 +
 +
Download mysql data from http://www.hostip.info/.
 +
This datafile may work in another database with a little tweaking.
 +
Create a database for hostip and load the data:
 +
 +
    $ mysql -u root -p ROOT_PASSWORD -e "create database hostip;"
 +
    $ mysql -u root -p ROOT_PASSWORD hostip < hostip_current.sql
 +
 +
Noah Spurrier 2007
 +
 +
$Id$
 +
"""
 +
# TODO: Add database configuration from the command-line:
 +
#    --host=HOSTNAME (default is 127.0.0.1, localhost)
 +
#    --user=USERNAME (default is root)
 +
#    --password=PASSWORD (default is blank)
 +
#    --database=DB_NAME (default is hostip)
 +
#    --port=PORT_NUM (default is 3306)
 +
#    --dsn=DATASOURCENAME (alternate)
 +
# TODO: Add batch file support of Apache log processing
 +
#    --file=FILENAME : Process all lines in the given file. Pass - to process stdin.
 +
 +
import sys, os, traceback
 +
import re
 +
import getopt
 +
import time
 +
import MySQLdb
 +
import os, sys, urllib
 +
 +
def exit_with_usage ():
 +
    print globals()['__doc__']
 +
    os._exit(1)
 +
 +
def parse_args (options='', long_options=[]):
 +
    try:
 +
        optlist, args = getopt.getopt(sys.argv[1:], options+'h?', long_options+['help','h','?'])
 +
    except Exception, e:
 +
        print str(e)
 +
        exit_with_usage()
 +
    options = dict(optlist)
 +
    if [elem for elem in options if elem in ['-h','--h','-?','--?','--help']]:
 +
        exit_with_usage()
 +
    return (options, args)
 +
 +
def main ():
 +
    (options, args) = parse_args('v')
 +
    # if args<=0:
 +
    #    exit_with_usage()
 +
    if '-v' in options:
 +
        verbose = True
 +
    else:
 +
        verbose = False
 +
 +
    ipstr = args[0]
 +
    ip = ipstr.split('.')
 +
    con = MySQLdb.Connect(host="127.0.0.1", port=3306, user="root", passwd="", db="hostip") #, unix_socket='/var/lib/mysql/mysql.sock')
 +
    cursor = con.cursor()
 +
    table_a = "ip4_" + ip[0]
 +
    net_b = ip[1]
 +
    net_c = ip[2]
 +
    sql = """SELECT cityByCountry.name as city, cityByCountry.state as state,
 +
        countries.name as country, countries.code as country_code,
 +
        cityByCountry.lat as lat, cityByCountry.lng as lon
 +
    FROM %(table_a)s, countries, cityByCountry
 +
    WHERE %(table_a)s.city=cityByCountry.city AND
 +
        %(table_a)s.country=cityByCountry.country AND
 +
        %(table_a)s.country=countries.id AND
 +
        b=%(net_b)s AND c=%(net_c)s;"""
 +
    sql = sql % locals()
 +
    if cursor.execute(sql) == 0:
 +
        con.close()
 +
        print "# %s IP address was not found in the database." % ipstr
 +
        return 1
 +
    con.close()
 +
    results = list(cursor.fetchall()[0])
 +
    results[0] = urllib.unquote(results[0])
 +
    results = [ipstr] + results
 +
    keys = ['ip','city','state','country','country_code','lat','lon']
 +
    results_dict = dict(zip(keys, results))
 +
    if verbose:
 +
        for key in keys:
 +
            print "%12s: %s" %(key, results_dict[key])
 +
    else:
 +
        results_dict['city'] = '"' + results_dict['city'] + '"'
 +
        print "%(ip)s,%(city)s,%(state)s,%(country)s,%(country_code)s,%(lat)s,%(lon)s" % results_dict
 +
    return 0
 +
 +
if __name__ == '__main__':
 +
    try:
 +
        sys.exit(main())
 +
        sys.exit(0)
 +
    except SystemExit, e:
 +
        raise e
 +
    except Exception, e:
 +
        print 'ERROR, UNEXPECTED EXCEPTION'
 +
        print str(e)
 +
        traceback.print_exc()
 +
        os._exit(1)
 
</pre>
 
</pre>

Revision as of 13:35, 27 July 2007


The hostip.info geolocation database is free and accurate. You can use this to turn an IP address into a geographic coordinates. It also includes city, state, and country information.

When I downloaded the bz2 version it appeared to be gzipped multiple times. I don't know why that is, but if you have trouble opening it trying using the `file` command to see what format it is in. Keep gunzipping it until your get ascii text. Here is what I had to do:

 $ bunzip2 hostip_current.sql.bz2
 $ mv hostip_current.sql hostip_current.sql.gz
 $ gunzip hostip_current.sql.gz
 $ mv hostip_current.sql hostip_current.sql.gz # yes, do it again
 $ gunzip hostip_current.sql.gz

I knew I was done when `file` gave me this results:

 $ file hostip_current.sql 
 hostip_current.sql: ASCII text, with very long line

Now, to actually use this data here is what I to did. Create a database for hostip and load the data:

 $ mysql -u root -p ROOT_PASSWORD -e "create database hostip;"
 $ mysql -u root -p ROOT_PASSWORD hostip < hostip_current.sql

That gave me a nice database. Each A block of an IP address is stored in a different table. It was pretty easy to write a Python script that would decode an IP address into {city, state, country, country_code lat, lon}. This is a short script that does the bare essentials as an example. I have included a longer script at the end which has more features.

#!/usr/bin/env python
# hostipgeo.py
# HostIP.info geolocation lookup.
# Run this script with an IP address as an argument and it will return
# the following information: {city, state, country, country_code lat, lon}.
# Noah Spurrier 2007
import MySQLdb
import os, sys, urllib
ip = sys.argv[1].split('.')
con = MySQLdb.Connect(host="127.0.0.1", port=3306, user="root", passwd="", db="hostip") #, unix_socket='/var/lib/mysql/mysql.sock') 
cursor = con.cursor()
table_a = "ip4_" + ip[0]
net_b = ip[1]
net_c = ip[2]
sql = """SELECT cityByCountry.name as city, cityByCountry.state as state,
    countries.name as country, countries.code as country_code,
    cityByCountry.lat as lat, cityByCountry.lng as lon
FROM %(table_a)s, countries, cityByCountry
WHERE %(table_a)s.city=cityByCountry.city AND
    %(table_a)s.country=cityByCountry.country AND
    %(table_a)s.country=countries.id AND
    b=%(net_b)s AND c=%(net_c)s;"""
sql = sql % locals()
if cursor.execute(sql) == 0:
    print "That IP address was not found in the database."
    sys.exit(1)
con.close()
results = list(cursor.fetchall()[0])
results[0] = urllib.unquote(results[0])
keys = ['city','state','county','country_code','lat','lon']
results_dict = dict(zip(keys, results))
for key in keys:
    print "%12s: %s" %(key, results_dict[key])
sys.exit(0)

Running this script on the IP address 69.80.208.1 gives the following:

$ ./hostipgeo.py 69.80.208.1
        city: San Francisco, CA
       state: California
      county: UNITED STATES
country_code: US
         lat: 37.8133
         lon: -122.50


Full script

#!/usr/bin/env python
"""HostIP.info geolocation lookup.
Run this script with an IP address as an argument and it will return
the following information: {ip, city, state, country, country_code lat, lon}.

Options:
    -h : Print this help
    -v : Print verbose data (multi line). Default is to print result on a single line.

Download mysql data from http://www.hostip.info/.
This datafile may work in another database with a little tweaking.
Create a database for hostip and load the data:

    $ mysql -u root -p ROOT_PASSWORD -e "create database hostip;"
    $ mysql -u root -p ROOT_PASSWORD hostip < hostip_current.sql

Noah Spurrier 2007

$Id$
"""
# TODO: Add database configuration from the command-line:
#    --host=HOSTNAME (default is 127.0.0.1, localhost)
#    --user=USERNAME (default is root)
#    --password=PASSWORD (default is blank)
#    --database=DB_NAME (default is hostip)
#    --port=PORT_NUM (default is 3306)
#    --dsn=DATASOURCENAME (alternate)
# TODO: Add batch file support of Apache log processing
#    --file=FILENAME : Process all lines in the given file. Pass - to process stdin.

import sys, os, traceback
import re
import getopt
import time
import MySQLdb
import os, sys, urllib

def exit_with_usage ():
    print globals()['__doc__']
    os._exit(1)

def parse_args (options='', long_options=[]):
    try:
        optlist, args = getopt.getopt(sys.argv[1:], options+'h?', long_options+['help','h','?'])
    except Exception, e:
        print str(e)
        exit_with_usage()
    options = dict(optlist)
    if [elem for elem in options if elem in ['-h','--h','-?','--?','--help']]:
        exit_with_usage()
    return (options, args)

def main ():
    (options, args) = parse_args('v')
    # if args<=0:
    #     exit_with_usage()
    if '-v' in options:
        verbose = True
    else:
        verbose = False

    ipstr = args[0]
    ip = ipstr.split('.')
    con = MySQLdb.Connect(host="127.0.0.1", port=3306, user="root", passwd="", db="hostip") #, unix_socket='/var/lib/mysql/mysql.sock') 
    cursor = con.cursor()
    table_a = "ip4_" + ip[0]
    net_b = ip[1]
    net_c = ip[2]
    sql = """SELECT cityByCountry.name as city, cityByCountry.state as state,
        countries.name as country, countries.code as country_code,
        cityByCountry.lat as lat, cityByCountry.lng as lon
    FROM %(table_a)s, countries, cityByCountry
    WHERE %(table_a)s.city=cityByCountry.city AND
        %(table_a)s.country=cityByCountry.country AND
        %(table_a)s.country=countries.id AND
        b=%(net_b)s AND c=%(net_c)s;"""
    sql = sql % locals()
    if cursor.execute(sql) == 0:
        con.close()
        print "# %s IP address was not found in the database." % ipstr
        return 1
    con.close()
    results = list(cursor.fetchall()[0])
    results[0] = urllib.unquote(results[0])
    results = [ipstr] + results
    keys = ['ip','city','state','country','country_code','lat','lon']
    results_dict = dict(zip(keys, results))
    if verbose:
        for key in keys:
            print "%12s: %s" %(key, results_dict[key])
    else:
        results_dict['city'] = '"' + results_dict['city'] + '"'
        print "%(ip)s,%(city)s,%(state)s,%(country)s,%(country_code)s,%(lat)s,%(lon)s" % results_dict
    return 0

if __name__ == '__main__':
    try:
        sys.exit(main())
        sys.exit(0)
    except SystemExit, e:
        raise e
    except Exception, e:
        print 'ERROR, UNEXPECTED EXCEPTION'
        print str(e)
        traceback.print_exc()
        os._exit(1)