Difference between revisions of "HostIP for geolocation"
Line 27: | Line 27: | ||
is stored in a different table. It was pretty easy to write a Python | is stored in a different table. It was pretty easy to write a Python | ||
script that would decode an IP address into {city, state, country, country_code lat, lon}. | script that would decode an IP address into {city, state, country, country_code lat, lon}. | ||
+ | This is a short script that does the bare essentials as an example. | ||
+ | I have included a longer script at the end which has more features. | ||
<pre> | <pre> | ||
Line 75: | Line 77: | ||
lat: 37.8133 | lat: 37.8133 | ||
lon: -122.50 | lon: -122.50 | ||
+ | </pre> | ||
+ | |||
+ | |||
+ | == Full script == | ||
+ | |||
+ | <pre> | ||
+ | #!/usr/bin/env python | ||
+ | """HostIP.info geolocation lookup. | ||
+ | Run this script with an IP address as an argument and it will return | ||
+ | the following information: {ip, city, state, country, country_code lat, lon}. | ||
+ | |||
+ | Options: | ||
+ | -h : Print this help | ||
+ | -v : Print verbose data (multi line). Default is to print result on a single line. | ||
+ | |||
+ | Download mysql data from http://www.hostip.info/. | ||
+ | This datafile may work in another database with a little tweaking. | ||
+ | Create a database for hostip and load the data: | ||
+ | |||
+ | $ mysql -u root -p ROOT_PASSWORD -e "create database hostip;" | ||
+ | $ mysql -u root -p ROOT_PASSWORD hostip < hostip_current.sql | ||
+ | |||
+ | Noah Spurrier 2007 | ||
+ | |||
+ | $Id$ | ||
+ | """ | ||
+ | # TODO: Add database configuration from the command-line: | ||
+ | # --host=HOSTNAME (default is 127.0.0.1, localhost) | ||
+ | # --user=USERNAME (default is root) | ||
+ | # --password=PASSWORD (default is blank) | ||
+ | # --database=DB_NAME (default is hostip) | ||
+ | # --port=PORT_NUM (default is 3306) | ||
+ | # --dsn=DATASOURCENAME (alternate) | ||
+ | # TODO: Add batch file support of Apache log processing | ||
+ | # --file=FILENAME : Process all lines in the given file. Pass - to process stdin. | ||
+ | |||
+ | import sys, os, traceback | ||
+ | import re | ||
+ | import getopt | ||
+ | import time | ||
+ | import MySQLdb | ||
+ | import os, sys, urllib | ||
+ | |||
+ | def exit_with_usage (): | ||
+ | print globals()['__doc__'] | ||
+ | os._exit(1) | ||
+ | |||
+ | def parse_args (options='', long_options=[]): | ||
+ | try: | ||
+ | optlist, args = getopt.getopt(sys.argv[1:], options+'h?', long_options+['help','h','?']) | ||
+ | except Exception, e: | ||
+ | print str(e) | ||
+ | exit_with_usage() | ||
+ | options = dict(optlist) | ||
+ | if [elem for elem in options if elem in ['-h','--h','-?','--?','--help']]: | ||
+ | exit_with_usage() | ||
+ | return (options, args) | ||
+ | |||
+ | def main (): | ||
+ | (options, args) = parse_args('v') | ||
+ | # if args<=0: | ||
+ | # exit_with_usage() | ||
+ | if '-v' in options: | ||
+ | verbose = True | ||
+ | else: | ||
+ | verbose = False | ||
+ | |||
+ | ipstr = args[0] | ||
+ | ip = ipstr.split('.') | ||
+ | con = MySQLdb.Connect(host="127.0.0.1", port=3306, user="root", passwd="", db="hostip") #, unix_socket='/var/lib/mysql/mysql.sock') | ||
+ | cursor = con.cursor() | ||
+ | table_a = "ip4_" + ip[0] | ||
+ | net_b = ip[1] | ||
+ | net_c = ip[2] | ||
+ | sql = """SELECT cityByCountry.name as city, cityByCountry.state as state, | ||
+ | countries.name as country, countries.code as country_code, | ||
+ | cityByCountry.lat as lat, cityByCountry.lng as lon | ||
+ | FROM %(table_a)s, countries, cityByCountry | ||
+ | WHERE %(table_a)s.city=cityByCountry.city AND | ||
+ | %(table_a)s.country=cityByCountry.country AND | ||
+ | %(table_a)s.country=countries.id AND | ||
+ | b=%(net_b)s AND c=%(net_c)s;""" | ||
+ | sql = sql % locals() | ||
+ | if cursor.execute(sql) == 0: | ||
+ | con.close() | ||
+ | print "# %s IP address was not found in the database." % ipstr | ||
+ | return 1 | ||
+ | con.close() | ||
+ | results = list(cursor.fetchall()[0]) | ||
+ | results[0] = urllib.unquote(results[0]) | ||
+ | results = [ipstr] + results | ||
+ | keys = ['ip','city','state','country','country_code','lat','lon'] | ||
+ | results_dict = dict(zip(keys, results)) | ||
+ | if verbose: | ||
+ | for key in keys: | ||
+ | print "%12s: %s" %(key, results_dict[key]) | ||
+ | else: | ||
+ | results_dict['city'] = '"' + results_dict['city'] + '"' | ||
+ | print "%(ip)s,%(city)s,%(state)s,%(country)s,%(country_code)s,%(lat)s,%(lon)s" % results_dict | ||
+ | return 0 | ||
+ | |||
+ | if __name__ == '__main__': | ||
+ | try: | ||
+ | sys.exit(main()) | ||
+ | sys.exit(0) | ||
+ | except SystemExit, e: | ||
+ | raise e | ||
+ | except Exception, e: | ||
+ | print 'ERROR, UNEXPECTED EXCEPTION' | ||
+ | print str(e) | ||
+ | traceback.print_exc() | ||
+ | os._exit(1) | ||
</pre> | </pre> |
Revision as of 13:35, 27 July 2007
The hostip.info geolocation database is free
and accurate. You can use this to turn an IP address into a geographic coordinates. It also includes city, state, and country information.
When I downloaded the bz2 version it appeared to be gzipped multiple times. I don't know why that is, but if you have trouble opening it trying using the `file` command to see what format it is in. Keep gunzipping it until your get ascii text. Here is what I had to do:
$ bunzip2 hostip_current.sql.bz2 $ mv hostip_current.sql hostip_current.sql.gz $ gunzip hostip_current.sql.gz $ mv hostip_current.sql hostip_current.sql.gz # yes, do it again $ gunzip hostip_current.sql.gz
I knew I was done when `file` gave me this results:
$ file hostip_current.sql hostip_current.sql: ASCII text, with very long line
Now, to actually use this data here is what I to did. Create a database for hostip and load the data:
$ mysql -u root -p ROOT_PASSWORD -e "create database hostip;" $ mysql -u root -p ROOT_PASSWORD hostip < hostip_current.sql
That gave me a nice database. Each A block of an IP address is stored in a different table. It was pretty easy to write a Python script that would decode an IP address into {city, state, country, country_code lat, lon}. This is a short script that does the bare essentials as an example. I have included a longer script at the end which has more features.
#!/usr/bin/env python # hostipgeo.py # HostIP.info geolocation lookup. # Run this script with an IP address as an argument and it will return # the following information: {city, state, country, country_code lat, lon}. # Noah Spurrier 2007 import MySQLdb import os, sys, urllib ip = sys.argv[1].split('.') con = MySQLdb.Connect(host="127.0.0.1", port=3306, user="root", passwd="", db="hostip") #, unix_socket='/var/lib/mysql/mysql.sock') cursor = con.cursor() table_a = "ip4_" + ip[0] net_b = ip[1] net_c = ip[2] sql = """SELECT cityByCountry.name as city, cityByCountry.state as state, countries.name as country, countries.code as country_code, cityByCountry.lat as lat, cityByCountry.lng as lon FROM %(table_a)s, countries, cityByCountry WHERE %(table_a)s.city=cityByCountry.city AND %(table_a)s.country=cityByCountry.country AND %(table_a)s.country=countries.id AND b=%(net_b)s AND c=%(net_c)s;""" sql = sql % locals() if cursor.execute(sql) == 0: print "That IP address was not found in the database." sys.exit(1) con.close() results = list(cursor.fetchall()[0]) results[0] = urllib.unquote(results[0]) keys = ['city','state','county','country_code','lat','lon'] results_dict = dict(zip(keys, results)) for key in keys: print "%12s: %s" %(key, results_dict[key]) sys.exit(0)
Running this script on the IP address 69.80.208.1 gives the following:
$ ./hostipgeo.py 69.80.208.1 city: San Francisco, CA state: California county: UNITED STATES country_code: US lat: 37.8133 lon: -122.50
Full script
#!/usr/bin/env python """HostIP.info geolocation lookup. Run this script with an IP address as an argument and it will return the following information: {ip, city, state, country, country_code lat, lon}. Options: -h : Print this help -v : Print verbose data (multi line). Default is to print result on a single line. Download mysql data from http://www.hostip.info/. This datafile may work in another database with a little tweaking. Create a database for hostip and load the data: $ mysql -u root -p ROOT_PASSWORD -e "create database hostip;" $ mysql -u root -p ROOT_PASSWORD hostip < hostip_current.sql Noah Spurrier 2007 $Id$ """ # TODO: Add database configuration from the command-line: # --host=HOSTNAME (default is 127.0.0.1, localhost) # --user=USERNAME (default is root) # --password=PASSWORD (default is blank) # --database=DB_NAME (default is hostip) # --port=PORT_NUM (default is 3306) # --dsn=DATASOURCENAME (alternate) # TODO: Add batch file support of Apache log processing # --file=FILENAME : Process all lines in the given file. Pass - to process stdin. import sys, os, traceback import re import getopt import time import MySQLdb import os, sys, urllib def exit_with_usage (): print globals()['__doc__'] os._exit(1) def parse_args (options='', long_options=[]): try: optlist, args = getopt.getopt(sys.argv[1:], options+'h?', long_options+['help','h','?']) except Exception, e: print str(e) exit_with_usage() options = dict(optlist) if [elem for elem in options if elem in ['-h','--h','-?','--?','--help']]: exit_with_usage() return (options, args) def main (): (options, args) = parse_args('v') # if args<=0: # exit_with_usage() if '-v' in options: verbose = True else: verbose = False ipstr = args[0] ip = ipstr.split('.') con = MySQLdb.Connect(host="127.0.0.1", port=3306, user="root", passwd="", db="hostip") #, unix_socket='/var/lib/mysql/mysql.sock') cursor = con.cursor() table_a = "ip4_" + ip[0] net_b = ip[1] net_c = ip[2] sql = """SELECT cityByCountry.name as city, cityByCountry.state as state, countries.name as country, countries.code as country_code, cityByCountry.lat as lat, cityByCountry.lng as lon FROM %(table_a)s, countries, cityByCountry WHERE %(table_a)s.city=cityByCountry.city AND %(table_a)s.country=cityByCountry.country AND %(table_a)s.country=countries.id AND b=%(net_b)s AND c=%(net_c)s;""" sql = sql % locals() if cursor.execute(sql) == 0: con.close() print "# %s IP address was not found in the database." % ipstr return 1 con.close() results = list(cursor.fetchall()[0]) results[0] = urllib.unquote(results[0]) results = [ipstr] + results keys = ['ip','city','state','country','country_code','lat','lon'] results_dict = dict(zip(keys, results)) if verbose: for key in keys: print "%12s: %s" %(key, results_dict[key]) else: results_dict['city'] = '"' + results_dict['city'] + '"' print "%(ip)s,%(city)s,%(state)s,%(country)s,%(country_code)s,%(lat)s,%(lon)s" % results_dict return 0 if __name__ == '__main__': try: sys.exit(main()) sys.exit(0) except SystemExit, e: raise e except Exception, e: print 'ERROR, UNEXPECTED EXCEPTION' print str(e) traceback.print_exc() os._exit(1)