User:The Anome/NRIS kml extractor
Appearance
#Copyright (c) 2011 The Anome
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in
#all copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#THE SOFTWARE.
"""Extract one comma-separated record per <Placemark> from an NRIS KML file.

When run as a script, reads ``doc.kml`` from the current directory and prints
one line per ``<Placemark>`` element. Every value on a line is whitespace-
stripped, passed through ``repr`` and joined with ``", "``:

* ``'OK', name, lat, lon, <12 fields parsed from the CDATA description>``
  when the placemark and its description both match the expected layout;
* ``'ERROR', name, lat, lon`` when only the description fails to match;
* ``'ERROR', '', '', ''`` when the placemark itself has no recognisable
  name/description/coordinates (previously this case either crashed with
  NameError or re-printed the preceding placemark's values).

The code deliberately sticks to constructs that behave the same on Python 2
and Python 3 (``str`` methods, single-argument ``print(...)``).
"""
import re
import string  # noqa: F401 -- no longer used below; import kept so nothing is removed from the module namespace

# Input file read when the module is executed as a script.
KML_PATH = "doc.kml"

# One <Placemark>...</Placemark> element; (?s) lets "." span newlines.
_PLACEMARK_RE = re.compile(r"(?ms)<Placemark>.*?</Placemark>")

# Name, raw description and the three comma-separated coordinate components
# of a single placemark.
_FIELDS_RE = re.compile(
    r"(?ms)^.*?<name>(.*?)</name>"
    r".*?<description>(.*?)</description>"
    r".*?<coordinates>(.*?),(.*?),(.*?)</coordinates>.*?$"
)

# Layout of the CDATA description once its whitespace runs have been collapsed
# to single spaces. Twelve capture groups, in order: place name, address,
# city, county, state, latitude, longitude, NPS reference number, date listed,
# notes, type, geocode match.
cdata_re = (
    r'(?ms)<!\[CDATA\[<b>Historic Place Name: </b>(.*?)'
    r'<br /><b>Address: </b>(.*?)'
    r'<br /><b>City: </b>(.*?)'
    r'<br /><b>County: </b>(.*?)'
    r'<br /><b>State: </b>(.*?)'
    r'<br /><br /><u>Geographic Coordinates:</u>'
    r'<br /><b>Latitude: </b>(.*?)'
    r'<br /><b>Longitude: </b>(.*?)'
    r'<br /><br /><b>NPS Reference Number: </b>(.*?)'
    r'<br /><b>Date Listed: </b>(.*?)'
    r'<br /><b>Notes: </b>(.*?)'
    r'<br /><b>Type: </b>(.*?)'
    r'<br /><b>Geocode Match: </b>(.*?)'
    r'<br /><br /><p align="center">A Service of:'
    r'<br /><a href="http://www\.cr\.nps\.gov/nr/">National Register of Historic Places</a> '
    r'<br /><a href="http://www\.nps\.gov/">National Park Service</a></p>\]\]>'
)
_CDATA_RE = re.compile(cdata_re)


def format_row(values):
    """Return *values* as one output line.

    Each value is stripped of surrounding whitespace, converted with
    ``repr`` and the results are joined with ``", "``.
    """
    return ", ".join(repr(value.strip()) for value in values)


def parse_placemark(placemark):
    """Return the output line for the text of one ``<Placemark>`` element.

    The line starts with ``'OK'`` when both the placemark and its description
    match the expected layout and with ``'ERROR'`` otherwise (see the module
    docstring for the exact column layout).
    """
    found = _FIELDS_RE.search(placemark)
    if found is None:
        # Nothing could be extracted, so emit blanks instead of values left
        # over from an earlier placemark.
        return format_row(["ERROR", "", "", ""])

    # The coordinates element lists longitude first; output puts latitude first.
    name, description, lon, lat, _rest = found.groups()

    # Collapse every whitespace run (including newlines) to a single space so
    # the description can be matched against the single-line cdata_re layout.
    description = " ".join(description.split())
    details = _CDATA_RE.search(description)
    if details is None:
        return format_row(["ERROR", name, lat, lon])
    return format_row(["OK", name, lat, lon] + list(details.groups()))


def extract_rows(kml_text):
    """Yield one output line per ``<Placemark>`` element found in *kml_text*."""
    for element in _PLACEMARK_RE.finditer(kml_text):
        yield parse_placemark(element.group(0))


def main(path=KML_PATH):
    """Read the KML file at *path* and print one line per placemark."""
    # "with" guarantees the file handle is closed even if reading fails.
    with open(path) as kml_file:
        kml_text = kml_file.read()
    for row in extract_rows(kml_text):
        # Single-argument print(...) produces the same output on Python 2 and 3.
        print(row)


if __name__ == "__main__":
    main()