""" Project ARC Geog 376 Class Spring 2010 identify HSIP zip files, create new folders to contain files extract from top level 19 zipfiles extract 227 zip files from next level 7 component files per zip = 1589 unzipped files to be created clip each of 227 new .shp file by california.shp reproject HSIP data to common projection edited by ted woodward tested with Python 25.5 start 4-1-2010 finish dev 5-10-2010 zipfile, os.walk ideas borrowed http://stackoverflow.com/questions/120656/directory-listing-in-python http://www.devshed.com/c/a/Python/Python-UnZipped/ http://www.techlists.org/archives/programming/pythonlist/2007-08 http://bytes.com/topic/python/answers/630194-get-file-names-folder-using-python#post2494154 http://sebsauvage.net/python http://www.goldb.org/goldblog/2007/11/27/PythonExtractingFilesFromZipArchives.aspx http://stackoverflow.com/questions/639962/unzipping-directory-structure-with-python reproject ideas borrowed from http://forums.esri.com/Thread.asp?c=93&f=1729&t=250287 borrowed from http://proceedings.esri.com/dvd/uc/2009/uc/tws/workshops/tw_218.ppt borrowed from http://webhelp.esri.com/arcgisdesktop/9.3/index.cfm?TopicName=Project_%28Data_Management%29 then edited , not written for gui no log for this process """ # Import system modules import os os.chdir(r'F:\Program Files\ArcGIS\bin') import sys, string, os, arcgisscripting, traceback, time, datetime, zipfile, zlib from tedsBest_zip_routines import * tcurdir = os.getcwd() sys.path.append(tcurdir) CurDate = datetime.date.today() # Create the Geoprocessor object -maybe not necessary here ? gp = arcgisscripting.create() # Set the Geoprocessing environment... gp.extent = "MAXOF" # this solves the GPError GP ERRORS: # ERROR 000953: Extent of the overlay operation will be empty. Failed to execute (Clip). # error previously occurred in gp.Clip_analysis(tobeClipped,Ca_Clipper,clipOutputSHP,"") # set the workspace gp.workspace = "H:\\los_rios_course_376_python\\Project" #create new sub folders to contain files here datafolder = gp.workspace + "\\Data\\HSIP_Freedom_2010_4-5" # replace with sysarg later ? # datafolder contains 19 zip files when we start, we'll create and add many folders\ subfolders # Load required toolboxes... gp.AddToolbox("F:/Program Files/ArcGIS/ArcToolbox/Toolboxes/Analysis Tools.tbx") #raw_input("0 Much of the initial unzip work will be done in one of two cmd windows. Watch the screen. The python window is a visual review of files as well. Press any key to continue") print "Much of the initial unzip work will be done in one of two cmd windows. Watch the screen. The python window is a visual review of files as well. " print " " # Allow output to overwrite gp.OverwriteOutput = 1 # Create logfile to record print statments logpath = "H:\\los_rios_course_376_python\\Project\\Log\\" # replace with sysarg later ? print "Logpath = " + logpath print "Here is a list of the 19 top level HSIP data zip files:" print " Toplevel zip files will be unzipped to one of many newly created directories. " print " Downlevel zip files will also be unzipped to new directory locations. " print '-'*40 print " " # next print to pyscreen is far below ,just before zfile.printdir logfile = logpath + "HSIP_Zip_Files_Discovery_" + str(CurDate) + ".txt" # replace with sysarg later ? # Delete log file if it exists if gp.exists(logfile): gp.delete_management(logfile) #Create a log we can review, one of several logs "HSIP_Zip_Files_Discovery_" HSIPlog = open(logfile, 'a') # this line allows print messages to be written # to the logfile. 'a' sets the file for "appending" print >> HSIPlog, "Log started : " + str(CurDate) print >> HSIPlog, "Log path is " + logpath print >> HSIPlog, "This Log is " + logfile print >> HSIPlog, " " print >> HSIPlog, "Starting to get file and folder names in datafolder " + datafolder print >> HSIPlog, " " #---------------------------------- try: # start in folder HSIP_Freedom_2010_4-5 # Log work - - - -open this top level folder to look and list out names of TL zipfiles fileList = getfilenames_In_Folder(datafolder)# def found in tedsBest_zip_routines lineNum = 1 for f in fileList: #first pass through filelist of HSIP_Freedom_2010_4-5 shortpath = os.path.basename(f) # f is each top level zip short filename print >>HSIPlog, "File Num " +str(lineNum) + ". " + shortpath lineNum = lineNum +1 print >>HSIPlog, '-'*20 + " end first listing top level zip files" +'-'*20 print >>HSIPlog, " " newLineNum = 1 CountZipFiles =0 UberCount = 0 #raw_input("1 Press any key to continue.")#remove this later for f in fileList: #second pass through filelist of HSIP_Freedom_2010_4-5 shortpath2 = os.path.basename(f) #they are all zip files # open each top level zipped file to look and borrow naming print "Contents of top level zip named " + shortpath2 #ok #print '-'*20 + "start zipcontents dump " + '-'*20 print >>HSIPlog, str(newLineNum) + ". Contents of Zip File: " + shortpath2 zfile = zipfile.ZipFile( f, "a" ) # or "r" or "w", might work here also # retrieve information about the zip file ONLY to screen zfile.printdir()#prints a line for whatever number of files there are with details # zfile.printdir() will not send to a log print '-'*20 + " end zfile.printdir() function output " + '-'*20 #ok print " " firstCount = len(zfile.namelist()) CountZipFiles += len(zfile.namelist()) print >>HSIPlog, '-'*20 + "Count of contents of this 1 top level zip = " + str(firstCount) + '-'*20 print >>HSIPlog, " " print >>HSIPlog, '-'*20 + "end zipcontents dump " +'-'*20 print '-'*20 + "Count of contents of this 1 top level zip = " + str(firstCount) + '-'*20 print " " UberCount = UberCount + CountZipFiles newLineNum = newLineNum +1 zfile.close() print >>HSIPlog, '-'*20 + "Accumulated Count of ALL sub level zips = " + str(CountZipFiles) + '-'*20 print >>HSIPlog, " " print >>HSIPlog, '-'*20 + "end zipcontents dump " +'-'*20 print '-'*20 + "Accumulated Count of contents of ALL sub level zips = " + str(CountZipFiles) + '-'*20 print " " Count_sub_zips = UberCount Count_TL_Zips =newLineNum -1 #good as is this # this does a big piece of PART1 work to unzip the top level ZIPS # displays new sub-zips names and paths # raw_input("2 That was show and tell. Batchfiles will now do their work to unzip all sub-level zip files contents. This will take some time. Press any key to continue.") print "2 That was show and tell. Batchfiles will now do their work to unzip all sub-level zip files contents. This will take some time." print " " #call 2 bat files that unzip the tl and sub zips BatOne = "H:\\los_rios_course_376_python\\Project\\workingCode\\get_TLzips-1.bat" BatTwo = "H:\\los_rios_course_376_python\\Project\\workingCode\\getsubzips-501.bat" os.system(BatOne) os.system(BatTwo) print " " print "Part one of this routine complete." print "All 227 zip files unzipped to H:\los_rios_course_376_python\Project\Data\HSIP_Freedom_2010_4-5\working" print "and subfolders" # this all works #raw_input("3 Sublevel zips now expanded in separate folders. Next we clip all shape files by california.shp Press any key to continue") print "Sublevel zips now expanded in separate folders. Next we clip all shape files by california.shp Press any key to continue" print " " # Process: now we need to Clip all shp files by shape of ca # put new shape file in same folder? newSearchFolder = "H:\\los_rios_course_376_python\\Project\\Data\\HSIP_Freedom_2010_4-5\\working" for root, dirs, files in os.walk(newSearchFolder): # uses "H:\\los_rios_course_376_python\\Project\\Data\\HSIP_Freedom_2010_4-5\\working subzipCount = 0 for name in files: extension = os.path.splitext(name)[1] if extension == ".shp" : """ print "ext = " +extension print name print "newSearchFolder = " +newSearchFolder print "root = " + root """ filename = os.path.join(root, name) #print "first we look in this dir " +os.path.dirname(filename) #print " for " +filename baseRoot = "H:\\los_rios_course_376_python\\Project\\Data\\HSIP_Freedom_2010_4-5\\working" #static shortpathTBC = os.path.basename(name) # ?? Nuclear\NuclearFuel\NuclearFuel.shp #print shortpathTBC tobeClipped = filename print "to be Clipped = " + tobeClipped Ca_Clipper = "H:\\los_rios_course_376_python\\Project\\Data\\base\\California.shp" #static new_outputName = "\\Ca_" +shortpathTBC print "new_ output Name = " +new_outputName clipOutputSHP = root + new_outputName #print "clip Output SHP = " +clipOutputSHP #raw_input("Press any key to continue.") gp.Clip_analysis(tobeClipped,Ca_Clipper,clipOutputSHP,"") print "new feature at - " + clipOutputSHP #raw_input("Press any key to continue.") subzipCount = subzipCount +1 print "All new features -clipped by california shape - created." print " " print " " raw_input("Press any key to continue.") print " " print " " print "Starting re -Projection for Ca-clipped shape files ONLY " print "whose current projection" print "does not match NAD 1983 StatePlane California II FIPS 0402 (Feet). " print " " num =1 Reproject_workspace = newSearchFolder # Set the desired spatial reference variable - cs cs = "F:\\Program Files\\ArcGIS\\Coordinate Systems\\Projected Coordinate Systems\\State Plane\\NAD 1983 (Feet)\\ NAD 1983 StatePlane California II FIPS 0402 (Feet).prj" #cs doesnt work to be passed in as parameter # so I used modelbuilder to get a bit o'script cs2 = "PROJCS['NAD_1983_StatePlane_California_II_FIPS_0402_Feet',GEOGCS['GCS_North_American_1983',DATUM['D_North_American_1983',SPHEROID['GRS_1980',6378137.0,298.257222101]],PRIMEM['Greenwich',0.0],UNIT['Degree',0.0174532925199433]],PROJECTION['Lambert_Conformal_Conic'],PARAMETER['False_Easting',6561666.666666666],PARAMETER['False_Northing',1640416.666666667],PARAMETER['Central_Meridian',-122.0],PARAMETER['Standard_Parallel_1',38.33333333333334],PARAMETER['Standard_Parallel_2',39.83333333333334],PARAMETER['Latitude_Of_Origin',37.66666666666666],UNIT['Foot_US',0.3048006096012192]]" #could not get exactsyntax except by running arcmap modelbuilder for root, dirs, files in os.walk(Reproject_workspace): for shp in files: extension = os.path.splitext(shp)[1] if extension == ".shp" : #obviously we are looking for shapefiles here if shp[:3] == "Ca_" : #look for Ca_ prefix to indicate clipped files if not shp[-12:] == "_SP_CAII.shp" : #exclude already created files as script will ther wise continue to add suffix as below print " " #but gp.OverwriteOutput allows overwrite print "Shapefile ",str(num), " : ", str(shp) filename = os.path.join(root, shp) #print "filename = ", filename baseoutput = os.path.basename(filename)[:-4] + "_SP_CAII.shp" #creates a tag identifying the new projection # we could do a batch rename later I suppose output = os.path.join(root, baseoutput) #print "output = ", output if gp.Exists(output): gp.Delete_management(output) desc = gp.describe(filename) #print desc") sr = desc.SpatialReference #print sr SR_name = desc.spatialreference.name if SR_name == "Unknown": # 3 possibilities re SR_name, improbable that this data will be without spatial reference print "Unknown Projection!" gp.AddError("Unknown Projection!")#more serious that different SR gp.DefineProjection(filename, cs2) # never actually had this option run print "Completed adding SP_CAII coordinate system to " + shp elif SR_name == 'NAD 1983 StatePlane California II FIPS 0402 (Feet)': print " " gp.AddMessage('Projection matches desired NAD 1983 StatePlane California II FIPS 0402 (Feet).') print "Projection matches desired NAD 1983 StatePlane California II FIPS 0402 (Feet)." else : print shp + " -Projection does not match NAD 1983 StatePlane California II FIPS 0402 (Feet)!" gp.AddWarning('Projection does not match NAD 1983 StatePlane California II FIPS 0402 (Feet)!') gp.AddWarning(SR_name) if SR_name.find("WGS_1984")==-1: trans = "#" # or remove this line else: trans = "NAD_1983_To_WGS_1984_1" #gp.AddMessage('Parameters:'+ shp+','+output+','+cs+','+trans) #print "Parameters: "+ shp+","+output+","+cs+","+trans #raw_input("3 Press any key to perform reprojection, or press x, top right, to stop here.") print "Starting reprojection." #syntax Project_management (in_dataset, out_dataset, out_coor_system, transform_method(optional), in_coor_system) gp.Project_management(filename, output, cs2, trans) print "Reprojection complete for - ", filename print "new file at -", output #raw_input("Press any key to continue.") num = num +1 #print totals to logs print '-'*20 + "Count TL Zips = " + str(Count_TL_Zips) + '-'*20 #should be 19 print >> HSIPlog,'-'*20 + "Count TL Zips = " + str(Count_TL_Zips) + '-'*20 print '-'*20 + "Count sub Zips = " + str(Count_sub_zips) + '-'*20 # should be ?2191 ? not sure why this number reliably occurrs print >> HSIPlog,'-'*20 + "Count sub Zips = " + str(Count_sub_zips) + '-'*20 print >> HSIPlog, " " print >> HSIPlog, "Closed log." HSIPlog.close() print '-'*20 + " Logfile created, and closed" + '-'*20 except: tb = sys.exc_info()[2] tbinfo = traceback.format_tb(tb)[0] pymsg = "PYTHON ERRORS:\nTraceback Info:\n" + tbinfo + "\nError Info:\n " + str(sys.exc_type) + ": " + str(sys.exc_value) + "\n" msgs = "GP ERRORS:\n" + gp.GetMessages(2) + "\n" gp.AddError(msgs) gp.AddError(pymsg) print msgs print pymsg gp.AddMessage(gp.GetMessages(1)) print gp.GetMessages(1)