Creating multiple feature classes from data in .txt files - Python

I am trying to create multiple feature classes from data files with the .txt extension. My code runs, but it only produces one .shp file. When checked, the variable xyTable does contain all of the .txt file paths. Each of these should then run through both ArcPy functions and produce a feature class named after its .txt file.
import arcpy
import os
import tempfile
import shutil

shpFileArray = []
print "\n"
arcpy.env.overwriteOutput = True
newFolder = "destinationpath"
if os.path.exists(newFolder):
    tmp = tempfile.mktemp(dir=os.path.dirname(newFolder))
    shutil.move(newFolder, tmp)
    shutil.rmtree(tmp)
os.makedirs(newFolder)
arcpy.env.workspace = newFolder
for file in os.listdir("sourcepath"):
    layerName = file[:-4]
    fileSHP = layerName + ".shp"
for file in os.listdir("sourcepath"):
    if file.endswith(".txt"):
        xyTable = os.path.join("destinationpath", file)
        arcpy.MakeXYEventLayer_management(table=xyTable, in_x_field="EastingM", in_y_field="NorthingM", out_layer="layerName", ...continues...
        arcpy.FeatureClassToFeatureClass_conversion(in_features="layerName", out_path="destinationpath", out_name=fileSHP, ....continues....

Looks like you are not giving the FeatureClassToFeatureClass tool unique shapefile names. After the first for loop finishes, fileSHP doesn't change. You already have shpFileArray set up to hold the list of fileSHPs, so perhaps try something like this: save your set of fileSHPs in the first for loop and refer to them by index in the second for loop. My Python might not be exactly right, but I think the idea is sound.
import arcpy
import os
import tempfile
import shutil

shpFileArray = []
print "\n"
arcpy.env.overwriteOutput = True
newFolder = "destinationpath"
if os.path.exists(newFolder):
    tmp = tempfile.mktemp(dir=os.path.dirname(newFolder))
    shutil.move(newFolder, tmp)
    shutil.rmtree(tmp)
os.makedirs(newFolder)
arcpy.env.workspace = newFolder
# First loop: collect the output shapefile names
for file in os.listdir("sourcepath"):
    layerName = file[:-4]
    fileSHP = layerName + ".shp"
    shpFileArray.append(fileSHP)
# Second loop: assumes os.listdir() returns the files in the same order
for idx, file in enumerate(os.listdir("sourcepath")):
    if file.endswith(".txt"):
        xyTable = os.path.join("destinationpath", file)
        outShape = shpFileArray[idx]
        arcpy.MakeXYEventLayer_management(table=xyTable, in_x_field="EastingM", in_y_field="NorthingM", out_layer="layerName", ...continues...
        arcpy.FeatureClassToFeatureClass_conversion(in_features="layerName", out_path="destinationpath", out_name=outShape, ....continues....
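As an alternative (a minimal sketch only, using the function arguments shown in the question and omitting the elided parameters hidden behind ...continues...), the two loops can be collapsed into one so that each .txt file is paired directly with its own layer and output name:

import arcpy
import os

arcpy.env.overwriteOutput = True
sourceFolder = "sourcepath"        # placeholder path, as in the question
destFolder = "destinationpath"     # placeholder path, as in the question

for file in os.listdir(sourceFolder):
    if file.endswith(".txt"):
        layerName = file[:-4]
        fileSHP = layerName + ".shp"
        xyTable = os.path.join(destFolder, file)
        # Use a unique layer name and a unique output name for every file
        arcpy.MakeXYEventLayer_management(table=xyTable, in_x_field="EastingM",
                                          in_y_field="NorthingM", out_layer=layerName)
        arcpy.FeatureClassToFeatureClass_conversion(in_features=layerName,
                                                    out_path=destFolder, out_name=fileSHP)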

Related

I want to move a file based on part of the name to a folder with that name

I have a directory with a large number of files that I want to move into folders based on part of the file name. My list of files looks like this:
001-020-012B-B.nc
001-022-151-A.nc
001-023-022-PY-T1.nc.nc
001-096-016B-A.nc
I want to move these files into separate folders based on the first part of the file name (001-096-016B, 001-023-022, 001-022-151). The first part of the file name always has the same number of digits and always consists of 3 groups separated by a hyphen '-'.
The folder names are named like this \oe-xxxx\xxxx\xxxx\001-Disc-PED\020-Rotor-parts-1200.
So for example, this file should be placed in the above folder, based on the folder name (the numbers):
001-020-012B-B.nc
The file path divided into parts, to show where the above file has to be moved to:
(001)-Disc-PED\(020)-Rotor-parts-1200.
Therefore:
(001)-Disc-PED\(020)-Rotor-parts-1200 (001)-(020)-012B-B.nc
This is what I have tried from looking online, but it does not work. My thinking is that I want to loop through the folders and look for matches:
import os
import glob
import itertools
import re

# Source file
sourcefile = r'C:\Users\cah\Desktop\000Turning'
destinationPath = r'C:\Users\cah\Desktop\08-CAM'
# Separation
dirs = glob.glob('*-*')
# Every file with file extension .nc
files = glob.glob('*.nc')
for root, dirs, files in os.walk(sourcefile):
    for file in files:
        if file.endswith(".nc"):
            first3Char = str(file[0:3])
            last3Char = str(file[4:7])
            for root in os.walk(destinationPath):
                first33CharsOfRoot = str(root[0:33])
                cleanRoot1 = str(root).replace("[", "")
                cleanRoot2 = str(cleanRoot1).replace("]", "")
                cleanRoot3 = str(cleanRoot2).replace(")", "")
                cleanRoot4 = str(cleanRoot3).replace("'", "")
                cleanRoot5 = str(cleanRoot4).replace(",", "")
                firstCharOfRoot = re.findall(r'(.{3})\s*$', str(cleanRoot5))
                print(firstCharOfRoot == first3Char)
                if (firstCharOfRoot == first3Char):
                    print("Hello")
for root in os.walk(destinationPath):
    print(os.path.basename(root))
    # if(os.path)
I realized that I should not look for the last 3 characters in the path; it is the first group of numbers (001, etc.) at the beginning that I need to match in order to find the first folder I need to go to.
EDIT:
import os
import glob
import itertools
import re

# Source file
sourcefile = r'C:\Users\cah\Desktop\000Turning'
destinationPath = r'C:\Users\cah\Desktop\08-CAM'
# Separation
dirs = glob.glob('*-*')
# Every file with file extension .nc
files = glob.glob('*.nc')
for root, dirs, files in os.walk(sourcefile):
    for file in files:
        if file.endswith(".nc"):
            first3Char = str(file[0:3])
            last3Char = str(file[4:7])
            for root in os.walk(destinationPath):
                cleanRoot1 = str(root).replace("[", "")
                cleanRoot2 = str(cleanRoot1).replace("]", "")
                cleanRoot3 = str(cleanRoot2).replace(")", "")
                cleanRoot4 = str(cleanRoot3).replace("'", "")
                cleanRoot5 = str(cleanRoot4).replace(",", "")
                firstCharOfRoot = re.findall(r'^(?:[^\\]+\\\\){5}(\d+).*$', str(cleanRoot5))
                secondCharOfRoot = re.findall(r'^(?:[^\\]+\\\\){6}(\d+).*$', str(cleanRoot5))
                firstCharOfRootCleaned = ''.join(firstCharOfRoot)
                secondCharOfRoot = ''.join(secondCharOfRoot)
                cleanRoot6 = str(cleanRoot5).replace("(", "")
                if (firstCharOfRootCleaned == str(first3Char) & secondCharOfRoot == str(last3Char)):
                    print("BINGOf")
                    # for root1 in os.walk(cleanRoot6):
Solution
There is an improved solution in the next section, but let's walk through the straightforward solution first.
First, get the complete list of subfolders.
all_folders_splitted = [os.path.split(f)
                        for f in glob.iglob(os.path.join(destinationPath, "**"), recursive=True)
                        if os.path.isdir(f)]
Then, use a function on each of your files to find its matching folder, or to build a new folder path if no match exists. This function, called find_folder(), is included in the full script below:
import os
import glob
import shutil

sourcefile = r'C:\Users\cah\Desktop\000Turning'
destinationPath = r'C:\Users\cah\Desktop\08-CAM'

all_folders_splitted = [os.path.split(f)
                        for f in glob.iglob(os.path.join(destinationPath, "**"), recursive=True)
                        if os.path.isdir(f)]

# It will create and return a new directory if no directory matches
def find_folder(part1, part2):
    matching_folders1 = [folder for folder in all_folders_splitted
                         if os.path.split(folder[0])[-1].startswith(part1)]
    matching_folder2 = None
    for matching_folder2 in matching_folders1:
        if matching_folder2[-1].startswith(part2):
            return os.path.join(*matching_folder2)
    # Whole new folder tree
    if matching_folder2 is None:
        dest = os.path.join(destinationPath, part1, part2)
        os.makedirs(dest)
        return dest
    # Inside the already existing folder for part "1"
    dest = os.path.join(matching_folder2[0], part2)
    os.makedirs(dest)
    return dest

# All the files you want to move
files_gen = glob.iglob(os.path.join(sourcefile, "**", "*-*-*.nc"), recursive=True)
for file in files_gen:
    # Split on the first two "-"
    basename = os.path.basename(file)
    splitted = basename.split("-", 2)
    # Find the destination folder, creating it if necessary
    destination_folder = find_folder(splitted[0], splitted[1])
    # Copy the file
    shutil.copy2(file, os.path.join(destination_folder, basename))
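For example (continuing from the script above, and assuming the folder layout described in the question), the helper can be called directly:

# Should return an existing folder such as ...\08-CAM\001-Disc-PED\020-Rotor-parts-1200,
# or create and return a new 001\020 folder if no match exists.
print(find_folder("001", "020"))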
Improved solution
In case you have a large number of files, it could be detrimental to "split and match" every folder at each iteration.
We can cache the folder found for each pattern in a dictionary: the dictionary is updated when a new pattern appears; otherwise the previously found folder is returned.
import os
import glob
import shutil

sourcefile = r'C:\Users\cah\Desktop\000Turning'
destinationPath = r'C:\Users\cah\Desktop\08-CAM'

# Global dictionary to store folder paths, keyed by pattern
pattern_match = dict()

all_folders_splitted = [os.path.split(f)
                        for f in glob.iglob(os.path.join(destinationPath, "**"), recursive=True)
                        if os.path.isdir(f)]

def find_folder(part1, part2):
    current_key = (part1, part2)
    if current_key in pattern_match:
        # Already found previously.
        # We just return the folder path, stored as the value.
        return pattern_match[current_key]
    matching_folders1 = [folder for folder in all_folders_splitted
                         if os.path.split(folder[0])[-1].startswith(part1)]
    matching_folder2 = None
    for matching_folder2 in matching_folders1:
        if matching_folder2[-1].startswith(part2):
            dest = os.path.join(*matching_folder2)
            # Update the dictionary
            pattern_match[current_key] = dest
            return dest
    if matching_folder2 is None:
        dest = os.path.join(destinationPath, part1, part2)
    else:
        dest = os.path.join(matching_folder2[0], part2)
    # Update the dictionary
    pattern_match[current_key] = dest
    os.makedirs(dest, exist_ok=True)
    return dest

# All the files you want to move
files_gen = glob.iglob(os.path.join(sourcefile, "**", "*-*-*.nc"), recursive=True)
for file in files_gen:
    # Split on the first two "-"
    basename = os.path.basename(file)
    splitted = basename.split("-", 2)
    # Find the destination folder, creating it if necessary
    destination_folder = find_folder(splitted[0], splitted[1])
    # Copy the file
    shutil.copy2(file, os.path.join(destination_folder, basename))
This updated solution is more efficient (especially when many files share the same folder), and you could also make use of the dictionary later if you save it.
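For example, a minimal sketch of saving that dictionary (an assumption on my part: json cannot serialize tuple keys, so they are joined into strings here, and the output filename is only an example):

import json

# Convert the tuple keys to strings so the mapping can be serialized
serializable = {"-".join(key): folder for key, folder in pattern_match.items()}
with open("pattern_to_folder.json", "w") as fh:
    json.dump(serializable, fh, indent=2)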

Importing xml file into Access database with defined id

I am struggling to import an enormous amount of data from XML files into Access.
The problem I am facing is that the files I want to import contain a first row with an id:
<vin id="11111111111111111">
<description>Mazda3 L 2.0l MZR 150 PS 4T 5AG AL-EDITION TRA-P</description>
<type>BL</type>
<typeapproval>e11*2001/116*0262*07</typeapproval>
<variant>B2F</variant>
<version>7EU</version>
<series>Mazda3</series>
<body>L</body>
<engine>2.0l MZR 150 PS</engine>
<grade>AL-EDITION</grade>
<transmission>5AG</transmission>
<colourtype>Mica</colourtype>
<extcolourcode>34K</extcolourcode>
<extcolourcodedescription>Crystal White Pearl</extcolourcodedescription>
<intcolourcode>BU4</intcolourcode>
<intcolourcodedescription>Black</intcolourcodedescription>
<registrationdate>2012-07-20</registrationdate>
<productiondate>2011-11-30</productiondate>
</vin>
so the result of my import is all of the fields except the VIN number of the vehicle, which is actually defined as the id attribute.
I was trying to manually replace the <vin id="..."> opening tags with plain <vin> to get rid of that id, but I actually have dozens of files and hundreds of thousands of records in each file, so it is quite a pain...
So I thought about concatenating all the files together with a Python script:
import os
import csv
import pandas as pd
import numpy as np

ver = '2011'
dirName = r'C:\Users\dawid\Desktop\DE_DATA\Mazda_DE\VINs_DE\Mazda\xml'.format(ver)
out_file = r'C:\Users\dawid\Desktop\DE_DATA\Mazda_DE\VINs_DE\Mazda\Output.xml'.format(ver)

def getListOfFiles(dirName):
    # Create a list of file and subdirectory names in the given directory
    listOfFile = os.listdir(dirName)
    allFiles = list()
    # Iterate over all the entries
    for entry in listOfFile:
        # Create the full path
        fullPath = os.path.join(dirName, entry)
        # If the entry is a directory then get the list of files in this directory
        if os.path.isdir(fullPath):
            allFiles = allFiles + getListOfFiles(fullPath)
        else:
            allFiles.append(fullPath)
    return allFiles

listOfFileOut = getListOfFiles(dirName)
# filenames = allFiles
with open(out_file, 'w', encoding='ANSI') as outfile:
    for fname in listOfFileOut:
        with open(fname, encoding='ANSI') as infile:
            for line in infile:
                outfile.write(line)
print("Done")
But this completely destroyed the structure of the XML file and I cannot import it anymore.
Could anyone suggest whether it's possible to use Python to get rid of all those ids, so that I can import the whole database into Access?
Thank you in advance.
Try this.
from simplified_scrapy import utils, SimplifiedDoc, req

dirName = r'C:\Users\dawid\Desktop\DE_DATA\Mazda_DE\VINs_DE\Mazda\xml'
listFile = utils.getSubFile(dirName, end='.xml')
for f in listFile:
    doc = SimplifiedDoc(utils.getFileContent(f, encoding='ANSI'))
    doc.replaceReg('<vin[^>]*>', '<vin>')
    print(doc.html)
    # utils.saveFile(f, doc.html, encoding='ANSI')  # write back to the original file
Result:
<vin>
<description>Mazda3 L 2.0l MZR 150 PS 4T 5AG AL-EDITION TRA-P</description>
<type>BL</type>
<typeapproval>e11*2001/116*0262*07</typeapproval>
<variant>B2F</variant>
<version>7EU</version>
...
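If you prefer to stay with the standard library, here is a minimal sketch of the same idea using the re module (the encoding and the .cleaned.xml output name are assumptions; adjust them to your files):

import os
import re

dirName = r'C:\Users\dawid\Desktop\DE_DATA\Mazda_DE\VINs_DE\Mazda\xml'

for name in os.listdir(dirName):
    if not name.endswith('.xml'):
        continue
    path = os.path.join(dirName, name)
    with open(path, encoding='mbcs') as fh:   # 'mbcs' = the Windows ANSI code page
        content = fh.read()
    # Strip the id attribute from every <vin ...> opening tag
    cleaned = re.sub(r'<vin[^>]*>', '<vin>', content)
    with open(path + '.cleaned.xml', 'w', encoding='mbcs') as fh:
        fh.write(cleaned)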

Exporting multiple files with different filenames

Let's say I have n files in a directory with filenames: file_1.txt, file_2.txt, file_3.txt, ..., file_n.txt. I would like to import them into Python individually, do some computation on them, and then store the results into n corresponding output files: file_1_o.txt, file_2_o.txt, ..., file_n_o.txt.
I've figured out how to import multiple files:
import glob
import numpy as np

path = r'home\...\CurrentDirectory'
allFiles = glob.glob(path + '/*.txt')
for file in allFiles:
    # do something to file
    ...
    ...
    np.savetxt(file, ) ???
I'm not quite sure how to append _o (or any other string, for that matter) to the filename so that the output file is file_1_o.txt.
Can you use the following snippet to build the output filename?
parts = in_filename.split(".")
out_filename = parts[0] + "_o." + parts[1]
where I assumed in_filename is of the form "file_1.txt".
Of course it would probably be better to put "_o." (the suffix before the extension) in a variable, so that you can change it at will in just one place.
In your case it means
import glob
import numpy as np

path = r'home\...\CurrentDirectory'
allFiles = glob.glob(path + '/*.txt')
for file in allFiles:
    # do something to file
    ...
    parts = file.split(".")
    out_filename = parts[0] + "_o." + parts[1]
    np.savetxt(out_filename, ) ???
but you need to be careful: before you pass out_filename to np.savetxt you may need to build the full path, so you might need something like
np.savetxt(os.path.join(path, out_filename), )
or something along those lines.
If you would like to combine the change into basically one line, and define your "suffix in a variable" as mentioned before, you could have something like
hh = "_o."  # variable suffix
..........
# inside your loop now
for file in allFiles:
    out_filename = hh.join(file.split("."))
which does the same thing another way, by calling join on the split list, as mentioned by @NathanAck in his answer.
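As a side note (a small alternative sketch, not taken from either answer), os.path.splitext does the same job and also copes with filenames that contain extra dots:

import os

def add_suffix(filename, suffix="_o"):
    # Insert the suffix between the base name and the extension
    base, ext = os.path.splitext(filename)
    return base + suffix + ext

print(add_suffix("file_1.txt"))  # file_1_o.txt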
import os

# Put the path to the files here
filePath = "C:/stack/codes/"
theFiles = os.listdir(filePath)
for file in theFiles:
    # Add the path name before the file
    file = filePath + str(file)
    fileToRead = open(file, 'r')
    fileData = fileToRead.read()
    # DO WORK ON SPECIFIC FILE HERE
    # Access the file through the fileData variable
    fileData = fileData + "\nAdd text or do some other operations"
    # Change the file name to add _o
    fileVar = file.split(".")
    newFileName = "_o.".join(fileVar)
    # Write the modified data in fileData to the new file with _o added
    fileToWrite = open(newFileName, 'w')
    fileToWrite.write(fileData)
    # Close the open files
    fileToWrite.close()
    fileToRead.close()

How to add prefix to files' names, replace a character, and move to new directory?

For example, the files look like FG-4.jpg, FG-5.jpg, etc. and need to be copied to a new directory and renamed test_FG_4.jpg, test_FG_5.jpg, etc.
Here is the updated code:
import shutil
import glob
import os

InFolder = r"C:\test_in"
OutFolder = r"C:\test_out"
for f in glob.glob('*'):
    shutil.move(InFolder/*, OutFolder, copy_function=copy2)
    os.listdir(OutFolder)
    new_filename = f.replace("-", "_")
    new_filename = "test_" + new_filename
    os.rename(f, new_filename)
I'm getting the error
File "c:\copyRename2.py", line 8, in ?
shutil.move(InFolder/*, OutFolder, copy_function=copy2)
invalid syntax: copyRename2.py, line 8, pos 26 in file c:\copyRename2.py, line 8
shutil.move(InFolder/*, OutFolder, copy_function=copy2)
First attempt:
import shutil
import glob
import os

InFolder = r"C:\test_in"
OutFolder = r"C:\test_out"
for f in glob.glob('*'):
    shutil.copyfile(f, OutFolder)
    new_filename = f.replace("-", "_")
    new_filename = "test_" + new_filename
    os.rename(f, new_filename)
I am not sure exactly what you want, so this program checks for any file with the extension .jpg and copies it into a new folder ("NewDir"), adding "Test_" to the file name. If the folder doesn't exist, the program creates it. Maybe you can make the changes you need based on this program.
import shutil
import os

newdir = "NewDir"
for m in os.listdir():
    if m[-4:] == ".jpg":
        if os.path.isdir(newdir):
            shutil.copy(m, newdir + "/" + "Test_" + m)
        else:
            os.mkdir(newdir)
            shutil.copy(m, newdir + "/" + "Test_" + m)
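To match the original question more closely, here is a minimal sketch (assuming the same InFolder and OutFolder paths from the question) that copies the .jpg files into the output folder, replaces the hyphen, and adds the prefix:

import glob
import os
import shutil

InFolder = r"C:\test_in"
OutFolder = r"C:\test_out"

os.makedirs(OutFolder, exist_ok=True)
for src in glob.glob(os.path.join(InFolder, "*.jpg")):
    name = os.path.basename(src)
    # FG-4.jpg -> test_FG_4.jpg
    new_name = "test_" + name.replace("-", "_")
    shutil.copy2(src, os.path.join(OutFolder, new_name))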

Add multiple shapefiles to MXD with arcpy

I am trying to add multiple files to an MXD file using some of the code found here:
How do I add a shapefile in ArcGIS via python scripting?
The code below does not return any errors; however, none of the shapefiles seem to get added to the blank MXD document.
Any help as to why this is not working would be appreciated.
import arcpy
import arcpy.mapping
from shutil import copyfile
from os import listdir
from os.path import isfile, join

def AddAllShapeFilesToNewMXD(source_directory):
    # Source file is the template that will be copied to the directory with
    # all the shape files in it.
    source_file = 'M:\Ops Field Map\Blank Map.mxd'
    # Output file is the name of the file that will have the shape files added to it
    output_file = 'GPS_Map'
    rev_count = 0
    while isfile(join(source_directory, output_file + '.mxd')):
        # Make sure a unique file is created
        print ('File ' + output_file + '.mxd exists.'),
        rev_count += 1
        output_file = output_file + '_rev' + str(rev_count)
        print ('Trying ' + output_file + '.mxd ...')
    # Create the destination file. This is the file the shape files are added to
    destination_file = join(source_directory, output_file + '.mxd')
    copyfile(source_file, destination_file)
    print 'MXD file created: ' + destination_file
    # Get the map document
    mxd = arcpy.mapping.MapDocument(destination_file)
    # Get the data frame
    data_frame = arcpy.mapping.ListDataFrames(mxd, "*")[0]
    # Get a list of all the shape files
    shp_files = [f for f in listdir(source_directory) if isfile(join(source_directory, f)) and f.endswith('.shp')]
    # Add all the shapefiles to the mxd file
    for s in shp_files:
        new_layer_full_path = join(source_directory, s)
        new_layer = arcpy.mapping.Layer(new_layer_full_path)
        arcpy.mapping.AddLayer(data_frame, new_layer, "BOTTOM")
        print 'Layer added ' + new_layer_full_path
        del new_layer
    return True

directory = 'C:\Users\gps\Desktop\dd test'
AddAllShapeFilesToNewMXD(directory)
It is hard to know without files to play with, but one reason the code above may run without errors yet not display anything is that, for many ArcGIS map display operations, you have to make sure the geoprocessing option 'Add results of geoprocessing operations to the display' (under Geoprocessing > Geoprocessing Options) is turned on.
It could be that you are missing these two important lines:
arcpy.RefreshActiveView()
arcpy.RefreshTOC()
Looks like you're almost there, and both Lucas and BelowZero are offering good suggestions if your code is running within an active session. If it's creating an *.mxd for later use, I don't see where the results are saved. Here's some simpler sample code; note the last line:
mxd = arcpy.mapping.MapDocument(srcdir+'/data_bin/Untitled.mxd')
data_frame = arcpy.mapping.ListDataFrames(mxd)[0]
mxd.activeView = data_frame.name
flowlinesLyr=arcpy.mapping.Layer('..\\NHDPlus\\nhdflowline_en')
flowlinesLyr.name='NHDPlus Flowlines'
arcpy.mapping.AddLayer (data_frame, flowlinesLyr,'TOP')
gagesEventLyr=arcpy.mapping.Layer('..\\NHDPlus\\StreamGageEvent')
gagesEventLyr.name='Original stream gage locations'
arcpy.mapping.AddLayer (data_frame, gagesEventLyr,'TOP')
mxd.saveACopy(datadir+'\NHDPlus'+Region+'_Gage_QAQC.mxd')
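Applied to the question's function, the missing piece would be a save call before it returns. A minimal, self-contained sketch of the pattern (the paths are hypothetical, for illustration only):

import arcpy
import arcpy.mapping

destination_file = r'C:\temp\GPS_Map.mxd'                # hypothetical copied template
mxd = arcpy.mapping.MapDocument(destination_file)
data_frame = arcpy.mapping.ListDataFrames(mxd, "*")[0]
new_layer = arcpy.mapping.Layer(r'C:\temp\example.shp')  # hypothetical shapefile
arcpy.mapping.AddLayer(data_frame, new_layer, "BOTTOM")
mxd.save()   # without saving, the added layers are not persisted to the .mxd on disk
del mxd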
