Copy certain files from one folder to another using python - python

I am trying to copy only certain files from one folder to another. The filenames are in an attribute table of a shapefile.
I have succeeded up to the point of writing the filenames into a .csv file and listing the column that contains the filenames to be transferred. I am stuck after that on how to read those filenames in order to copy the files to another folder. I have read about using shutil.copy/shutil.move but am not sure how to use them. Any help is appreciated. Below is my script:
import arcpy
import csv
import os
import sys
import os.path
import shutil
from collections import defaultdict
# Export the attribute table of a shapefile to CSV, then read the CSV back
# into per-column lists so the file names in the 'label' column can be used.
fc = 'C:\\work_Data\\Export_Output.shp'
# NOTE(review): this folder is spelled 'wokk_Data' while the shapefile above
# lives in 'work_Data' -- possibly a typo; confirm the intended path.
CSVFile = 'C:\\wokk_Data\\Export_Output.csv'
src = 'C:\\UC_Training_Areas'
dst = 'C:\\MOSAIC_Files'

# Attribute columns that should not be exported.
skip_fields = ('FID', 'Area', 'Category', 'SHAPE_Area')

# Filter while building the list.  Removing items from a list that is being
# iterated silently skips the element that follows each removal, and the
# geometry column is excluded because its string form is not a single CSV cell.
fields = [
    f.name
    for f in arcpy.ListFields(fc)
    if f.type != 'Geometry' and f.name not in skip_fields
]

# The with-block closes the file, so no explicit close() is needed.
with open(CSVFile, 'w') as f:
    f.write(','.join(fields) + '\n')
    with arcpy.da.SearchCursor(fc, fields) as cursor:
        for row in cursor:
            f.write(','.join([str(r) for r in row]) + '\n')

# Re-read the CSV column-wise: columns[<header>] is the list of that
# column's values, one entry per data row.
columns = defaultdict(list)
with open(CSVFile) as f:
    reader = csv.DictReader(f)
    for row in reader:
        for (k, v) in row.items():
            columns[k].append(v)
print(columns['label'])

Given the file names in
columns['label'], you can use the following to copy a file:
# columns['label'] is a list of file names (one per CSV row), so build the
# source and destination path for each name in turn; passing the list itself
# to os.path.join fails because it only accepts path strings.
for filename in columns['label']:
    srcpath = os.path.join(src, filename)
    dstpath = os.path.join(dst, filename)
    shutil.copyfile(srcpath, dstpath)

Here is the script I used to solve my problem:
import os
import arcpy
import os.path
import shutil
# Move every file named in the shapefile's Label field from src into dst.
featureclass = "C:\\work_Data\\Export_Output.shp"
src = "C:\\Data\\UC_Training_Areas"
dst = "C:\\Data\\Script"

# arcpy.da.SearchCursor works as a context manager, so the cursor is released
# when the block exits, and iterating it directly replaces the manual
# rows.next() / while loop.  Each row is a tuple of the requested fields.
with arcpy.da.SearchCursor(featureclass, ["Label"]) as cursor:
    for (label,) in cursor:
        # print() with a single argument behaves the same on Python 2 and 3.
        print(label)
        shutil.move(os.path.join(src, str(label)), dst)

Think of it in terms of a source and a destination.
Assume you want to copy a file from your Pictures folder to an Image folder located somewhere else on your machine (the destination).
X is your machine name
Z is the file name
import os;
import shutil;
import glob;
# Copy the file(s) matching `source` into the `dest` folder, creating the
# folder first when it does not exist yet.
source = "C:/Users/X/Pictures/test/Z.jpg"
dest = "C:/Users/Public/Image"

if os.path.exists(dest):
    print("this folder exit in this dir")
else:
    # makedirs also creates missing parent folders.  The call returns None,
    # so there is nothing worth binding to a name.
    os.makedirs(dest)

# glob.glob is the public API (glob._iglob is a private helper with a
# different signature); `source` may be a plain path or a wildcard pattern.
for path in glob.glob(source):
    shutil.copy(path, dest)
print("done")

Related

How to use elements in list by order

My goal is to change multiple csv files in a folder into JSON.
First, I needed to list my csv files
# Print the full path of every entry in the folder whose name ends in .csv.
for file in os.listdir("C:/Users/folder_to_csv"):
    filename = os.fsdecode(file)
    if not filename.endswith(".csv"):
        continue
    # check if csv files are listed correctly
    full_path = os.path.join("C:/Users/folder_to_csv", filename)
    print(full_path)
With this, I was able to call csv files in that folder.
Result:
C:/Users/folder_to_csv\file_1.csv C:/Users/folder_to_csv\file_2.csv C:/Users/folder_to_csv\file_3.csv
Then, I wanted to convert all of the csv files in 'csvlist' to jsonObj; however, for some reason, my code is only using the first file (C:/Users/folder_to_csv\file_1.csv)
This is what I have tried so far:
import json
import csv
import requests
import threading
import os
# Look for .csv entries in the folder.
for file in os.listdir("C:/Users/folder_to_csv"):
filename = os.fsdecode(file)
if filename.endswith(".csv"):
# NOTE: csvlist is rebound to a single path string on every match, so no
# list of paths is ever built.
csvlist = os.path.join("C:/Users/folder_to_csv", filename)
data = {}
def main():
#loop csv list so my codes can read all csv files
# NOTE: csvlist is a string here, so len() is its character count.
length = len(csvlist)
for i in range(length):
i += 1
# NOTE: the same path is used on every pass; i never indexes anything.
path = csvlist
#switch csv to json
with open(path, mode='r') as f:
reader = csv.DictReader(f)
processdata = [row for row in reader]
dlist = processdata
jsonObj = json.dumps(dlist)
# NOTE(review): the next line has no matching opening bracket in this
# snippet -- presumably a leftover from removed code; confirm.
})
print(jsonObj)
main()
In the initial loop, you keep redefining the csvlist variable. I suppose you want it to be a list? Then just create an initial empty list and append to it instead of redefining
csvlist = []
...
csvlist.append(os.path.join("C:/Users/folder_to_csv", filename))

Python doesn't close the last file

I created this code to get all excel files in a folder and make a csv file to every sheet in every file. This script works fine, but sometimes the last Excel file converted still locked by python on file system. Can anyone help me to understand what's happening?
import sys
from os import listdir
from os.path import isfile, join
import pandas as pd
import csv
import re
def removeEspecialCharacters(obj):
    """Strip unwanted control characters from a string cell value.

    Removes the characters ``\\x8F`` and ``\\x90`` as well as carriage
    returns and line feeds.  Values that are not ``str`` are returned
    unchanged.
    """
    if not isinstance(obj, str):
        return obj
    # Inside a character class "(", "|" and ")" are literal characters, so
    # the previous pattern '[(\x90|\x8F)]' also deleted parentheses and pipes
    # from the data.  List only the characters that should go.
    return re.sub('[\x8F\x90\r\n]', '', obj)
# Convert every sheet of every Excel workbook in myFolder to its own CSV file.
myFolder = r'C:\Users\myuser\Downloads\ConvertFilesToCsv'
# Keep plain files only; sub-directories are skipped.
myFiles = [f for f in listdir(myFolder) if isfile(join(myFolder, f))]
for x in range(len(myFiles)):
if (myFiles[x].lower().endswith('.xls') or myFiles[x].lower().endswith('.xlsx') or myFiles[x].lower().endswith('.xlsb')):
print('Converting file: '+myFiles[x]);
# .xlsb workbooks are opened with the pyxlsb engine.
if (myFiles[x].lower().endswith('.xlsb')):
file = pd.ExcelFile(myFolder+'\\'+myFiles[x], engine='pyxlsb')
else:
file = pd.ExcelFile(myFolder+'\\'+myFiles[x])
for mySheetName in file.sheet_names:
df = pd.read_excel(file, sheet_name=mySheetName)
df = df.applymap(removeEspecialCharacters)
# Output name: <workbook name without extension>_<sheet name>.csv
csvFileName = myFolder+'\\'+myFiles[x].replace('.xlsx','').replace('.xlsb','').replace('.xls','')+'_'+mySheetName+'.csv'
df.to_csv(csvFileName,encoding='utf-8-sig',index=False,sep=",",quoting=csv.QUOTE_NONNUMERIC,quotechar="\"",escapechar="\"",decimal=".",date_format='%Y-%m-%d')#,quotechar='\'', escapechar='\\')
# NOTE(review): close() is only reached when nothing above raised, and the
# indentation of this line was lost in this copy, so whether it runs once per
# workbook cannot be confirmed from here.
file.close()
file = ''
Note: this is a comment putting here for code format.
Your code looks fine to me. I would advise you to use context management, similar to the doc, like this:
for filename in myFiles:
extension = filename.split('.')[-1]
# you didn't seem to check xlsb in your code
if extension not in ['xls', 'xlsx', 'xlsb']:
continue
kwargs = {'engine': 'pyxlsb'} if extension=='xlsb' else {}
with pd.ExcelFile(myFolder + '\\' + filename, **kwargs) as file:
# do other stuff with file
...
# you don't need to close file here
# file.close()

Importing xml file into Access database with defined id

I am struggling to import an enormous amount of data from XML files into Access.
The problem I am facing is that each record in the files I want to import starts with a row that carries an id attribute:
<vin id="11111111111111111">
<description>Mazda3 L 2.0l MZR 150 PS 4T 5AG AL-EDITION TRA-P</description>
<type>BL</type>
<typeapproval>e11*2001/116*0262*07</typeapproval>
<variant>B2F</variant>
<version>7EU</version>
<series>Mazda3</series>
<body>L</body>
<engine>2.0l MZR 150 PS</engine>
<grade>AL-EDITION</grade>
<transmission>5AG</transmission>
<colourtype>Mica</colourtype>
<extcolourcode>34K</extcolourcode>
<extcolourcodedescription>Crystal White Pearl</extcolourcodedescription>
<intcolourcode>BU4</intcolourcode>
<intcolourcodedescription>Black</intcolourcodedescription>
<registrationdate>2012-07-20</registrationdate>
<productiondate>2011-11-30</productiondate>
</vin>
so the result of my import is all the lines except from the VIN number of vehicle that is actually defined as id.
I was trying to manually replace characters like:
"> etc. with
etc.
to get rid of that id but I have actually dozens of files and hundreds of thousands records in each file so it is quite a pain...
so I thought about concatenating all the files together with a script in Python:
import os
import csv
import pandas as pd
import numpy as np
ver='2011'
# NOTE: neither path contains a {} placeholder, so .format(ver) returns the
# string unchanged and ver has no effect on the paths.
dirName =r'C:\Users\dawid\Desktop\DE_DATA\Mazda_DE\VINs_DE\Mazda\xml'.format(ver);
out_file=r'C:\Users\dawid\Desktop\DE_DATA\Mazda_DE\VINs_DE\Mazda\Output.xml'.format(ver);
def getListOfFiles(dirName):
    """Return the full paths of all files below *dirName*, recursively.

    Entries are visited in ``os.listdir`` order; a sub-directory is replaced
    in place by the files found inside it.  Each file is listed exactly once,
    and an empty directory yields an empty list.
    """
    allFiles = []
    for entry in os.listdir(dirName):
        # Create full path
        fullPath = os.path.join(dirName, entry)
        if os.path.isdir(fullPath):
            # Recurse once per sub-directory.  The earlier version repeated
            # this check after the if/else, which could add a sub-directory's
            # files a second time.
            allFiles.extend(getListOfFiles(fullPath))
        else:
            allFiles.append(fullPath)
    return allFiles
listOfFileOut=getListOfFiles(dirName)
#filenames = allFiles
# Append the raw text of every input file to one output file, line by line.
# NOTE(review): nothing here strips or merges the per-file XML wrappers, so
# the result is presumably several XML documents back to back -- likely why
# the combined file can no longer be imported; confirm.
with open(out_file, 'w',encoding='ANSI') as outfile:
for fname in listOfFileOut:
with open(fname,encoding='ANSI') as infile:
for line in infile:
outfile.write(line)
print("Done")
But this completely destroyed the structure of the XML file and I cannot import it anymore.
Could anyone suggest whether it is possible to use Python to get rid of all those ids, so that I can import the whole database into Access?
Thank you in advance. [image: enter image description here]
Try this.
from simplified_scrapy import utils, SimplifiedDoc, req
dirName = r'C:\Users\dawid\Desktop\DE_DATA\Mazda_DE\VINs_DE\Mazda\xml'
# NOTE(review): simplified_scrapy is a third-party package; the behaviour of
# its helpers is inferred from their names and the result shown after this
# snippet -- confirm against the package documentation.
listFile = utils.getSubFile(dirName, end='.xml')
for f in listFile:
doc = SimplifiedDoc(utils.getFileContent(f, encoding='ANSI'))
# The pattern matches an opening <vin ...> tag with any attributes; the
# replacement is a bare <vin>.
doc.replaceReg('<vin[^>]*>', '<vin>')
print(doc.html)
# utils.saveFile(f, doc.html, encoding='ANSI') # write to original file
Result:
<vin>
<description>Mazda3 L 2.0l MZR 150 PS 4T 5AG AL-EDITION TRA-P</description>
<type>BL</type>
<typeapproval>e11*2001/116*0262*07</typeapproval>
<variant>B2F</variant>
<version>7EU</version>
...

How to read from a csv file in zip folder and save data from csv file in database?

import glob
import os
import csv
import zipfile
from io import StringIO
for name in glob.glob('C:/Users/RAMESH SANTHA/Downloads/download-NIFTY 50-01012020.zip'):
base = os.path.basename(name)
filename = os.path.splitext(base)[0]
datadirectory = 'C:/Users/RAMESH SANTHA/Downloads/'
dataFile = filename
# Rebuild the archive path from its base name and the download directory.
archive = '.'.join([dataFile, 'zip'])
fullpath = ''.join([datadirectory, archive])
# The CSV member name is built from the archive's base name.
csv_file = '.'.join([dataFile, 'csv']) #all fixed
filehandle = open(fullpath, 'rb')
zfile = zipfile.ZipFile(filehandle)
# NOTE: StringIO here is the class imported via "from io import StringIO", so
# StringIO.StringIO raises the AttributeError quoted after this snippet.
data = StringIO.StringIO(zfile.read(csv_file))
reader = csv.reader(data)
for row in reader:
print (row)
I tried following code to read data from zip folder which contains csv file and print rows but got error:
data = StringIO.StringIO(zfile.read(csv_file))
AttributeError: type object '_io.StringIO' has no attribute 'StringIO'
There is no StringIO.StringIO() but io.StringIO()
import io
data = io.StringIO(...)
With your import it will be even without io.
from io import StringIO
data = StringIO(...)
BTW: I think you overcomplicated code using glob and join(). And you can use filename directly with ZipFile without open()
import os
import csv
import zipfile
import io
# Read a CSV member straight out of a zip archive and print its rows.
zip_fullname = 'C:/Users/RAMESH SANTHA/Downloads/download-NIFTY 50-01012020.zip'
zip_file = os.path.basename(zip_fullname)
# splitext only touches the real extension; str.replace would also rewrite a
# '.zip' occurring earlier in the name and ignores '.ZIP'.
csv_file = os.path.splitext(zip_file)[0] + '.csv'

print(zip_file)  # download-NIFTY 50-01012020.zip
print(csv_file)  # download-NIFTY 50-01012020.csv

# The with-blocks close the archive and the member even if reading fails.
with zipfile.ZipFile(zip_fullname) as zfile:
    with zfile.open(csv_file) as member:
        # The member yields bytes; wrap it so the csv module receives text.
        # newline='' leaves newline handling to the csv module.
        data = io.TextIOWrapper(member, encoding='utf-8', newline='')
        reader = csv.reader(data)
        for row in reader:
            print(row)
But with pandas it should be even simpler
import pandas as pd
# NOTE(review): the zip archive is passed directly to read_csv; this
# presumably relies on pandas inferring the compression from the '.zip'
# extension and on the archive holding a single CSV -- confirm in the docs.
df = pd.read_csv('C:/Users/RAMESH SANTHA/Downloads/download-NIFTY 50-01012020.zip')
print(df)
Looking at the script, you are getting the error when opening the CSV file from the zip file. Below is Python 3 code that works for me with a zip file containing a few CSVs. The directory to extract to should exist before you run the script.
import codecs
import csv
import glob
import os
import zipfile

# Extract every member of a zip archive, then print the rows of each
# extracted CSV as dictionaries keyed by the header line.
path_to_zip_file = '/tmp/test1.zip'  # Assuming this file exists. This path is from a Mac, but should work on Windows as well
directory_to_extract_to = '/tmp/extract/'  # Assuming this directory already exists

with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
    zip_ref.extractall(directory_to_extract_to)

# glob returns each match with the directory prefix already attached, so the
# result is opened as-is; joining it onto the directory again would double
# the prefix whenever the directory is a relative path.
for path in glob.glob(os.path.join(directory_to_extract_to, '*.csv')):
    with open(path, 'rb') as f:
        # Use a single reader per file handle: a second reader on the same
        # handle would find it already consumed.  Swap in csv.reader(...)
        # here to get each row as a list instead of a dictionary.
        dictReader = csv.DictReader(codecs.iterdecode(f, 'utf-8'))
        for row in dictReader:
            print(row)

Move files listed in csv file?

I have been trying to use the following code to move files that are listed in a CSV file. But at most it will copy the last file in the list and not the rest.
I keep hitting this wall with every example I have seen. What am I doing wrong?
My CSV file will have lines like:
12355,12355.jpg
Here's my code
import os
import shutil
import csv
keys={}
with open('shuttle_image.csv', 'r') as f:
reader = csv.reader(f, delimiter = ',')
for rowDict in reader:
# Map the first column to the second column of each row.
keys[rowDict[0]] = rowDict[1]
print (rowDict)
dir_src = 'C:\\Users\\Willie\\Desktop\\Suppliers Dropship\\hunting\\'
dir_dst = 'C:\\image\\'
for file in os.listdir(dir_src):
src_file = os.path.join(dir_src, file)
dst_file = os.path.join(dir_dst, file)
# NOTE: rowDict still holds only the last CSV row at this point, and `in` on
# a string is a substring test; the keys dict built above is never consulted.
if file in rowDict[1]:
shutil.move(src_file, dst_file)
I think doing something like this will work (untested):
import os
import shutil
import csv
# Build a first-column -> file-name mapping from the CSV.
keys = {}
with open('shuttle_image.csv', 'r') as f:
    for rowDict in csv.reader(f, delimiter=','):
        keys[rowDict[0]] = rowDict[1]
        print(rowDict)  # if desired

valid_files = set(keys.values())  # file names found in csv

dir_src = 'C:\\Users\\Willie\\Desktop\\Suppliers Dropship\\hunting\\'
dir_dst = 'C:\\image\\'

# Move only those directory entries whose names were listed in the CSV.
for file in os.listdir(dir_src):
    if file not in valid_files:
        continue
    shutil.move(os.path.join(dir_src, file), os.path.join(dir_dst, file))
As an optimization, unless you need the keys dictionary for other processing, you could change the first part so it just creates the valid_files set variable used in the second for loop:
# Collect the file names (second CSV column) into a set for fast lookups.
valid_files = set()  # empty set
with open('shuttle_image.csv', 'r') as f:
    csv_rows = csv.reader(f, delimiter=',')
    for rowDict in csv_rows:
        valid_files.add(rowDict[1])  # add file name to set
        print(rowDict)  # if desired
The reason why it's only the last file that could be copied (if it was) is because in this line:
if file in rowDict[1]:
you are referencing rowDict outside of the first for-loop. So at that execution moment, it contains the last value of this loop.
If I understand correctly what you are trying to do you could try something like this (untested code):
import os
import shutil
import csv
# Move every file listed in the CSV's second column from dir_src to dir_dst,
# skipping names that are not present in the source directory.
dir_src = 'C:\\Users\\Willie\\Desktop\\Suppliers Dropship\\hunting\\'
dir_dst = 'C:\\image\\'

with open('shuttle_image.csv', 'r') as f:
    reader = csv.reader(f, delimiter=',')
    for rowDict in reader:
        # Blank lines come back as empty lists; skip them (and any other
        # short row) instead of failing on the unpacking.
        if len(rowDict) < 2:
            continue
        # Only the file name is needed.  The first column is not bound to a
        # name, which also avoids shadowing the builtin id().
        filename = rowDict[1]
        src_file = os.path.join(dir_src, filename)
        if os.path.exists(src_file):
            shutil.move(src_file, dir_dst)
So instead of:
Constructing a dictionary with all the values in your CSV file
Somehow check for every file in your source directory that it is included in your dictionary (which is what I interpreted you were trying to do)
And move it if it does.
You could:
For every file extracted from your CSV, check that it exists in your source directory.
If it does, you move it to the destination directory.
Is that what you were trying to do ?
[And if the filename stays the same, you only need to specify the destination directory for the second argument of shutil.move()]

Categories