I am using Python to create a CSV (comma-separated values) file that lists the path of every file in a set of folders. Each line contains the file's path, then a separator (a semicolon in the example below), then the number taken from the name of the folder the file is stored in, without the leading letter. For example, if image 1.pgm is stored in folder s14, the CSV line looks like
at/s14/1.pgm;14
In which, at/s14/1.pgm is full path to file 1.pgm and 14 is number (called label) that get from its folder s14. Could you help me implement it by python. I tried with that code but it does not work for my task
Try this:
import os
import re
import sys
SEPARATOR = ';'
CSV_FILE_PATH = 'output.csv'


def create_csv(base_path, csv_file_path=CSV_FILE_PATH):
    """Write one ``<file path><SEPARATOR><label>`` line per file under *base_path*.

    Only direct sub-folders of *base_path* whose name starts with ``s``
    followed by digits (for example ``s14``) are scanned; the digits become
    the label written after the separator.

    Args:
        base_path: Folder that contains the ``s<number>`` sub-folders.
        csv_file_path: Where the CSV is written (overwritten if it exists).
    """
    # s\d+ is the pattern of the folder name, for example s20
    folder_pattern = re.compile(r's(\d+)')
    with open(csv_file_path, 'w') as csv_file:
        # Sorted so the output does not depend on the OS listing order.
        for folder in sorted(os.listdir(base_path)):
            folder_path = os.path.join(base_path, folder)
            # os.listdir returns bare names, so the directory test must be
            # made on the joined path, not on the name alone (which would be
            # resolved against the current working directory).
            if not os.path.isdir(folder_path):
                continue
            match = folder_pattern.match(folder)
            if match is None:
                continue
            number = match.group(1)
            for filename in sorted(os.listdir(folder_path)):
                filepath = os.path.join(folder_path, filename)
                csv_file.write(SEPARATOR.join((filepath, number)) + '\n')


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("usage: create_csv <base_path>")
        sys.exit(1)
    create_csv(sys.argv[1])
Related
Usually I navigate to the folder I am extracting data from and copy the file name directly:
df2=pd.read_csv('10_90_bnOH-MEA.csv',usecols=[1])
If I have multiple files and want to do the same for all the files, how do I specify the folder to open and get all the files inside?
I want to run the above code without specifying the file's full path
(C:\Users\X\Desktop\Y\Z\10_90_bnOH-MEA.csv)
You want listdir from the os module.
import os
# Folder that holds the files to load; every entry in it is read.
path = "C:\\Users\\X\\Desktop\\Y\\Z\\"
files = os.listdir(path)
print(files)
# One DataFrame per directory entry, in listing order.
dataframe_list = [pd.read_csv(os.path.join(path, filename)) for filename in files]
You should open the desired directory and loop through all the files then do something to them.
# import required module
import os
# assign directory
directory = 'files'
# iterate over files in
def goThroughDirectory(directory):
    """Visit every regular file directly inside *directory*.

    Sub-directories are ignored. The action taken for each file is a
    placeholder that prints the file's path; replace it with real work.

    Args:
        directory: Path of the folder to scan.
    """
    for filename in os.listdir(directory):
        # os.listdir yields bare names, so build the full path first
        f = os.path.join(directory, filename)
        # checking if it is a file
        if os.path.isfile(f):
            # do something (placeholder: print the path so the body is valid)
            print(f)
If you also want to loop through the files in subdirectories, you should add a check for os.path.isdir(f), like this:
...
def goThroughDirectory(directory):
    """Visit every regular file under *directory*, descending into sub-folders.

    The action taken for each file is a placeholder that prints the file's
    path; replace it with real work.

    Args:
        directory: Path of the folder to scan.
    """
    for filename in os.listdir(directory):
        f = os.path.join(directory, filename)
        # checking if it is a file
        if os.path.isfile(f):
            # do something (placeholder: print the path so the body is valid)
            print(f)
        elif os.path.isdir(f):
            # Not a file but a directory: recurse into it. `f` is already the
            # joined path, so it is passed as-is rather than being prefixed
            # with `directory` a second time.
            goThroughDirectory(f)
for more information you should check geeksforgeeks
I want to move files from one directory to another from a .txt file containing the names of the files to be moved, the script must first browse the directory and if it finds the file it moves it to the new directory. Where to start? I've managed to do this for a file list but I'd like to do it directly via the .txt file without rewriting the names of the files to be moved
import shutil, os
# Hard-coded names of the files to relocate.
files = ['file1.txt', 'file2.txt', 'file3.txt', 'file4.txt']
# Move each listed file, one at a time, into the destination folder.
for name in files:
    shutil.move(name, 'destination_directory')
As far as I know, you can't move your files using the .txt file directly.
Just move each file using its path (file_path).
You can use my code below.
I have double checked and it work on my side.
Sorry for my poor English Skill :)
import os
import shutil
from pathlib import Path
def create_directory(dir_name: str):
    """Make sure a folder named *dir_name* exists under the current working directory.

    Args:
        dir_name: Name (or relative path) of the folder to create.

    Returns:
        The path obtained by joining the current working directory and
        *dir_name*. It is returned even when creation fails; in that case a
        message is printed instead of raising.
    """
    target = os.path.join(os.getcwd(), dir_name)
    try:
        # exist_ok=True: an already existing folder is not an error
        os.makedirs(target, exist_ok=True)
    except OSError:
        print("Directory '%s' can not be created" % dir_name)
    return target
def create_files(dir_path: str, file_name: str):
    """Create (or truncate) an empty file called *file_name* inside *dir_path*.

    Args:
        dir_path: Existing folder in which the file is created.
        file_name: Name of the file to create.

    Returns:
        The full path of the file as a string.
    """
    # os.path.join picks the right separator for the platform; a hard-coded
    # backslash only produces a path inside dir_path on Windows.
    file_path = os.path.join(dir_path, file_name)
    # Opening in "w" mode creates the file; the with-block closes it, so no
    # explicit close() is needed.
    with open(file_path, "w"):
        pass
    if Path(file_path).is_file():
        print(f'File: {file_name} created successfully')
    else:
        print(f'File: {file_name} does not exist')
    return file_path
def main():
    """Create four sample files in their own folders, then move them into one folder."""
    # Step 1: each (folder, file) pair is created in order; the returned file
    # paths are collected for the move step.
    sample_layout = (
        ("file1_dir", 'file1.txt'),
        ("file2_dir", 'file2.txt'),
        ("file3_dir", 'file3.txt'),
        ("file4_dir", 'file4.txt'),
    )
    files = [
        create_files(create_directory(folder), name)
        for folder, name in sample_layout
    ]

    # Step 2: make sure the destination folder exists.
    destination_dir = create_directory('destination_directory')

    # Step 3: move every created file into the destination folder.
    for source in files:
        shutil.move(source, destination_dir)


if __name__ == "__main__":
    main()
I have a file structure something like this:
/a.zip
/not_a_zip/
contents
/b.zip
contents
and I want to create a directory a and extract a.zip into it and all the nested zipped files where they are so I get something like this:
/a/
/not_a_zip/
contents
/b/
contents
I tried this solution, but I was getting errors because inside my main directory I have subdirectories, as well as zip files.
I want to be able to extract the main zip file into a directory of the same name, then be able to extract all nested files within, no matter how deeply nested they are.
EDIT: my current code is this
# Extract every member of the outer archive into resultDirectory, then open
# each extracted member whose name ends in ".zip" and extract it in place.
archive = zipfile.ZipFile(zipped, 'r')
for file in archive.namelist():
archive.extract(file, resultDirectory)
# Iterating archive.NameToInfo yields the member names that are filtered here.
for f in [filename for filename in archive.NameToInfo if filename.endswith(".zip")]:
# get file name and path to extract
fileToExtract = resultDirectory + '/' + f
# get directory to extract new file to
directoryToExtractTo = fileToExtract.rsplit('/', 1)
directoryToExtractTo = directoryToExtractTo[0] + '/'
# extract nested file
nestedArchive = zipfile.ZipFile(fileToExtract, 'r')
for file in nestedArchive.namelist():
# NOTE(review): extract() is passed fileToExtract (the filesystem path of the
# nested zip) rather than the loop variable `file`, which is the member name
# taken from nestedArchive.namelist(). That path is not a member of
# nestedArchive, which matches the KeyError reported below.
nestedArchive.extract(fileToExtract, directoryToExtractTo)
but I'm getting this error:
KeyError: "There is no item named 'nestedFileToExtract.zip' in the archive"
Even though it exists in the file system
Based on this other solutions: this and this.
import os
import io
import sys
import zipfile
def extract_with_structure(input_file, output):
    """Extract a zip archive into *output*, unpacking nested zips in place.

    Ordinary members are extracted with their folder structure. A member
    whose name ends in ``.zip`` is not written to disk; instead its contents
    are extracted into a folder named after it (``sub/b.zip`` becomes
    ``<output>/sub/b/``). Zips nested inside those are handled the same way,
    to any depth.

    Args:
        input_file: Path to the zip file, or a binary file-like object.
        output: Folder to extract into (created as needed).
    """
    with zipfile.ZipFile(input_file) as zip_file:
        print(f"namelist: {zip_file.namelist()}")
        _extract_members(zip_file, output)


def _extract_members(zip_file, output):
    """Extract every member of the open *zip_file* into *output*, recursing into nested zips."""
    for obj in zip_file.namelist():
        filename = os.path.basename(obj)
        if not filename:
            # Skip folders
            continue
        if filename.endswith('.zip'):
            # Read by the full member name: the basename alone is not a valid
            # key when the nested zip lives in a sub-folder of the archive.
            content = io.BytesIO(zip_file.read(obj))
            # Keep the member's folder so the nested content lands where the
            # nested zip was located inside the archive.
            dirname = os.path.splitext(os.path.join(output, obj))[0]
            with zipfile.ZipFile(content) as nested:
                _extract_members(nested, dirname)
        else:
            # extract a file
            zip_file.extract(obj, output)


if __name__ == "__main__":
    if len(sys.argv) < 3:
        print("No zipfile specified or output folder.")
        sys.exit(1)
    extract_with_structure(sys.argv[1], sys.argv[2])
I need to read the contents of a file from the list of files from a directory with os.listdir. My working scriptlet is as follows:
import os
# Read every entry listed in `path` and print its whitespace-separated tokens.
path = "/Users/Desktop/test/"
for filename in os.listdir(path):
# NOTE(review): filename is the bare entry name returned by os.listdir and is
# opened without being joined to `path`, so it resolves against the current
# working directory -- confirm that this is intended.
with open(filename, 'rU') as f:
t = f.read()
# split() with no arguments breaks the text on runs of whitespace
t = t.split()
print(t)
print(t) gives me all the contents from all the files at once present in the directory (path).
But I like to print the contents on first file, then contents of the second and so on, until all the files are read from in dir.
Please guide ! Thanks.
You can print the file name.
Print the content after the file name.
import os
# Print each file's name followed by its content, one file after another.
path = "/home/vpraveen/uni_tmp/temp"
for filename in os.listdir(path):
    # os.listdir returns bare names; join them with `path` so the files are
    # found regardless of the current working directory.
    filepath = os.path.join(path, filename)
    # Plain 'r' already uses universal newlines in Python 3; the old 'U'
    # flag is no longer accepted by open().
    with open(filepath, 'r') as f:
        t = f.read()
    print(filename + " Content : ")
    print(t)
First, you should find the path of each file using os.path.join(path, filename). Otherwise you'll loop wrong files if you change the variable path. Second, your script already provides the contents of all files starting with the first one. I added a few lines to the script to print the file path and an empty line to see where the contents end and begin:
import os
# Print each file's full path, then its content, then a blank separator line.
path = "/Users/Desktop/test/"
for filename in os.listdir(path):
    filepath = os.path.join(path, filename)
    # Plain 'r' already uses universal newlines in Python 3; the old 'U'
    # flag is no longer accepted by open().
    with open(filepath, 'r') as f:
        content = f.read()
    print(filepath)
    print(content)
    print()
os.listdir returns only the names of the files. You need to os.path.join each name with the path the files live in; otherwise Python will look for them in your current working directory (os.getcwd()), and if that is not the same as path, Python will not find the files:
import os
# Show, for every directory entry, the bare name next to its joined path.
path = "/Users/Desktop/test/"
for filename in os.listdir(path):
    file_path = os.path.join(path, filename)
    # bare name on the first line, joined path on the second
    print(filename, file_path, sep="\n")
..
if you have pathlib at your disposal you can also:
from pathlib import Path
# Print the path and text content of every regular file directly inside `path`.
path = "/Users/Desktop/test/"
p = Path(path)
for file in p.iterdir():
    # entries that are not regular files (e.g. sub-folders) are skipped
    if file.is_file():
        print(file)
        print(file.read_text())
If I have a main folder and many sub-folders, and my targeted files are located at these sub-folders. How could I set up my path properly then the program could grab these targeted files via my main folder directly?
For example,
Main_folder
>sub_1
>>sub_1_v1
>>>targeted_file.txt # file I need
>>sub_2_v2
>>>targeted_file.txt # file I need
>sub_2
>>sub_1_v1
>>>targeted_file.txt # file I need
>>sub_2_v2
>>>targeted_file.txt # file I need
Here is a program created by Julien Spronck (Grabbing data from certain files)
def get_all_files(path, recursive=False):
    """Return a lazy iterator over the ``.txt`` files found in *path*.

    Args:
        path: Folder to search.
        recursive: When False (the default) only files directly inside
            *path* are returned. When True, ``.txt`` files in all nested
            sub-folders are returned as well, so passing the main folder is
            enough to reach every targeted file.

    Returns:
        An iterator of matching file paths.
    """
    import os
    import glob
    if recursive:
        # With recursive=True, '**' matches zero or more folder levels.
        return glob.iglob(os.path.join(path, '**', '*.txt'), recursive=True)
    return glob.iglob(os.path.join(path, '*.txt'))
def get_all_data(files):
    """Lazily yield every line of every file in *files*, in order.

    Each yielded line is exactly what iterating the open file produces, so
    it keeps its trailing newline when the file has one.

    Args:
        files: Iterable of file paths to read.
    """
    for file_path in files:
        with open(file_path, 'r') as handle:
            yield from handle
def write_lines_to_file(lines, outfile):
    """Write each item of *lines* to *outfile* as one line.

    Args:
        lines: Iterable of strings.
        outfile: Path of the file to write (overwritten if it exists).
    """
    with open(outfile, 'w') as the_file:
        for line in lines:
            # Lines read from a file already end with '\n'; only add the
            # terminator when it is missing so line breaks are not doubled.
            if not line.endswith('\n'):
                line += '\n'
            the_file.write(line)
# Output file and the folder to scan (placeholder -- set a real path here).
outfile = 'blah.csv'
path = 'blah blah'

# Find the files, read their lines, and write all of them to outfile.
files = get_all_files(path)
lines = get_all_data(files)
write_lines_to_file(lines, outfile)
My question is, how could I give the path properly (from main folder) then I can grab all targeted files at one time?
Thanks.
To iterate over folders, then files, use:
import os
def list_files(dir):
    """Return the paths of all files under *dir*, including nested sub-folders.

    Args:
        dir: Root folder to walk.

    Returns:
        A list of file paths, each made of the containing folder (as
        reported by os.walk) joined with the file name. Empty when *dir*
        contains no files.
    """
    r = []
    # A single os.walk pass already yields the file names of every folder,
    # so there is no need to walk each sub-folder a second time (the old
    # generator .next() method does not exist in Python 3).
    for subdir, _dirnames, filenames in os.walk(dir):
        r.extend(os.path.join(subdir, name) for name in filenames)
    return r
As seen here:
Python: Iterate through folders, then subfolders and print filenames with path to text file