how to print csv with the same pathname but with an extension? - python

The code below reads the CSV files that are in one folder and writes them to another. I want each output file to keep the name of its source file, but with a suffix added. For example, if the file is called aaa.csv, the output would be aaa_ext.csv.
The files I get are file_list0.csv, file_list1.csv, file_list2.csv.
This is my code:
import pandas as pd
import numpy as np
import glob
import os
all_files = glob.glob("C:/Users/Gamer/Documents/Colbun/Saturn/*.csv")
file_list = []
for i,f in enumerate(all_files):
df = pd.read_csv(f,header=0,usecols=["t","f"])
df.to_csv(f'C:/Users/Gamer/Documents/Colbun/Saturn2/file_list{i}.csv')

You can modify the line that writes the CSV file as follows:
# os.path.splitext strips only the final extension, so a source file such as
# "a.b.csv" is written as "a.b_ext.csv" (split(".")[0] would truncate it to "a").
df.to_csv(f'C:/Users/Gamer/Documents/Colbun/Saturn2/{os.path.splitext(os.path.basename(f))[0]}_ext.csv')

Related

Saving files to different folder

I'm trying to save converted Excel files from different paths to the same folder.
How can I pass the path to the function correctly?
Now what is happening is that it is attaching the original path to the save path I have given to the function.
So my solution was:
import pandas as pd
import glob
import csv, json
import openpyxl
from pathlib import Path
import os, os.path
import errno
destination_path = "C:\\csv_files"
all_paths = [r"C:\\PLM\\PML.xlsx",r"C:\\TMR\\TMR.xlsx",r"C:\\PLM\\PLM.xlsx"]
# Create variable to store tuple list
all_items = []


# Create tuple list with file path and file name without extension
def getFileName():
    """Append one ``(path, name_without_extension)`` tuple per entry of ``all_paths``.

    The tuples are appended to the module-level ``all_items`` list.
    """
    for paths in all_paths:
        file_name = paths.split("\\")[-1]
        # splitext removes only the last extension, so "report.v2.xlsx"
        # yields "report.v2" rather than being cut at the first dot.
        all_items.append((paths, os.path.splitext(file_name)[0]))


# Convert given files by iterating through tuple list and pass destination folder.
def convertFiles():
    """Write each workbook listed in ``all_items`` as a CSV in ``destination_path``.

    Reads the 'Relatório - DADOS' sheet of every source file and saves it as
    ``<name>.csv`` (UTF-8, without the index column).
    """
    # Make sure the target folder exists before the first write.
    os.makedirs(destination_path, exist_ok=True)
    for item in all_items:
        read_file = pd.read_excel(item[0], sheet_name='Relatório - DADOS', index_col=None, engine='openpyxl')
        read_file.to_csv(destination_path + "\\" + item[1] + ".csv", encoding='utf-8', index=False)
You can ensure the save folder exists by adding this line before the outer for loop:
Path(save_path).mkdir(exist_ok=True)
See documentation.

Pandas,(Python) -> Export to xlsx with multiple sheets

I'm trying to read some .xlsx files from a directory that was created earlier using the current timestamp, where the files are stored. Now I want to read those .xlsx files and put them into a single .xlsx file with multiple sheets, but I tried multiple ways and none of them worked. I tried:
final file Usage-SvnAnalysis.xlsx
the script i tried:
import pandas as pd
import numpy as np
from timestampdirectory import createdir
import os
dest = createdir()
dfSvnUsers = pd.read_csv(dest, "SvnUsers.xlsx")
dfSvnGroupMembership = pd.read_csv(dest, "SvnGroupMembership.xlsx")
xlwriter = pd.ExcelWriter("Usage-SvnAnalysis.xlsx")
dfSvnUsers.to_excel(xlwriter, sheet_name='SvnUsers', index = False )
dfSvnGroupMembership.to_excel(xlwriter, sheet_name='SvnGroupMembership', index = False )
xlwriter.close()
The folder that is created automatically with the current timestamp and that contains the files.
This is one of the files that I want to add as a sheet in the final xlsx.
This is how I create the directory with the current time and return dest to export the files into.
I changed the script a bit; this is how it looks now, but I am still getting an error:
File "D:\Py_location_projects\testfi\Usage-SvnAnalysis.py", line 8, in
with open(file, 'r') as f: FileNotFoundError: [Errno 2] No such file or directory: 'SvnGroupMembership.xlsx'
The files exist, but the script can't find the root path to that directory, because I create that directory in another script using a timestamp and return the path as dest.
dest = createdir() represents the path where the files are. What I need to do is just access this dest, read the files from there, and export them into a single xlsx as its sheets (in this case sheet1 and sheet2, because I tried to read only 2 files from that directory).
import pandas as pd
import numpy as np
from timestampdirectory import createdir
import os
dest = createdir()
files = os.listdir(dest)
for file in files:
with open(file, 'r') as f:
dfSvnUsers = open(os.path.join(dest, 'SvnUsers.xlsx'))
dfSvnGroupMembership = open(os.path.join(dest, 'SvnGroupMembership.xlsx'))
xlwriter = pd.ExcelWriter("Usage-SvnAnalysis.xlsx")
dfSvnUsers.to_excel(xlwriter, sheet_name='SvnUsers', index = False )
dfSvnGroupMembership.to_excel(xlwriter, sheet_name='SvnGroupMembership', index = False )
xlwriter.close()
I think you should read the Excel files with pd.read_excel instead of pd.read_csv.
import os
dfSvnUsers = pd.read_excel(os.path.join(dest, "SvnUsers.xlsx"))
dfSvnGroupMembership = pd.read_excel(os.path.join(dest, "SvnGroupMembership.xlsx"))

Converting file from python to exe

I've written a file that converts the delimiter in a CSV from format ',' to the ';'.
I'm trying to convert this file from a .py to a .exe using auto-py-to-exe. Whilst this process does work and execute correctly clicking on the app does not do anything.
Am I missing something from the code? Perhaps an auto execute command?
# import necessary libraries
import pandas as pd
import os
import glob
# use glob to get all the csv files
# in the folder
path = os.getcwd()
csv_files = glob.glob(os.path.join(path, "*.csv"))
# loop over the list of csv files
for f in csv_files:
# read the csv file
df = pd.read_csv(f, delimiter=',')
df.to_csv(f, sep=';', index = False)
You should use argparse to provide the input and output paths:
# import necessary libraries
import pandas as pd
import os
import glob
import argparse


def main(argv=None):
    """Rewrite every comma-delimited CSV in the input folder with ';' as delimiter.

    Args:
        argv: Optional list of command-line arguments. When ``None``,
            argparse reads ``sys.argv``.

    Both folders default to the current working directory, so the program
    still does its job when started without arguments (e.g. by double-click).
    Output files keep the base name of their source file.
    """
    parser = argparse.ArgumentParser()
    # Adding optional arguments
    parser.add_argument("-i", "--InputPath", default=os.getcwd(), help="Input path files")
    parser.add_argument("-o", "--OutPath", default=os.getcwd(), help="Out path files")
    args = parser.parse_args(argv)

    path = os.path.abspath(args.InputPath)
    out_path = os.path.abspath(args.OutPath)
    os.makedirs(out_path, exist_ok=True)

    # use glob to get all the csv files
    # in the folder
    csv_files = glob.glob(os.path.join(path, "*.csv"))
    # loop over the list of csv files
    for f in csv_files:
        # read the csv file
        df = pd.read_csv(f, delimiter=',')
        # f is an absolute path; joining it directly would discard out_path,
        # so only its base name is appended to the output folder.
        df.to_csv(os.path.join(out_path, os.path.basename(f)), sep=';', index=False)


if __name__ == "__main__":
    main()
Then simply use the auto-py-to-exe to generate the exe.
Open the cmd to the place where the exe is there and use it accordingly.
CMD:
$ file.py -i "input path" -o "Out path"

Reading bulk Excel files from a file with Python (Pandas)

I have 40 .xls files in a folder I would like to import into a df in Pandas.
Is there a function similar to read_csv() that will allow me to direct Python to the folder and open each of these files into the dataframe? All headers are the same in each file
Try pandas.read_excel to open each file. You can loop over the files using the glob module.
import glob
import pandas as pd
dfs = {}
for f in glob.glob('*.xlsx'):
dfs[f] = pd.read_excel(f)
df = pd.concat(dfs) # change concatenation axis if needed
You can load the Excel files and concatenate them with each other.
import os
import pandas as pd
files = os.listdir(<path to folder>)
df_all = pd.DataFrame()
for file in files:
df = pd.read_excel(f"<path to folder>/{file}")
df_all = pd.concat([df_all,df])
import os
import pandas as pd

folder = r'C:\Users\AA\Desktop\Excel_file'
files = os.listdir(folder)

# Read every Excel workbook in the folder; keep each frame instead of
# overwriting a single variable, so no file is lost.
frames = []
for file in files:
    # endswith accepts a tuple, so both legacy .xls and .xlsx are picked up
    if file.endswith(('.xls', '.xlsx')):
        frames.append(pd.read_excel(os.path.join(folder, file)))

# All files share the same headers, so they stack into one DataFrame.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
Does this help?

How to find a required file and read it in a zip file?

I have zip files and each zip file contains three subfolders (i.e. ini, log, and output). I want to read a file from output folder and it contains three csv files with different names. Suppose three files name are: initial.csv, intermediate.csv, and final.csv. and just want to read final.csv file.
The code that I tried to read file is:
import zipfile
import numpy
import pandas as pd
zipfiles = glob.glob('/home/data/*.zip')
for i in np.arange(len(zipfiles)):
zip = zipfile.ZipFile(zpfiles[i])
f = zip.open(zip.namelist().startswith('final'))
data = pd.read_csv(f, usecols=[3,7])
and the error I got is 'list' object has no attribute 'startswith'
How can I find the correct file and read it?
Replace
f = zip.open(zip.namelist().startswith('final'))
With
f = zip.open('output/final.csv')
If you can "find" it:
filename = ([name for name in zip.namelist() if name.startswith('output/final')][0])
f = zip.open(filename)
To find sub dirs, let's switch to pathlib which uses glob:
from pathlib import Path
import zipfile
import pandas as pd

dfs = []
# NOTE(review): this pattern only matches archives whose own file name
# contains "final"; use '*.zip' if every archive should be scanned - confirm.
files = Path('/home/data/').rglob('*final*.zip')  # rglob recursively trawls all child dirs.
for file in files:
    # Open the archive itself (the loop variable is already its path) and
    # close it again once the member has been read.
    with zipfile.ZipFile(file) as archive:
        members = [name for name in archive.namelist() if name.startswith('output/final')]
        if not members:
            # No matching CSV in this archive; skip it.
            continue
        with archive.open(members[0]) as f:
            df = pd.read_csv(f, usecols=[3,7])
    dfs.append(df)

Categories