Open csv file with Pandas and delete if has only 1 row - python

I have a task to create a script that SSHes to a list of 10 Cisco routers weekly, checks for config changes, and sends a notification. I already have a script in place that logs in, runs the command, and writes the output to a CSV file. I have modified it so that if there are no changes, all I have in the CSV is, for example:
rtr0003# - which is only the router name. If there is a config change, the file will contain more lines, for example:
My question is how to use pandas to open each file and, if it contains only one line/row, delete the file, and if it contains more lines, skip it.
This is how i write the files:
# Rewrite every CSV in the current working directory in two passes.
# NOTE(review): `glob` and `pd` are used here but their imports are not shown in this snippet.
# NOTE(review): the loop body's indentation was lost in this paste; presumably every
# statement below the `for` line belongs inside the loop.
files = glob.glob('*.csv')
for file in files:
# Pass 1: load the file, drop every row that contains a missing value, and save it back.
df=pd.read_csv(file)
df=df.dropna()
df.to_csv(file,index=False)
# Pass 2: reload the same file while skipping its first two lines, then save it back again.
df1=pd.read_csv(file,skiprows = 2)
#df1=df1.drop(df1.tail(1))
df1.to_csv(file,index=False)

import os
import glob
import csv


def remove_single_row_csvs(pattern='*.csv'):
    """Delete every CSV file matching *pattern* that contains exactly one row.

    Args:
        pattern: Glob pattern selecting the files to inspect. The default
            looks at every ``.csv`` file in the current working directory.

    Returns:
        The paths that were deleted, in the order they were processed.
    """
    removed = []
    for file in sorted(glob.glob(pattern)):
        # newline="" is what the csv module expects, so that quoted line
        # breaks inside a field are not counted as extra rows.
        with open(file, "r", newline="") as f:
            row_count = sum(1 for _ in csv.reader(f, delimiter=","))
        # The file is closed before it is removed: deleting a file that is
        # still open raises PermissionError on Windows.
        if row_count == 1:
            os.remove(file)
            removed.append(file)
    return removed


if __name__ == "__main__":
    remove_single_row_csvs()

Here is a solution using pandas:
import pandas as pd
import glob
import os


def remove_single_line_csvs(pattern='*.csv'):
    """Delete every CSV file matching *pattern* that holds exactly one line.

    The files are read with ``header=None`` so that the first line counts as
    a data row. With the default header handling, a file holding only the
    router name would load as an empty frame and never be deleted.

    Args:
        pattern: Glob pattern selecting the files to inspect. The default
            looks at every ``.csv`` file in the current working directory.

    Returns:
        The paths that were deleted, in the order they were processed.
    """
    removed = []
    for file in sorted(glob.glob(pattern)):
        try:
            df_file = pd.read_csv(file, header=None, low_memory=False)
        except pd.errors.EmptyDataError:
            # A completely empty file has zero lines, not one: leave it alone.
            continue
        if len(df_file) == 1:
            os.remove(file)
            removed.append(file)
    return removed


if __name__ == "__main__":
    remove_single_line_csvs()
If you are using excel files, change
glob.glob('*.csv')
to
glob.glob('*.xlsx')
and
pd.read_csv(file, low_memory = False)
to
pd.read_excel(file)

Related

Pandas,(Python) -> Export to xlsx with multiple sheets

I'm trying to read some .xlsx files from a directory that was created earlier using the current timestamp, where the files are stored. Now I want to read those .xlsx files and put them into a single .xlsx file with multiple sheets, but I tried multiple ways and none of them worked. I tried:
final file Usage-SvnAnalysis.xlsx
the script i tried:
import pandas as pd
import numpy as np
from timestampdirectory import createdir
import os
# createdir() comes from the asker's own module; per the question text its return
# value is the path of the timestamped directory that holds the exported workbooks.
dest = createdir()
# NOTE(review): the file name is passed as a second positional argument instead of
# being joined onto `dest`, and read_csv is pointed at .xlsx workbooks — presumably
# this is why the attempt fails.
dfSvnUsers = pd.read_csv(dest, "SvnUsers.xlsx")
dfSvnGroupMembership = pd.read_csv(dest, "SvnGroupMembership.xlsx")
# Write both frames into one workbook, one sheet per frame, without the index column.
xlwriter = pd.ExcelWriter("Usage-SvnAnalysis.xlsx")
dfSvnUsers.to_excel(xlwriter, sheet_name='SvnUsers', index = False )
dfSvnGroupMembership.to_excel(xlwriter, sheet_name='SvnGroupMembership', index = False )
xlwriter.close()
The folder that is created automatically with the current timestamp and contains the files:
This is one of the files that I want to add as a sheet in the final .xlsx:
This is how I create the directory with the current time and return dest, the path the files are exported to:
I changed the script a bit; this is how it looks now, but I am still getting an error:
File "D:\Py_location_projects\testfi\Usage-SvnAnalysis.py", line 8, in
with open(file, 'r') as f: FileNotFoundError: [Errno 2] No such file or directory: 'SvnGroupMembership.xlsx'
The files exist, but the script can't find the root path to that directory, because I create that directory in another script using a timestamp and return the path as dest.
dest = createdir() represents the path where the files are. All I need to do is access dest, read the files from there, and export them into a single .xlsx file as its sheets — in this case sheet1 and sheet2, because I only tried to read 2 files from that directory.
import pandas as pd
import numpy as np
from timestampdirectory import createdir
import os
dest = createdir()
# List the entries of the timestamped directory.
# NOTE(review): os.listdir returns bare names without the `dest` prefix.
files = os.listdir(dest)
for file in files:
# NOTE(review): `file` is opened relative to the current working directory rather
# than `dest`, which matches the FileNotFoundError quoted above.
with open(file, 'r') as f:
# NOTE(review): open() returns file objects, not DataFrames, so the to_excel calls
# below would presumably fail even once the path problem is fixed.
dfSvnUsers = open(os.path.join(dest, 'SvnUsers.xlsx'))
dfSvnGroupMembership = open(os.path.join(dest, 'SvnGroupMembership.xlsx'))
# Intended output: one workbook with one sheet per source file, index column omitted.
xlwriter = pd.ExcelWriter("Usage-SvnAnalysis.xlsx")
dfSvnUsers.to_excel(xlwriter, sheet_name='SvnUsers', index = False )
dfSvnGroupMembership.to_excel(xlwriter, sheet_name='SvnGroupMembership', index = False )
xlwriter.close()
I think you should read the Excel files with pd.read_excel instead of pd.read_csv, and build each full path with os.path.join:
import os

# Build each workbook's full path inside the timestamped directory first,
# then load it with the Excel reader.
users_path = os.path.join(dest, "SvnUsers.xlsx")
membership_path = os.path.join(dest, "SvnGroupMembership.xlsx")
dfSvnUsers = pd.read_excel(users_path)
dfSvnGroupMembership = pd.read_excel(membership_path)

Converting file from python to exe

I've written a file that converts the delimiter in a CSV from format ',' to the ';'.
I'm trying to convert this file from a .py to a .exe using auto-py-to-exe. Whilst this process does work and execute correctly clicking on the app does not do anything.
Am I missing something from the code? Perhaps an auto execute command?
# import necessary libraries
import pandas as pd
import os
import glob

# Build a glob pattern that matches every CSV file in the current
# working directory.
cwd = os.getcwd()
pattern = os.path.join(cwd, "*.csv")

# Rewrite each matching file in place, replacing the ',' delimiter with ';'.
for csv_path in glob.glob(pattern):
    frame = pd.read_csv(csv_path, delimiter=',')
    frame.to_csv(csv_path, sep=';', index = False)
You should use argparse to provide the input and output paths:
# import necessary libraries
import pandas as pd
import os
import glob
import argparse


def convert_delimiters(input_path, out_path):
    """Copy every CSV in *input_path* to *out_path* using ';' as delimiter.

    Args:
        input_path: Directory containing the comma-delimited ``.csv`` files.
        out_path: Directory that receives the converted files. It is created
            if it does not exist yet. It may be the same as *input_path*, in
            which case the files are converted in place.

    Returns:
        The paths of the files that were written, sorted by name.
    """
    input_dir = os.path.abspath(input_path)
    out_dir = os.path.abspath(out_path)
    os.makedirs(out_dir, exist_ok=True)

    written = []
    # use glob to get all the csv files in the folder
    for f in sorted(glob.glob(os.path.join(input_dir, "*.csv"))):
        df = pd.read_csv(f, delimiter=',')
        # glob returns absolute paths here. Joining an absolute path onto
        # out_dir would discard out_dir, so only the file name is appended.
        target = os.path.join(out_dir, os.path.basename(f))
        df.to_csv(target, sep=';', index=False)
        written.append(target)
    return written


def main(argv=None):
    """Parse the command-line options and run the conversion.

    Args:
        argv: Argument list to parse; ``None`` means ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    # Both options default to the current directory, so starting the program
    # without arguments (e.g. by double-clicking the exe) converts the CSV
    # files next to it in place.
    parser.add_argument("-i", "--InputPath", default=".", help="Input path files")
    parser.add_argument("-o", "--OutPath", default=".", help="Out path files")
    args = parser.parse_args(argv)
    convert_delimiters(args.InputPath, args.OutPath)


if __name__ == "__main__":
    main()
Then simply use auto-py-to-exe to generate the exe.
Open a command prompt in the folder that contains the exe and run it with the input and output paths.
CMD:
$ file.py -i "input path" -o "Out path"

how to open multiple csv files in folders that are in a zipped file

zipped file --> 10folders --> 20 csv files for each folder
the zipped file title is yyyy-mm
folders titles are yyyy-mm-dd
csv files titles are different timings of the day
tried the following code but does not work
import pandas as pd
import os
import glob
# NOTE(review): `zipfile` is used below but is not among the imports shown.
myzip=zipfile.ZipFile("C:/xxx/xxx/xxx/xxx/2021-01.zip")
# Walk every member name recorded in the archive.
for fname in myzip.namelist():
# Only entries whose name does not contain 'csv' are processed — presumably the dated folders.
if 'csv' not in fname:
# NOTE(review): this builds a filesystem path that points inside the .zip file;
# glob presumably finds nothing there because the archive is a single file on
# disk, not a directory.
pathname = "C:/xxx/xxx/xxx/xxx/2021-01.zip/" + fname
# NOTE(review): `path` is assigned but not used afterwards in this snippet.
path = os.getcwd()
csv_files = glob.glob(os.path.join(pathname, "*.csv"))
for f in csv_files:
# read the csv file
df = pd.read_csv(f)
# print the location and filename
print('Location:', f)
print('File Name:', f.split("\\")[-1])
# print the content
print('Content:')
# NOTE(review): `display` is not defined or imported in this snippet — presumably
# the notebook (IPython) helper.
display(df)
print()
If it is not necessary to work with zipped files, you can unzip them first:
import zipfile

# Open the archive read-only and unpack all of its members into the target
# directory; leaving the `with` block closes the archive again.
archive = zipfile.ZipFile(path_to_zip_file, mode='r')
with archive:
    archive.extractall(directory_to_extract_to)
And then work with the extracted folders normally.

Reading bulk Excel files from a file with Python (Pandas)

I have 40 .xls files in a folder I would like to import into a df in Pandas.
Is there a function similar to read_csv() that will allow me to direct Python to the folder and open each of these files into the dataframe? All headers are the same in each file
Try pandas.read_excel to open each file. You can loop over the files using the glob module.
import glob
import os

import pandas as pd


def read_excel_folder(folder='.', pattern='*.xls*'):
    """Load every Excel workbook in *folder* into one DataFrame.

    Args:
        folder: Directory to search. Defaults to the current directory.
        pattern: Glob pattern for the workbook names. The default matches
            both legacy ``.xls`` and ``.xlsx`` files.

    Returns:
        A DataFrame with the rows of all workbooks stacked on top of each
        other. The outer index level is the workbook's file name. An empty
        DataFrame is returned when no file matches.
    """
    paths = sorted(glob.glob(os.path.join(folder, pattern)))
    if not paths:
        # pd.concat raises ValueError when it is given nothing to concatenate.
        return pd.DataFrame()
    dfs = {os.path.basename(f): pd.read_excel(f) for f in paths}
    return pd.concat(dfs)  # change concatenation axis if needed


if __name__ == "__main__":
    df = read_excel_folder()
you can load excel files and concat each other.
import os
import pandas as pd


def concat_excel_files(folder):
    """Read every Excel workbook in *folder* and stack them into one frame.

    Args:
        folder: Path of the directory that holds the ``.xls`` / ``.xlsx``
            files. Entries with any other extension are ignored.

    Returns:
        A DataFrame with the rows of all workbooks, in file-name order. An
        empty DataFrame is returned when the folder holds no workbook.
    """
    frames = [
        pd.read_excel(os.path.join(folder, file))
        for file in sorted(os.listdir(folder))
        if file.lower().endswith((".xls", ".xlsx"))
    ]
    if not frames:
        return pd.DataFrame()
    # Concatenate once at the end: calling pd.concat inside the loop copies
    # all previously loaded rows again on every iteration.
    return pd.concat(frames)
import os
import pandas as pd

folder = r'C:\Users\AA\Desktop\Excel_file'
files = os.listdir(folder)

# Load every .xlsx workbook found in the folder. The frames are collected in
# a list so that no workbook is lost by reassigning `df` on each iteration.
frames = [
    pd.read_excel(os.path.join(folder, file))
    for file in files
    if file.endswith('.xlsx')
]

# All workbooks share the same headers, so their rows can simply be stacked.
# pd.concat raises on an empty list, hence the fallback to an empty frame.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
Does this help?

Copy data from CSV and PDF into HDF5 using Python

How can I transfer files from specific folders into an HDF5 file using Python? The file types are PDF and CSV.
For example, I have the path /root/Desktop/mal/ex1/, which contains many CSV and PDF files.
I want to make one single HDF5 file that contains all of these CSV and PDF files.
You could modify the below code based on your requirement details:
import numpy as np
import h5py
import pandas as pd
import glob

yourpath = '/root/Desktop/mal/ex1'
all_files = glob.glob(yourpath + "/*.csv")

# Load each CSV, treating its first row as the header, and stack all rows
# into a single frame with a fresh 0..n-1 index.
li = [pd.read_csv(filename, index_col=None, header=0) for filename in all_files]
frame = pd.concat(li, axis=0, ignore_index=True)

# The context manager closes the HDF5 file even if writing the dataset fails.
with h5py.File('data.h5', 'w') as hf:
    # NOTE(review): the frame is handed to h5py as-is; presumably this only
    # works when every column is numeric — confirm against your data.
    hf.create_dataset('dataset_1', data=frame)

Categories