Create Excel Sheets from different files in Linux with Python - python

There are 2 txt file in a linux server.
first data file:
a;1234
b;12334
c;234234
second data file :
a ; ass ; asfda
b ; sdfq; qwrwffsaa
c ; asda ; qdasasd
What I try to make is to create a excel file with python which has 2 sheets.
First sheet keeps first data file second one should keep second data file.
What I develop so far is:
#!/bin/python
import xlsxwriter
import smtplib
import datetime
now = datetime.datetime.now()
workbookname = 'Excel_'+now.strftime("%Y-%m-%d_%H:%M")+'.xlsx'
workbook = xlsxwriter.Workbook(workbookname)
worksheet = workbook.add_worksheet('Sheet1')
worksheet.write('A1', 'Hostname')
worksheet.write('B1', 'User Name')
worksheet2 = workbook.add_worksheet('User Privilege')
worksheet2.write('A1', 'Hostname')
worksheet2.write('B1', 'User Detail')
worksheet2.write('C1', 'Description')
with open('/tmp/file1.txt') as f:
content = f.read().splitlines()
i = 0
while i < len(content):
content2 = content[i].split(';')
worksheet.write('A'+str(i+2), content2[0])
worksheet.write('B'+str(i+2), content2[1])
workbook.close()
i = 0
while i < len(content):
with open('/tmp/file2.txt') as f:
content = f.read().splitlines()
worksheet2.write('A' + str(i + 2), content2[0])
worksheet2.write('B' + str(i + 2), content2[1])
worksheet2.write('C' + str(i + 2), content2[2])
i=i+1
workbook.close()
This script only works for the first sheet it does not write to second sheet.

With pandas this can be done in a couple of lines
import pandas
df1 = pandas.read_csv('file1.csv', sep = ';', header = None)
df2 = pandas.read_csv('file2.csv', sep = ';', header = None)
writer = pandas.ExcelWriter('output.xlsx')
df1.to_excel(writer, 'sheet 1')
df2.to_excel(writer, 'sheet 2')
writer.save()

Related

I Want to Compare two XML Files Using Python and Print Common attribute Values and Uncommon attribute Values in both the files

As Iam New to Python I need some help to compare two XML files.
These are the Following Conditions:
To print Common fullPath Name and Name (fullPath and Name are the attributes present in the XML file) between the two XML files.
To print the values which is present in only first file and not in second file.
To print the values which is present in only second file and not in first file.
Later, Have to print this output in excel file having different sheets.
for example (1st condition in sheet 1, 2nd condition in sheet2, 3rd condition in sheer3 of the same excel file.)
Can please anyone help me with the code that satisfies the above condition which I have mentioned.
This is the code which I have tried.
from lxml import etree
Base = etree.parse('Base.xml')
Target = etree.parse('Target.xml')
Base_fullPath = Base.xpath("//Member/#fullPath")
Target_fullPath = Target.xpath("//Member/#fullPath")
Base_name = Base.xpath("//Member/#name")
Target_name = Target.xpath("//Member/#name")
def match(Base_fullPath, Target_fullPath, Base_name,Target_name):
Base_fullPath_set = set(Base_fullPath)
Target_fullPath_set = set(Target_fullPath)
Base_name_set = set(Base_name)
Target_name_set = set(Target_name)
if (Base_fullPath_set & Target_fullPath_set, Base_name_set & Target_name_set):
x = open('C:\\Users\\pvl\\Desktop\\New folder\\Common_FullPath.csv', 'w')
y=(Base_fullPath_set & Target_fullPath_set)
z=(Base_name_set & Target_name_set)
print("common details Full Path: \n", *y, sep='\n', file = x)
print("\n")
x = open('C:\\Users\\pvl\\Desktop\\New folder\\Common_name.csv', 'w')
print("\n common details Name: \n", *z, sep='\n', file=x)
else:
print("No Matches Found")
match(Base_fullPath, Target_fullPath, Base_name,Target_name)
def non_match_elements(list_base, list_target):
non_match_base = []
non_match_target = []
for i in list_base:
if i not in list_target:
non_match_base.append(i)
for i in list_target:
if i not in list_base:
non_match_target.append(i)
return non_match_base
return non_match_target
list_base = Base.xpath("//Member/#*")
list_target = Target.xpath("//Member/#*")
non_match_base = non_match_elements(list_base, list_target)
x = open('C:\\Users\\pvl\\Desktop\\New folder\\Present_in_base.csv', 'w')
print("\n Base Details: \n", *non_match_base, sep='\n', file = x)
non_match_target = non_match_elements(list_target, list_base)
x = open('C:\\Users\\pvl\\Desktop\\New folder\\Present_in_target.csv', 'w')
print("\n Target Details: \n", *non_match_target, sep='\n', file = x)
import pandas as pd
df = pd.read_csv('C:\\Users\\pvl\\Desktop\\New folder\\Common_FullPath.csv')
df1 = pd.read_csv('C:\\Users\\pvl\\Desktop\\New folder\\Common_name.csv')
df2 = pd.read_csv('C:\\Users\\pvl\\Desktop\\New folder\\Present_in_base.csv', delimiter=';;', on_bad_lines = 'skip', engine = 'python' )
df3 = pd.read_csv('C:\\Users\\pvl\\Desktop\\New folder\\Present_in_target.csv', delimiter=';', on_bad_lines = 'skip', engine = 'python')
with pd.ExcelWriter("C:\\Users\\pvl\\Desktop\\New folder\\combined.xlsx") as writer:
df1.to_excel(writer, sheet_name="Common_name", index=False)
df2.to_excel(writer, sheet_name="base_Details", index=False)
df3.to_excel(writer, sheet_name = "target_Details", index=Fal

Is there a way to read and alter the contents of a huge csv file in PyCharm?

I'm attempting to create a program currently that can read a csv, determine if a substring is included in one of the columns of each row, and if it isn't present, rewrites certain columns to a new csv. I have the code down for this much- but the csv I need to use the program for has well over 3 million rows. I use PyCharm and currently I'm not able to process this much data. It can only view the csv in a read-only format which doesn't allow me to use it. I know pandas has a chunk size feature but I don't know how to implement this with the rest of my code.
def reading(csv_input):
originalLength = 0
rowCount = 0
with open(f'Web Report {csv_input}', 'w') as file:
writer = csv.writer(file)
writer.writerow(['Index', 'URL Category', 'User IP', 'URL'])
dropCount = 0
data = pd.read_csv(csv_input, chunksize=100000)
df = pd.DataFrame(data,
columns=['Line', 'Date', 'Hour', 'User Name', 'User IP', 'Site Name',
'URL Category', 'Action', 'Action Description'])
originalLength = len(df.index)
for line in range(originalLength):
dataLine = df.loc[line]
x = dataLine.get(key='Action')
if x == 0:
siteName = dataLine.get(key='Site Name')
if 'dbk' in siteName:
dropCount = dropCount + 1
elif 'ptc' in siteName:
dropCount = dropCount + 1
elif 'wcf' in siteName:
dropCount = dropCount + 1
elif 'google' in siteName:
dropCount = dropCount + 1
else:
writer.writerow([line, # Original Index
df.loc[line].get(key='URL Category'), # Original URL Category
df.loc[line].get(key='User IP'), # Original User IP
df.loc[line].get(key='Site Name')]) # Original Site Name
rowCount = rowCount + 1
else:
dropCount = dropCount + 1
file.close()
print("Input: " + str(csv_input))
print("Output: " + str(file.name))
print("Original Length: " + str(originalLength))
print("Current Length: " + str(rowCount))
print("Drop Count: " + str(dropCount) + "\n")
return df
If you use csv to write file then you could use it also to read row by row.
import csv
with open('input.csv') as infile, open('output.csv', 'w') as outfile:
csv_reader = csv.reader(infile)
csv_writer = csv.writer(outfile)
# copy headers
headers = next(csv_reader)
csv_writer.writerow(headers)
# process rows
for row in csv_reader: # read row by row
# keep only rows with even index
if int(row[0]) % 2 == 0:
print('--- row ---')
print(row)
csv_writer.writerow(row)
If you want to use pandas with chunk then you should use for-loop for this.
And when you write with pandas then you need append mode without headers.
import pandas as pd
first = True
for df in pd.read_csv('input.csv', chunksize=1): # read row by row
# keep only rows with even index
if df.index % 2 == 0:
print('--- row ---')
print(df)
if first:
# create new file with headers
df.to_csv('output.csv', mode='w')
first = False
else:
# append to existing file without headers
df.to_csv('output.csv', mode='a', header=False)
Minimal working code
import pandas as pd
import csv
# --- create some data ---
data = {
'A': range(0,10),
'B': range(10,20),
'C': range(20,30),
} # columns
df = pd.DataFrame(data)
df.to_csv('input.csv', index=False)
# --- read and write with `pandas` ---
first = True
for df in pd.read_csv('input.csv', chunksize=1): # read row by row
# keep only rows with even index
if df.index % 2 == 0:
print('--- row ---')
print(df)
if first:
# create empty with headers
df.to_csv('output_pandas.csv', mode='w')
first = False
else:
# append to existing file without headers
df.to_csv('output_pandas.csv', mode='a', header=False)
# --- read and write with `csv` ---
with open('input.csv') as infile, open('output.csv', 'w') as outfile:
csv_reader = csv.reader(infile)
csv_writer = csv.writer(outfile)
# copy headers
headers = next(csv_reader)
csv_writer.writerow(headers)
# process rows
for row in csv_reader:
# keep only rows with even index
if int(row[0]) % 2 == 0:
print('--- row ---')
print(row)
csv_writer.writerow(row)
Doc: read_csv(), to_csv()

Using xlwings to open excel sheet. Need to search a string and print full line

I'm using xlwings to open excel sheet. Need to search a string in a specific column and print full line of the string without search item and until new line(\n). Output should be in new column of same sheet.
Input:
search string: [game]
Output:
import xlwings as xw
open excel file using xlwings
filename = r'input.xlsx'
book = xw.Book(filename)
sheet = book.sheets[0]
find the last row of the sheet on a specific range in this case from column 'A'
lrow = sheet.range('A' + str(sheet.cells.last_cell.row)).end('up').row
declare a separate variable for the string that you will search and the column where your output will be located.
search_string = '[game]'
sheet.range('B1').value = 'output'
output_index = 2
now loop through that range to see if your search_string is in that range
for i in range(1, lrow + 1):
if search_string in str(sheet.range('A{}'.format(i)).value):
temp = str(sheet.range('A{}'.format(i)).value)
temp = temp.split(search_string)[1]
if '[' in temp:
temp = temp.split('[')[0]
sheet.range('B{}'.format(output_index)).value = temp
output_index += 1
book.save()
book.close()
Below is the full code >>
import xlwings as xw
filename = r'input.xlsx'
book = xw.Book(filename)
sheet = book.sheets[0]
lrow = sheet.range('A' + str(sheet.cells.last_cell.row)).end('up').row
search_string = '[game]'
sheet.range('B1').value = 'output'
output_index = 2
for i in range(1, lrow + 1):
if search_string in str(sheet.range('A{}'.format(i)).value):
temp = str(sheet.range('A{}'.format(i)).value)
temp = temp.split(search_string)[1]
if '[' in temp:
temp = temp.split('[')[0]
sheet.range('B{}'.format(output_index)).value = temp
output_index += 1
book.save()
book.close()

how to parse a txt file to csv and modify formatting

Is there a way I can use python to take my animals.txt file results and convert it to csv and format it differently?
Currently the animals.txt file looks like this:
ID:- 512
NAME:- GOOSE
PROJECT NAME:- Random
REPORT ID:- 30321
REPORT NAME:- ANIMAL
KEYWORDS:- ['"help,goose,Grease,GB"']
ID:- 566
NAME:- MOOSE
PROJECT NAME:- Random
REPORT ID:- 30213
REPORT NAME:- ANIMAL
KEYWORDS:- ['"Moose, boar, hansel"']
I would like the CSV file to present it as:
ID, NAME, PROJECT NAME, REPORT ID, REPORT NAME, KEYWORDS
Followed by the results underneath each header
Here is a script I have wrote:
import re
import csv
with open("animals.txt") as f: text = f.read()
data = {}
keys = ['ID', 'NAME', 'PROJECT NAME', 'REPORT ID', 'REPORT NAME', 'KEYWORDS']
for k in keys:
data[k] = re.findall(r'%s:- (.*)' % k, text)
csv_file = 'out.csv'
with open(csv_file, 'w') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=keys)
writer.writeheader()
for x in data:
writer.writerow(x)
An easy way to do is parsing using regex and store them in a dict, just before you write the final csv:
import re
# `text` is your input text
data = {}
keys = ['ID', 'NAME', 'PROJECT NAME', 'REPORT ID', 'REPORT NAME', 'KEYWORDS']
for k in keys:
data[k] = re.findall(r'%s:- (.*)' % k, text)
And to CSV:
import csv
csv_file = 'out.csv'
with open(csv_file, 'w') as csvfile:
writer = csv.writer(csvfile, quoting=csv.QUOTE_NONE, escapechar='\\')
writer.writerow(data.keys())
for i in range(len(data[keys[0]])):
writer.writerow([data[k][i] for k in keys])
Output in csv:
ID,NAME,PROJECT NAME,REPORT ID,REPORT NAME,KEYWORDS
512,GOOSE,Random,30321,ANIMAL,['\"help\,goose\,Grease\,GB\"']
566,MOOSE,Random,30213,ANIMAL,['\"Moose\, boar\, hansel\"']
Note that I used re.M multiline mode since there's a trick in your text, preventing matching ID twice! Also the default write rows needed to be twisted.
Also uses \ to escape the quote.
This should work:
fname = 'animals.txt'
with open(fname) as f:
content = f.readlines()
content = [x.strip() for x in content]
output = 'ID, NAME, PROJECT NAME, REPORT ID, REPORT NAME, KEYWORDS\n'
line_output = ''
for i in range(0, len(content)):
if content[i]:
line_output += content[i].split(':-')[-1].strip() + ','
elif not content[i] and not content[i - 1]:
output += line_output.rstrip(',') + '\n'
line_output = ''
output += line_output.rstrip(',') + '\n'
print(output)
That's the code in Autoit (www.autoitscript.com)
Global $values_A = StringRegExp(FileRead("json.txt"), '[ID|NAME|KEYWORDS]:-\s(.*)?', 3)
For $i = 0 To UBound($values_A) - 1 Step +6
FileWrite('out.csv', $values_A[$i] & ',' & $values_A[$i + 1] & ',' & $values_A[$i + 2] & ',' & $values_A[$i + 3] & ',' & $values_A[$i + 4] & ',' & $values_A[$i + 5] & #CRLF)
Next

Python - copy and paste values from a formula into another column (openpyxl or other libs)

I am attempting to clean up an excel file and essentially what I want to do is this: I have inserted a formula in column B that references column A and would like to then take all the values in column B and paste special as values over column A. After, I would like to delete column B.
Does anyone know of anyway to do this via openpyxl library or another library?
Here is a copy of my code thus far:
import openpyxl as xl
import pandas as pd
import datetime as dt
import os
def left(s, amount):
return s[:amount]
def right(s, amount):
return s[-amount:]
def mid(s, offset, amount):
return s[offset:offset+amount]
filepath = r'...desktop\Scripts' + '\\'
filename = 'file.xlsx'
fn = filepath + filename
WB = xl.load_workbook(fn, data_only = True)
WS = WB.worksheets[1]
for i in sorted(WS.merged_cell_ranges):
WS.unmerge_cells(str(i))
WB.save(filepath + 'unmerged.xlsx')
unmerged = pd.read_excel(filepath + 'unmerged.xlsx', sheetname = 1)
unmerged.insert(1, 'ifIsBlank', '')
unmerged.insert(5, 'trimSubst', '')
inserted = unmerged.to_excel(filepath + 'inserted.xlsx', index = False)
WB = xl.load_workbook(filepath + 'inserted.xlsx')
WS = WB.worksheets[0]
WS['A'][0].value = 'BU'
WS['E'][0].value = 'em name'
blankCount1 = 1
blankCount2 = 0
trimCount = 1
for i, cell in enumerate(WS['B']):
cell.value = '=IF(ISBLANK(A' + str(blankCount1) + '), B' + str(blankCount2) + ', A' + str(blankCount1) + ')'.format(i)
blankCount1 += 1
blankCount2 += 1
for j, cell in enumerate(WS['F']):
cell.value = '=TRIM(SUBSTITUTE(E' + str(trimCount) + ', "~", "_"))'.format(j)
trimCount += 1
WS['B'][0].value = 'Category Name'
WS['F'][0].value = 'Item Name'
WB.save(filepath + 'formula.xlsx')
valWB = xl.load_workbook(filepath + 'formula.xlsx', data_only = True)
So the issue I'm running into is that when I read formula.xlsx as a .csv (desired file type of my final output), it shows nothing in the cells that have the formula- Is there a way to essentially copy and paste the values that come out of the formula into another column?
Thanks!

Categories