Recalling sheet names for a while loop - python

I have imported xlrd etc. The main part of my code is then as follows:
# NOTE(review): the indentation of this listing was lost when it was posted,
# so the nesting of the loops below cannot be read off the text.
# NOTE(review): `sh` and `sh1` are not defined anywhere in this excerpt.
for serie_diam in range(0,9):
# Sheet name for this iteration; nothing below uses it yet (the only
# reference is the commented-out select() call).
namesheet = "Diamètre " + str(serie_diam)
#select(namesheet)
numLine = sh.row_values(3)
OK = 1
# NOTE(review): no visible statement changes OK or numLine, so as shown this
# loop has no exit -- TODO confirm what was left out.
while OK == 1:
d = sh1(numLine, 1)
D = sh1(numLine, 2)
rs = sh1(numLine, 7)
# Reads four consecutive columns starting at column 2.
for i in range(4):
BB = sh1(numLine, 2 + i)
if BB != 0:
# NOTE(review): this prints `B`, but only `BB` is assigned in this excerpt.
print repr(d).rjust(2), repr(D).rjust(3), repr(B).rjust(4), repr(rs).rjust(5)
I have 7 sheets in my xls file overall and I would like to know how I can loop through these in the same while loop as OK == 1 where for the moment I have written just 'sh1'.
I'm sorry if this question is too easy!

import xlrd
# Open the workbook once; every sheet is reachable from this handle.
book = xlrd.open_workbook('xlrd_test.xls')

# Simplest form: iterate over the sheet objects directly.
for sheet in book.sheets():
    # Do the per-sheet work here; printing the first row is only an example.
    print(sheet.row(0))

# Or, if the sheet index is needed for some purpose:
for shidx in range(book.nsheets):
    sheet = book.sheet_by_index(shidx)
    # Prints 'Page N, first line: ....'
    # A single pre-formatted argument prints the same on Python 2 and 3.
    print('Page %d, first line: %s' % (shidx, sheet.row(0)))

Related

using python to extract data from excel

Hi, I have a Python program that reads data values from an Excel file, extracts specific values, and creates a new Excel file with the extracted values.
The Excel file that Python reads contains 3 columns: 1 - Time, 2 - Elapsed Time, and 3 - Duct temp.
the duct temperature is stored in this file as
['duct1 temperature', [25.000882991454244, 25.002648974362724, 25.00387452337855, 25.004724896765367, 25.00531481876751, 25.005723932326624, . . . . .]]
This is all in 1 cell (column 3)
each number is a node, so node 1 is extracted as 25.000882991454244
and node 2 is extracted as 25.002648974362724
So the program works fine, but I have some issues.
1- If I say I want 512 values, it gives me an error:
Traceback (most recent call last):
File "ColumnExtractor.py", line 126, in <module>
main(0, None)
File "ColumnExtractor.py", line 114, in main
timeData, data = readFile(directory, fileName)
TypeError: cannot unpack non-iterable NoneType object
2- I can only extract one value at a time, so if I wanted to extract all 512 values I would have to do it manually, and that would take a long time.
So my question is: is there any way to adjust this to extract every data value in the original file and store each of them in its own column in the new Excel file?
What is the file directory? /Users/jack/Downloads/1copy/untitledfolder
What would you like the new file to be called? results
How many values would you like to take? 4
Duct_temp_values.csv
Which node number to read? 3
Which column is the data in? 3
What would you like the first column to be called? temp
What would you like the final column to be called? time
Which node number to read? 1
Which column is the data in? 3
What would you like the first column to be called? temp2
What would you like the final column to be called? time
Which node number to read? 4
Which column is the data in? 3
What would you like the first column to be called? temp3
What would you like the final column to be called? time
Which node number to read? 5
Which column is the data in? 3
What would you like the first column to be called? temp4
What would you like the final column to be called? time
original file
results
The code itself is:
import os
import csv
import xlwt
def getInputs():
    """Ask the user for the CSV folder and the name of the output workbook.

    Keeps asking for a directory until one is entered that can be listed and
    holds at least one file whose name ends in "csv".  Backslashes in the
    entered path are converted to forward slashes.  Then keeps asking for an
    output name until a non-empty one is entered.

    Returns:
        A tuple ``(output, csvFileNames, convertedDir)``: the output name as
        typed, the matching file names in ``os.listdir`` order, and the
        directory path with forward slashes.
    """
    csvFileNames = []
    convertedDir = ""
    while not csvFileNames:
        folderDir = input("What is the file directory? ")
        convertedDir = folderDir.replace("\\", "/")
        try:
            entries = os.listdir(convertedDir)
        except (OSError, ValueError):
            # Only a failed listing means "bad directory"; anything else
            # (for example Ctrl+C) must not be swallowed here.
            print("That directory doesn't exist.\n")
            continue
        csvFileNames = [name for name in entries if name.endswith("csv")]
        if not csvFileNames:
            # Explain why the question is being asked again.
            print("That directory doesn't contain any CSV files.\n")

    output = ""
    while not output:
        output = input("What would you like the new file to be called? ")
    return output, csvFileNames, convertedDir
def readFile(directory, fileName):
    """Read the time column and one node's values from a CSV file.

    Only every second line of the file (the 2nd, 4th, ...) is used, and empty
    rows among those are ignored.  The first cell of each row is taken as the
    time value.  The last cell is expected to hold a bracketed,
    space-separated list of node values; the node picked by the user is
    extracted from it.

    The user is asked for the node number (1-based), the data column, and
    the two column headings.  The node number is asked again until it is a
    whole number that exists in every row.

    Args:
        directory: Folder that contains the file.
        fileName: Name of the CSV file inside ``directory``.

    Returns:
        A tuple ``(timeColumn, dataColumn)`` of two lists.  Each starts with
        the heading typed by the user, followed by one string per data row.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Only the open/read can fail with "file not found"; keeping it outside
    # any catch-all means other errors are no longer reported as a missing
    # file and the function can no longer fall through and return None.
    with open(f"{directory}/{fileName}", "r") as file:
        data = [
            line
            for lineNumber, line in enumerate(csv.reader(file))
            if lineNumber % 2 == 1 and line
        ]

    # Characters that surround the numbers inside the list cell.
    unwanted = str.maketrans("", "", ",'[]")
    timeData = []
    nodeRows = []
    for dataRow in data:
        timeData.append(dataRow[0])
        # Everything after the last "[" is the list of node values.
        mainData = dataRow[-1].split("[")[-1]
        nodeRows.append(
            [point.translate(unwanted) for point in mainData.split(" ")]
        )

    # A node is only valid if every row has it.
    nodeCount = min((len(row) for row in nodeRows), default=0)

    column = None
    while column is None:
        try:
            candidate = int(input("Which node number to read? "))
        except ValueError:
            print("Please enter a whole number.\n")
            continue
        if nodeRows and not 1 <= candidate <= nodeCount:
            print(f"The node number must be between 1 and {nodeCount}.\n")
            continue
        column = candidate

    # The answer is not used: the values are always taken from the last
    # cell of each row.  The question is kept so the session is unchanged.
    input("Which column is the data in? ")

    exportArray = [row[column - 1] for row in nodeRows]
    firstName = input("What would you like the first column to be called? ")
    secondName = input("What would you like the final column to be called? ")
    return [secondName] + timeData, [firstName] + exportArray
def main(incr, openedBook):
    """Copy user-selected node values from each CSV file into an .xls workbook.

    For every CSV file returned by ``getInputs`` whose name does not contain
    "junction", one sheet named after the file is added.  The user chooses
    how many values to take; ``readFile`` is then called that many times and
    each result fills one column.  The time column from the first call is
    written in the column just after the value columns.

    Args:
        incr: Not used by this function.
        openedBook: Workbook to add the sheets to; a new ``xlwt.Workbook`` is
            created when this is falsy.
    """
    book = openedBook or xlwt.Workbook()
    output, csvFile, directory = getInputs()

    for fileName in csvFile:
        if "junction" in fileName.lower():
            continue

        number = int(input("\nHow many values would you like to take? "))
        print("\n" + fileName, "\n")
        sheet = book.add_sheet(fileName)

        for j in range(number):
            timeData, data = readFile(directory, fileName)
            for i, value in enumerate(data):
                if j == 0:
                    # Written once, to the right of all value columns.
                    sheet.write(i, number, timeData[i])
                sheet.write(i, j, value)

    book.save(f"{output}.xls")


if __name__ == "__main__":
    main(0, None)

How to iterate over the rows from 2 files, compare the values and update the value in a file when the condition is met?

To change the values from 10 to 18, 19 or 20, I am splitting the string, accessing the substrings and then trying to change them. It runs, but it just does not change the values. Here is the solution I am trying to implement:
# Reads tryout.hmo and Density.topo and writes tryout_NEW.hmo.
# NOTE(review): the indentation of this listing was lost when it was posted,
# so which statements belong to which loop cannot be read off the text.
oldFileName = 'tryout.hmo'
newFileName = 'tryout_NEW.hmo'
topoFileName = 'Density.topo'
readme = open( oldFileName, "r" )
oldLines = readme.readlines()
# The name `readme` is rebound here without closing the first file.
readme = open(topoFileName, "r")
Lines = readme.readlines()
readme.close()
newFile = open(newFileName,"w")
for row in oldLines:
for line in Lines:
# str.split() returns a list of strings.
tmp = line.split()
# NOTE(review): `list` shadows the built-in of the same name.
list = row.rstrip()
tmp1 = list.split()
newFile.write(row)
if row.find("BEG_ELEM_DATA") > -1:
if tmp[0] == tmp1[0]:
# tmp[2] and tmp[3] are strings, so comparing them with the integers 1 and 0
# is never true; only the final `else` branch can be taken.
if tmp[2] == 1 and tmp[3] == 0:
# it is magnet, value 18
# tmp1 is a list (the result of split()); lists have no replace() method,
# so this call raises AttributeError if it is ever reached.
newFile.write(tmp1.replace(tmp1[1], "18"))
elif tmp[2] == 1 and tmp[3] == 1:
# it is iron, value 19
newFile.write(tmp1.replace(tmp1[1], "19"))
else:
# it is air, value 20
newFile.write(tmp1.replace(tmp1[1], "20"))
newFile.close()
I would really appreciate it if you could solve this problem in the above script; then I guess it should work.
I'm also still a beginner in Python, but I tried to solve your problem and here is my solution:
I guess there are way better ways to do it because here you have to import all data to a dataframe before comparing it.
Also I don't know if you can read your data with pd.read_csv to a dataframe because I don't know *.hmo and *.topo
import pandas as pd
df = pd.read_csv('tryout.csv', delimiter=';')
df2 = pd.read_csv('density.csv', delimiter=';')

# Work out the new value for every ID listed in density.csv.  Columns are
# addressed by position: 0 = ID, 2 and 3 = the two flags.  If an ID occurs
# more than once, the last occurrence wins.
codes = {}
for density_id, flag_a, flag_b in zip(
        df2.iloc[:, 0], df2.iloc[:, 2], df2.iloc[:, 3]):
    if flag_a == 1 and flag_b == 0:
        # it is magnet, value 18
        codes[density_id] = 18
    elif flag_a == 1 and flag_b == 1:
        # it is iron, value 19
        codes[density_id] = 19
    else:
        # it is air, value 20
        codes[density_id] = 20

# Write the new values into df itself.  The rows handed out by iterrows()
# can be copies, so assigning to them is not guaranteed to change df.
for pos, row_id in enumerate(df.iloc[:, 0]):
    if row_id in codes:
        df.iat[pos, 1] = codes[row_id]

df.to_csv('new_tryout.csv')
What my code is doing here, it loads both files to dataframes. Then iterate over every line to compare where the ID in both files is the same (e.g 3749).
If true there are the 3 if statements whether it is magnet/iron/air and change the value in df to the right number.
At the end save the new df to a new file 'new_tryout.csv'
I created 2 testfiles for it and it worked the way it should.
Finally, here is the solution you were searching for.
import pandas as pd
df2 = pd.read_csv('Density.topo', header = 0, names = list('ABCD'), delimiter=r'\s+', skiprows=1)
df2[['C', 'D']] = df2[['C', 'D']].round()

# Build the ID -> (C, D) lookup once instead of filtering the frame for
# every line.  Keys are text because the ID is cut out of a text line; the
# first occurrence of an ID wins.
material_by_id = {}
for raw_id, c_flag, d_flag in zip(df2['A'], df2['C'], df2['D']):
    if isinstance(raw_id, float) and raw_id.is_integer():
        raw_id = int(raw_id)  # 3749.0 -> 3749 so the text form matches
    material_by_id.setdefault(str(raw_id), (c_flag, d_flag))

new_lines = []
with open('tryout.hmo', 'r') as f:
    for line in f:
        flags = None
        if line[11:13] == '10':
            # The ID sits in columns 3-7, padded with spaces when it has
            # fewer than 5 digits.
            flags = material_by_id.get(line[3:8].strip())
        if flags is None:
            # Not a '10' line, or its ID is not in Density.topo: keep as is.
            new_lines.append(line)
            continue
        if flags[0] == 1 and flags[1] == 0:
            change = '18' #magnet
        elif flags[0] == 1 and flags[1] == 1:
            change = '19' #iron
        else:
            change = '20' #air
        new_lines.append(line[:11] + change + line[13:])

with open('tryout_changed.hmo', 'w') as f:
    f.write(''.join(new_lines))
if you don't want to use dataframes, you can do it like this:
# ID -> (third field, fourth field) of density.topo, both rounded to whole
# numbers.  The fields are text in the file, so they must be converted
# before being compared with 1 and 0.  The first occurrence of an ID wins.
material_by_id = {}
with open('density.topo') as f:
    for line in f:
        fields = line.split()
        if len(fields) < 4:
            continue
        try:
            flags = (round(float(fields[2])), round(float(fields[3])))
        except (ValueError, OverflowError):
            # Not a numeric row (for example a heading): nothing to look up.
            continue
        material_by_id.setdefault(fields[0], flags)

new_lines = []
with open('tryout_test.hmo', 'r') as f:
    for line in f:
        flags = None
        if line[11:13] == '10':
            # The ID sits in columns 3-7, padded with spaces when it has
            # fewer than 5 digits.
            flags = material_by_id.get(line[3:8].strip())
        if flags is None:
            # Not a '10' line, or no matching ID: keep the line unchanged
            # instead of losing it.
            new_lines.append(line)
            continue
        if flags == (1, 0):
            change = '18' #magnet
        elif flags == (1, 1):
            change = '19' #iron
        else:
            change = '20' #air
        new_lines.append(line[:11] + change + line[13:])

with open('tryout_changed.hmo', 'w') as f:
    f.write(''.join(new_lines))
ok, here is my final answer. It does (again) all things you were searching for. Please debug your code in your IDE if there is a problem. You should start using context manager instead of open and closing files step by step.
I wrote the new code around your code in the question and added some comments to it.
oldFileName = 'tryout.hmo'
newFileName = 'tryout_NEW.hmo'
topoFileName = 'Density.topo'

with open(oldFileName, "r") as oldFile:
    oldLines = oldFile.readlines()

# The fourth line of the file holds an integer.  Three entries numbered
# m+1 .. m+3 are added below, so that line is rewritten as m+3.
m = int(oldLines[3])
print(m)
new_m = m + 3
m1 = str(m)
new_m1 = str(new_m)

Phrase = "END_COMP_DATA"
# The new entries go directly before the last line containing the phrase.
matches = [number for number, lin in enumerate(oldLines) if Phrase in lin]
if not matches:
    raise ValueError(f"{Phrase!r} was not found in {oldFileName}")
n = matches[-1]

#insert 3 lines to tryout_new at the right position (--> row n)
magnet = f' {m + 1} " topo_magnet"\n'
iron = f' {m + 2} " topo_iron"\n'
air = f' {m + 3} " topo_air"\n'
oldLines[n:n] = [magnet, iron, air]

# ID -> fields of the first line in the topo file that starts with that ID.
density_by_id = {}
with open(topoFileName) as f:
    for line in f:
        fields = line.split()
        if len(fields) >= 4:
            density_by_id.setdefault(fields[0], fields)

flag = 0
with open(newFileName, "w") as newFile:
    for idx, row in enumerate(oldLines):
        tmp_tryout = row.split()
        if row.find("BEG_ELEM_DATA") > -1:
            flag = 1

        # Every row is written exactly once; it is changed only when one of
        # the cases below applies.
        new_row = row
        if flag == 1 and len(tmp_tryout) > 1:
            # Element rows: only those whose second field is "10" change.
            if tmp_tryout[1] == '10':
                density = density_by_id.get(tmp_tryout[0])
                if density is not None:
                    if density[2] == '1.0' and density[3] == '1e-05':
                        replacement = ' 18 '
                    elif density[2] == '1.0' and density[3] == '1.0':
                        replacement = ' 19 '
                    else:
                        replacement = ' 20 '
                    # ' 10 ' includes the surrounding spaces so that only
                    # the field itself is replaced (and not, for example,
                    # 3104 turned into 3184).
                    new_row = row.replace(' 10 ', replacement)
        elif idx == 3:
            new_row = row.replace(m1, new_m1)
        newFile.write(new_row)

print("script terminated successfully!")
OK, here is another solution. For anybody else who reads this: this is still only a temporary solution, but neither Sagar nor I know how to do it better.
import pandas as pd
# Number of lines skipped at the top of tryout.hmo.  This value is specific
# to the file the answer was written for.
LINES_TO_SKIP = 52362

df = pd.read_csv('tryout.hmo', header=0, names=list('ABCDEFGHIJKLM'),
                 delimiter=r'\s+', skiprows=LINES_TO_SKIP)
df2 = pd.read_csv('Density.topo', header=0, names=list('ANOP'),
                  delimiter=r'\s+', skiprows=1)
# Drop the last three rows of the density table.
df2 = df2.iloc[:-3, :]

# Join the two tables on the first column (A).
df3 = df.merge(df2, how='outer', on='A')

# Turn O and P into whole numbers.  Missing values cannot be cast to int, so
# they are parked as -1 for the cast and turned back into NaN afterwards.
df3[['O', 'P']] = (
    df3[['O', 'P']].fillna(-1).astype(int).replace(-1, float('nan'))
)


def pick_value(row):
    """Return the new B value for one merged row."""
    if row['B'] != 10:
        return row['B']
    if row['O'] == 1 and row['P'] == 0:
        return 18
    if row['O'] == 1 and row['P'] == 1:
        return 19
    if row['O'] == 0 and row['P'] == 0:
        return 20
    return row['B']


df3['B'] = df3.apply(pick_value, axis=1)
df3.to_csv('new_tryout.csv')
It finished the code in less than a second, so it is far better than iterrows or itertuples.
The new csv file includes both the tryout file and the density file. They are merged together by the first column of tryout file (ID i guess)
I didn't check all of this very big file but from the few random points I checked, it seems as this way works.

Read .xls with xlrd in Python

I'm trying to get a list of backgrounds for each shot in an .xls document, but I have no idea how to stop reading columns and rows when a different shot begins...
The .xls I'm reading is like this:
and my test code is there:
import xlrd
planNameToFind = '002'
backgroundsList = []
openFolderPath = 'I:\\manue\\REFS\\516\\ALCM_516_SceneAssetList.xls'.replace('/', '\\')
wb = xlrd.open_workbook(openFolderPath)
sheets = wb.sheet_by_index(0)
nrows = sheets.nrows
allcols = sheets.ncols
episode_index = 0
shot_index = 2
background_index = 9
rst = 1


def cell_text(rowx, colx):
    """Return a cell as text, showing whole-number floats without '.0'."""
    value = sheets.cell_value(rowx, colx)
    if isinstance(value, float) and value.is_integer():
        return str(int(value))
    return str(value)


# Assumes the shot name is filled in only on the first row of a shot and
# left blank on the rows that continue it -- TODO confirm against the sheet.
# The current shot is remembered so that blank rows count only while they
# belong to the shot being searched for.
currentPlan = ''
for seqrow in range(rst + 1, nrows):
    planName = cell_text(seqrow, shot_index)
    if planName != '':
        currentPlan = planName
    if currentPlan != planNameToFind:
        continue

    background = cell_text(seqrow, background_index)
    if planName == planNameToFind:
        print('planName %s' % planName)
        print('background:  %s' % background)
        backgroundsList.append(background)
    elif background != '':
        print('background2:  %s' % background)
        backgroundsList.append(background)
    #shotToDo = ' shotToDo: {0} BG: {1}'.format(planName,background)

print('backgroundsList:  %s' % (backgroundsList,))
My result logs all the backgrounds in the .xls, but I only need the backgrounds of shot '002' (here only 3 backgrounds). Does someone know how to read the backgrounds for a single shot only?
Your best bet is to work with Panda Dataframes. It's extremely easy to clean data and work with it.

Using xlwings to open excel sheet. Need to search a string and print full line

I'm using xlwings to open an Excel sheet. I need to search for a string in a specific column and print the rest of that line, without the search term, up to the next newline (\n). The output should go in a new column of the same sheet.
Input:
search string: [game]
Output:
import xlwings as xw
# Open the Excel file using xlwings.
filename = r'input.xlsx'
book = xw.Book(filename)
sheet = book.sheets[0]

# Find the last row of the sheet on a specific range, in this case column 'A'.
lrow = sheet.range('A' + str(sheet.cells.last_cell.row)).end('up').row

# Declare a separate variable for the string that you will search for and
# for the column where your output will be located.
search_string = '[game]'
sheet.range('B1').value = 'output'
output_index = 2

# Now loop through that range to see if your search_string is in it.
for i in range(1, lrow + 1):
    # Read the cell once and reuse the text.
    cell_text = str(sheet.range('A{}'.format(i)).value)
    if search_string not in cell_text:
        continue
    # Keep what follows the search string, up to the next '['.
    temp = cell_text.split(search_string)[1]
    temp = temp.split('[')[0]
    sheet.range('B{}'.format(output_index)).value = temp
    output_index += 1

book.save()
book.close()
Below is the full code >>
import xlwings as xw
filename = r'input.xlsx'
book = xw.Book(filename)
sheet = book.sheets[0]

# Last row of column 'A', found by going up from the bottom of the sheet.
lrow = sheet.range('A' + str(sheet.cells.last_cell.row)).end('up').row

search_string = '[game]'
sheet.range('B1').value = 'output'
# Results are written one below the other in column B, starting at row 2.
output_index = 2

for i in range(1, lrow + 1):
    # Read the cell once and reuse the text.
    cell_text = str(sheet.range('A{}'.format(i)).value)
    if search_string not in cell_text:
        continue
    # Keep what follows the search string, up to the next '['.
    temp = cell_text.split(search_string)[1]
    temp = temp.split('[')[0]
    sheet.range('B{}'.format(output_index)).value = temp
    output_index += 1

book.save()
book.close()

Openpyxl won't save file

For some reason Openpyxl won't save the xlsx file at the end of the program.
I am trying to read measurements from a file; each line is a different measurement. I want to take them and write them to Excel to make using this data easier later on. Everything seems to work, but in the end the data isn't saved; if I create a new file where the changes should be saved, it will not be created.
from openpyxl import load_workbook
from openpyxl import Workbook
# Reads measurement lines from WifiData2.txt and adds them to the first sheet
# of Data_Base.xlsx, saving the result as Data_Base1.xlsx.
# NOTE(review): the indentation of this listing was lost when it was posted,
# so the nesting of the loops and branches (including where wb.save() sits)
# cannot be read off the text.
wb = load_workbook(filename='Data_Base.xlsx')
sheet = wb.worksheets[0]
BS = []
Signal = []
with open('WifiData2.txt') as f:
for line in f:
# The part before '|' is split on ';': fields 1, 2 and 3 are read as
# floor, x and y.
y = int(line.split('|')[0].split(';')[3])
x = int(line.split('|')[0].split(';')[2])
# NOTE(review): this passes the text to int() as a keyword argument named x;
# presumably int(...) without `x =` was meant.  Current Python versions
# reject keyword arguments to int() with TypeError -- TODO confirm.
floor = int(x = line.split('|')[0].split(';')[1])
# The part after '|' holds ';'-separated "BSSID strength" pairs.
data = line.split("|")[1].strip()
measurements = data.split(";")
for l in measurements:
raw = l.split(" ")
BSSID = raw[0]
signal_strength = raw[1]
print(signal_strength)
BS.append(BSSID)
Signal.append(signal_strength)
# range() starts at 0 and stops before sheet.max_row; together with the
# `row_num > 1` test, rows 2 .. max_row - 1 are the ones examined.
for row_num in range(sheet.max_row):
num = row_num
if row_num > 1:
test_X = int(sheet.cell(row=row_num, column=4).value)
test_Y = int(sheet.cell(row=row_num, column=3).value)
test_floor = int(sheet.cell(row=row_num, column=2).value)
if (test_X == x and test_Y == y and test_floor == floor):
# NOTE(review): `nr` is read here, but no earlier assignment to it is
# visible in this listing.
nr = nr + 1
if (nr > 3):
q = 1
# NOTE(review): `q` is only ever assigned 1 in this listing; if that branch
# has not run, `q` is not defined at this point.
if (q == 0):
sheet.cell(row=sheet.max_row+1, column = 2, value = floor)
sheet.cell(row=sheet.max_row + 1, column=3, value=x)
sheet.cell(row=sheet.max_row + 1, column=4, value=y)
sheet.cell(row=sheet.max_row + 1, column=2, value=sheet.max_row)
# `element` is an item of BS (a BSSID string), yet it is used below as an
# index into BS and Signal.
for element in BS:
nr = 0
# NOTE(review): sheet.max_column is used as a number further down
# (`sheet.max_column+1`), so it cannot also be iterated over here;
# presumably range(1, sheet.max_column + 1) was meant.
for col in sheet.max_column:
if BS[element] == sheet.cell(row=1, column=col).value:
sheet.cell(row=sheet.max_row + 1, column=col, value=Signal[element])
nr = 1
if (nr == 0):
sheet.cell(row=1, column=sheet.max_column+1, value=BS[element])
sheet.cell(row=sheet.max_row+1, column=sheet.max_column + 1, value=BS[element])
Signal.clear()
BS.clear()
wb.save('Data_Base1.xlsx')
What is weird is that if I save the workbook earlier, it will create the file. Of course that doesn't really work for me, since any changes that I want made won't be made. I had a similar issue when I tried it with the xlrd/xlwt/xlutils combo. Does anyone know where the problem is?
Using an absolute path instead of a relative path will do the trick!
Add
wb.template = False
before
wb.save('Filename.xlsx')

Categories