python create daily folder

I would like to read from the serial port of an RPi and store the data in a daily folder as a '.csv' file. I can create a file, write/read to/from a csv file, and have the serial comm working with PuTTY for now (tried in a different project). In the future, the comm is going to be between the Pi and various sensors. Considering everything else is working, I am not sure how to create a separate file automatically for each day. This is what I've done so far:
import serial
import time
import csv

def readLineCR(port):
    rv = ""
    while True:
        ch = port.read()
        rv += ch
        if ch == '\r' or ch == '':
            return rv

port = serial.Serial("/dev/ttyAMA0", baudrate=115200, timeout=10)

while True:
    rcv = readLineCR(port)
    str1 = time.strftime("%d%m%y")
    file = open('directory....')
    with open('test.csv', 'w') as fp:
        a = csv.writer(fp, delimiter=',')
        # data to be tested
        data = [[str1, '1234'], [str1, '4321']]
        a.writerows(data)
        print('csv is created on: ' + str1)
    reader = csv.reader(file)
    for line in reader:
        print(line)
Any help would be appreciated.

Use datetime.datetime.now().strftime("%Y-%m-%d") to create the folder name, os.path.exists(...) to check whether the folder exists, and os.mkdir(...) to create a new folder.
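A minimal sketch of that suggestion (the /home/pi/data base path is an assumption; adjust it to your setup):

import datetime
import os

base_path = '/home/pi/data'  # hypothetical base directory; must already exist for os.mkdir
folder_name = datetime.datetime.now().strftime("%Y-%m-%d")
directory = os.path.join(base_path, folder_name)

if not os.path.exists(directory):  # create the daily folder only if it is missing
    os.mkdir(directory)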

Thank you @furas. This is what I did, and it seems to be working.
import os
import time

todayDate = time.strftime("%d-%m-%y")
directory = '/home/pi/...' + todayDate
if not os.path.exists(directory):
    os.makedirs(directory)
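To get the "separate file for each day" behavior from the question, a hedged sketch building on the snippet above (the base path, data.csv name, and sample row are all hypothetical):

import csv
import os
import time

todayDate = time.strftime("%d-%m-%y")
directory = '/home/pi/data/' + todayDate  # hypothetical base path
if not os.path.exists(directory):
    os.makedirs(directory)

# open the day's CSV inside the daily folder; 'a' appends rows all day long
with open(os.path.join(directory, 'data.csv'), 'a') as fp:
    writer = csv.writer(fp, delimiter=',')
    writer.writerow([time.strftime("%H:%M:%S"), '1234'])  # hypothetical sample row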


file processing using multiprocessing

I am a beginner to Python and am trying to add a few lines of code to convert JSON to CSV and back to JSON. I have thousands of files (size 300 MB) to be converted and processed. With the current program (using 1 CPU), I am not able to use the server's 16 CPUs, and I need suggestions on fine-tuning the program for multiprocessing. Below is my code (Python 3.7).
import json
import csv
import os

os.chdir('/stagingData/Scripts/test')

for JsonFile in os.listdir(os.getcwd()):
    PartialFileName = JsonFile.split('.')[0]

    j = 1
    with open(PartialFileName + ".csv", 'w', newline='') as Output_File:
        with open(JsonFile) as fileHandle:
            i = 1
            for Line in fileHandle:
                try:
                    data = json.loads(Line, parse_float=str)
                except json.JSONDecodeError:
                    print("Can't load line {}".format(i))
                    i += 1
                    continue  # skip the bad line instead of reusing stale data
                if i == 1:
                    header = data.keys()
                    output = csv.writer(Output_File)
                    output.writerow(header)  # writes header row
                i += 1
                output.writerow(data.values())  # writes values row
                j += 1
I would appreciate suggestions on the multiprocessing logic.
If you have a single big file that you want to process more effectively, I suggest the following:
Split file into chunks
Create a process to process each chunk
(if necessary) merge the processed chunks back into a single file
Something like this:
import csv
import json
from pathlib import Path
from concurrent.futures import ProcessPoolExecutor

source_big_file = Path('/path/to/file')

def chunk_file_by_line(source_filepath: Path, chunk_line_size: int = 10_000):
    intermediate_file_handlers = {}
    last_chunk_filepath = None
    with source_filepath.open('r', encoding='utf8') as big:
        for line_number, line in enumerate(big):
            group = line_number - (line_number % chunk_line_size)
            chunk_filename = f'{source_filepath.stem}.g{group}{source_filepath.suffix}'
            chunk_filepath = source_filepath.parent / chunk_filename
            if chunk_filepath not in intermediate_file_handlers:
                file_handler = chunk_filepath.open('w', encoding='utf8')
                intermediate_file_handlers[chunk_filepath] = file_handler
                if last_chunk_filepath:
                    last_file_handler = intermediate_file_handlers[last_chunk_filepath]
                    last_file_handler.close()
                    yield last_chunk_filepath
            else:
                file_handler = intermediate_file_handlers[chunk_filepath]
            file_handler.write(line)
            last_chunk_filepath = chunk_filepath
    # close and output the last one
    if last_chunk_filepath:
        intermediate_file_handlers[last_chunk_filepath].close()
        yield last_chunk_filepath

def json_to_csv(json_filepath: Path) -> Path:
    csv_filename = f'{json_filepath.stem}.csv'
    csv_filepath = json_filepath.parent / csv_filename
    with csv_filepath.open('w', encoding='utf8', newline='') as csv_out, \
         json_filepath.open('r', encoding='utf8') as json_in:
        dwriter = None
        for json_line in json_in:
            data = json.loads(json_line)
            if dwriter is None:
                # create the writer and header row from the first record's keys
                dwriter = csv.DictWriter(csv_out, fieldnames=list(data.keys()))
                dwriter.writeheader()
            dwriter.writerow(data)
    return csv_filepath

with ProcessPoolExecutor() as pool:
    futures = []
    for chunk_filepath in chunk_file_by_line(source_big_file):
        future = pool.submit(json_to_csv, chunk_filepath)
        futures.append(future)

    # wait for all to finish
    for future in futures:
        csv_filepath = future.result(timeout=None)  # waits until complete
        print(f'conversion complete> csv filepath: {csv_filepath}')
Since you have many files, the simplest multiprocessing example from the documentation should work for you. https://docs.python.org/3.4/library/multiprocessing.html?highlight=process
from multiprocessing import Pool
import os

def f(JsonFile):
    # open input, output files and convert
    ...

with Pool(16) as p:
    p.map(f, os.listdir(os.getcwd()))
You could also try replacing listdir with os.scandir(), which doesn't have to return all directory entries before starting.
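A hedged sketch of that os.scandir() variant, assuming the same conversion function f and a .json filename filter (both hypothetical here):

from multiprocessing import Pool
import os

def f(json_path):
    # open input and output files and convert (conversion body elided)
    ...

if __name__ == '__main__':
    with Pool(16) as p:
        # os.scandir yields entries lazily instead of building the full list first
        paths = (entry.path for entry in os.scandir(os.getcwd())
                 if entry.is_file() and entry.name.endswith('.json'))
        for _ in p.imap_unordered(f, paths):
            pass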

How to send the data written to a file (.csv) to a folder on my desktop (directory)?

Essentially the data is temperatures from 4 different states over the course of 12 months, so there are 48 files to be populated into a folder on my desktop. I am copying the data from the web, cleaning it up, and saving it into a file; what I am confused about is how to take the files being created by my program and send them to that folder on my desktop.
Here is the code:
import urllib

def accessData(Id, Month):
    url = "https://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID=" + str(Id) + "&year=2017&month=" + str(Month) + "&graphspan=month&format=1"
    infile = urllib.urlopen(url)
    readLineByLine = infile.readlines()
    infile.close()
    return readLineByLine

f = open('stations.csv', 'r')

for line in f.readlines():
    vals = line.split(',')
    for j in range(1, 13):  # accessing months here from 1 to 12, b/c 13 exclusive
        data = accessData(line, j)
        filename = "{}-0{}-2017.csv".format(vals[0], j)
        print(str(filename))
        row_count = len(data)
        for i in range(2, row_count):
            if(data[i] != '<br>\n' and data[i] != '\n'):
                writeFile = open(filename, 'w')
                writeFile.write(data[i])
                openfile = open(Desktop, writeFile, 'r')
                file.close()
Have you tried running the script from your desktop? It looks like you haven't specified a directory, so maybe running it from your desktop would output your results to your current working directory.
Alternatively, you could use the built-in os library.
import os
os.getcwd() # to get the current working directory
os.chdir(pathname) # change your working directory to the path specified.
This would change your working directory to the place you want to save your files.
Also, regarding the last four lines of your code: file is never opened, so you cannot close it. I also do not believe you need the openfile statement.
writeFile = open(filename, 'w')
writeFile.write(data[i])
openfile = open(Desktop, writeFile , 'r')
file.close()
Try this instead.
with open(filename, 'w') as writeFile:
    for i in range(2, row_count):
        if(data[i] != '<br>\n' and data[i] != '\n'):
            writeFile.write(data[i])
Using this approach you shouldn't need to close the file. 'w' writes as if to a new file; change it to 'a' if you need to append to the file.
You just need to provide open() with the path to your destination file, rather than just the filename (which would otherwise be saved into your current working directory).
Try something like:
f = open('stations.csv', 'r')
target_dir = "/path/to/your/Desktop/folder/"

for line in f.readlines():
    ...
    # We can open the file outside your inner "row" loop
    # using the combination of the path to your Desktop
    # and your filename
    with open(target_dir + filename, 'w') as writeFile:
        for i in range(2, row_count):
            if(data[i] != '<br>\n' and data[i] != '\n'):
                writeFile.write(data[i])
    # The "writeFile" object will close automatically outside the
    # "with ..." block
As others have mentioned, you could approach this in two different ways:
1) Run the script directly from the directory to which you would like to save the files. Then you would just need to specify the full path to the .csv file you are reading.
2) You could provide the full path to where you would like to save the files when you write them; however, this seems more cumbersome and unnecessary.
On another note, when opening files for reading or writing, use with so the file stays open only as long as you need it; when you exit the with block, the file is closed automatically.
Here is an example of Option 1 with some clean-up:
import urllib

def accessData(Id, Month):
    url = "https://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID=" + str(Id) + "&year=2017&month=" + str(Month) + "&graphspan=month&format=1"
    infile = urllib.urlopen(url)
    readLineByLine = infile.readlines()
    infile.close()
    return readLineByLine

with open('Path to File' + 'stations.csv', 'r') as f:
    for line in f.readlines():
        vals = line.split(',')
        for j in range(1, 13):
            data = accessData(vals[0], j)  # pass the station ID, not the whole line
            filename = "{}-0{}-2017.csv".format(vals[0], j)
            with open(filename, 'w') as myfile:
                for i in range(2, len(data)):
                    if data[i] != '<br>\n' and data[i] != '\n':
                        myfile.write(data[i])
            print(filename + ' - Completed')

how to write system info to a spreadsheet in python

I am trying to write system info to a spreadsheet, but when I try to use my variables they come out blank.
import csv
import os
import linecache
os.system('getmac -v > mac.txt')
os.system("wmic bios get serialnumber > serial.txt")
os.system("wmic computersystem get model > model.txt")
os.system("hostname > hostname.txt")
os.system("ipconfig > ip.txt")
open('ip1.txt', 'w').writelines([line for line in open('ip.txt') if 'IPv4' in line])
open('mac1.txt', 'w').writelines([line for line in open('mac.txt') if 'Wi-Fi' in line])
open('mac2.txt', 'w').writelines([line for line in open('mac.txt') if 'Ethernet' in line])
serial = linecache.getline('serial.txt', 3)
model = linecache.getline('model.txt', 3)
mac = open("mac.txt","r")
IP = open("ip1.txt","r")
mac1 = open("mac1.txt","r")
mac2 = open("mac2.txt","r")
hostname = open("hostname.txt","r")
Rmac = mac.read()
Rip = IP.read()
Rmac1 = mac1.read()
Rmac2 = mac2.read()
Rhostname = hostname.read()
myData = [[model]]
myFile = open('example2.csv', 'w')
with myFile:
    writer = csv.writer(myFile)
    writer.writerows(myData)
This just will not write the information to the spreadsheet. What am I doing wrong? I am very new to programming, btw.
You don't need intermediary files; why not call your commands and write their info to your CSV immediately, without all that back-and-forward dancing?
import csv
import subprocess

# get the model
model = subprocess.check_output(["WMIC", "computersystem", "get", "model"],
                                universal_newlines=True).strip().rsplit("\n", 1)[1]
# get the serial
serial = subprocess.check_output(["WMIC", "bios", "get", "serialnumber"],
                                 universal_newlines=True).strip().rsplit("\n", 1)[1]
# get the host name
hostname = subprocess.check_output(["hostname"], universal_newlines=True).strip()
# get WMI output for all addresses
ips = subprocess.check_output(["WMIC", "NICCONFIG", "where", "IPEnabled=true",
                               "get", "IPAddress"],
                              universal_newlines=True).strip().split("\n\n")[1:]
# post-process to get the addresses only
ips = [ip.split(",")[0].strip('"{} ') for ip in ips]
# etc.

with open("example2.csv", "w", newline="") as f:  # open your CSV for writing
    writer = csv.writer(f)  # create a writer
    # you didn't write a header but let's add it in
    writer.writerow(["model", "serial", "hostname", "ips"])  # etc., you get the picture...
    writer.writerow([model, serial, hostname, ",".join(ips)])  # add other columns, too
And you'll get a nice example2.csv containing:
model,serial,hostname,ips
Your Model,Your Serial,Your-Hostname,List.Of.IP.Addresses
Do the same for the other fields and you're done.
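For instance, the getmac fields from the question could be pulled the same way; a hedged sketch that keeps whole matching lines, as the original mac1.txt/mac2.txt filtering did (the extra columns and ';' join are assumptions):

import csv
import subprocess

# filter the command's lines in memory instead of via mac1.txt/mac2.txt
getmac_lines = subprocess.check_output(["getmac", "-v"],
                                       universal_newlines=True).splitlines()
wifi = [line for line in getmac_lines if "Wi-Fi" in line]
ethernet = [line for line in getmac_lines if "Ethernet" in line]

with open("example2.csv", "a", newline="") as f:  # append to the same CSV
    writer = csv.writer(f)
    # hypothetical extra columns; join multiple adapters with ';'
    writer.writerow([";".join(wifi), ";".join(ethernet)])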

Search and replace specific line which starts with specific string in a file

My requirement is to open a properties file and update it; for the update I need to search for a specific string which stores the URL information. For this purpose I have written the below code in Python:
import os

owsURL = "https://XXXXXXXXXXXXXX/"
reowsURL = "gStrOwsEnv = " + owsURL + "/" + "OWS_WS_51" + "/"

fileName = 'C:/Users/XXXXXXXXXXX/tempconf.properties'

if not os.path.isfile(fileName):
    print("!!! Message : Configuration.properties file is not present ")
else:
    print("+++ Message : Located the configuration.properties file")
    with open(fileName) as f:
        data = f.readlines()
    for m in data:
        if m.startswith("gStrOwsEnv"):
            print("ok11")
            m = m.replace(m, reowsURL)
After executing the program I am not able to update the properties file.
Any help is highly appreciated.
Sample Content of file:
# ***********************************************
# Test Environment Details
# ***********************************************
# Application URL pointing to test execution
#gStrApplicationURL =XXXXXXXXXXXXXXXX/webservices/person
#gStrApplicationURL = XXXXXXXXXXXXXX/GuestAPIService/ProxyServices/
# FOR JSON
#gStrApplicationURL = XXXXXXXXXXXXXX
#SOAP_gStrApplicationURL =XXXXXXXXXXXXXXXXXXXXXXX
#(FOR WSDL PARSING)
version = 5
#v9
#SOAP_gStrApplicationURL = XXXXXXXXXXX/XXXXXXXXX/XXXXXXXXX/
#v5
SOAP_gStrApplicationURL = XXXXXXXXXXXXXXX/OWS_WS_51/
gStrApplicationXAIServerPath=
gStrEnvironmentName=XXXXXXXXX
gStrOwsEnv = XXXXXXXXXXXXXXXXXXXX/OWS_WS_51/
gStrConnectEnv = XXXXXXXXXXXXXXXXX/OWSServices/Proxy/
gStrSubscriptionKey =XXXXXXXXXXXXXXXXXXXXXX
I'm pretty sure that this is not the best way of doing that, but this is still one way:
with open(input_file_name, 'r') as f_in, open(output_file_name, 'w') as f_out:
    for line in f_in:
        if line.startswith("gStrOwsEnv"):
            f_out.write(reowsURL + '\n')  # add the newline the replacement string lacks
        else:
            f_out.write(line)
That script copies every line of input_file_name into output_file_name, except the lines that you want to change.
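If the file really has to be updated in place, a hedged sketch is to write to a temporary file and then swap it over the original; the .tmp name is hypothetical, and os.replace swaps atomically on the same filesystem:

import os

fileName = 'C:/Users/XXXXXXXXXXX/tempconf.properties'  # path from the question
tmpName = fileName + '.tmp'  # hypothetical temporary file next to the original
reowsURL = "gStrOwsEnv = https://XXXXXXXXXXXXXX//OWS_WS_51/"  # as built in the question

with open(fileName, 'r') as f_in, open(tmpName, 'w') as f_out:
    for line in f_in:
        if line.startswith("gStrOwsEnv"):
            f_out.write(reowsURL + '\n')  # replacement line, newline added
        else:
            f_out.write(line)

os.replace(tmpName, fileName)  # atomically swap the updated file in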

Script that reads PDF metadata and writes to CSV

I wrote a script to read PDF metadata to ease a task at work. The current working version is not very usable in the long run:
from pyPdf import PdfFileReader

BASEDIR = ''
PDFFiles = []

def extractor():
    output = open('windoutput.txt', 'r+')
    for file in PDFFiles:
        try:
            pdf_toread = PdfFileReader(open(BASEDIR + file, 'r'))
            pdf_info = pdf_toread.getDocumentInfo()
            #print str(pdf_info)  # print full metadata if you want
            x = file + "~" + pdf_info['/Title'] + " ~ " + pdf_info['/Subject']
            print x
            output.write(x + '\n')
        except:
            x = file + '~' + ' ERROR: Data missing or corrupt'
            print x
            output.write(x + '\n')
            pass
    output.close()

if __name__ == "__main__":
    extractor()
Currently, as you can see, I have to manually input the working directory and manually populate the list of PDF files. It also just prints out the data in the terminal in a format that I can copy/paste/separate into a spreadsheet.
I'd like the script to work automatically in whichever directory I throw it in and populate a CSV file for easier use. So far:
from pyPdf import PdfFileReader
import csv
import os

def extractor():
    basedir = os.getcwd()
    extension = '.pdf'
    pdffiles = [filter(lambda x: x.endswith('.pdf'), os.listdir(basedir))]
    with open('pdfmetadata.csv', 'wb') as csvfile:
        for f in pdffiles:
            try:
                pdf_to_read = PdfFileReader(open(f, 'r'))
                pdf_info = pdf_to_read.getDocumentInfo()
                title = pdf_info['/Title']
                subject = pdf_info['/Subject']
                csvfile.writerow([file, title, subject])
                print 'Metadata for %s written successfully.' % (f)
            except:
                print 'ERROR reading file %s.' % (f)
                #output.writerow(x + '\n')
                pass

if __name__ == "__main__":
    extractor()
In its current state it seems to just print a single error message (as in, the error message in the exception, not an error returned by Python) and then stop. I've been staring at it for a while and I'm not really sure where to go from here. Can anyone point me in the right direction?
writerow([file, title, subject]) should be writerow([f, title, subject])
You can use sys.exc_info() to print the details of your error
http://docs.python.org/2/library/sys.html#sys.exc_info
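Note also that csvfile is the raw file object, which has no writerow method; a hedged sketch of the corrected write path using csv.writer (pdffiles as built earlier in the question):

import csv
from pyPdf import PdfFileReader

with open('pdfmetadata.csv', 'wb') as csvfile:
    writer = csv.writer(csvfile)  # wrap the file object; it has no writerow itself
    for f in pdffiles:  # pdffiles as built earlier in the question
        pdf_info = PdfFileReader(open(f, 'r')).getDocumentInfo()
        writer.writerow([f, pdf_info['/Title'], pdf_info['/Subject']])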
Did you check that the pdffiles variable contains what you think it does? I was getting a list inside a list... so maybe try:
for files in pdffiles:
    for f in files:
        # do stuff with f
I personally like glob. Notice I add * before the .pdf in the extension variable:
import os
import glob

basedir = os.getcwd()
extension = '*.pdf'
pdffiles = glob.glob(os.path.join(basedir, extension))
Figured it out. The script I used to download the files was saving the files with '\r\n' trailing after the file name, which I didn't notice until I actually ls'd the directory to see what was up. Thanks for everyone's help.
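For anyone hitting the same thing, a small hedged check makes such names visible (repr is what exposes the hidden characters):

import os

# sketch: flag directory entries with trailing whitespace such as '\r\n'
for name in os.listdir('.'):
    if name != name.strip():
        print(repr(name))  # repr makes the hidden '\r\n' visible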
