Overwrite a specific column in a csv file using Python csv module - python

I am using Python csv module to read a csv file with every line being like:
2013-04-16 7:11:01,186744,3,2,2,1.89E-03
I then convert row[0] to unix time but then I want to replace the datetime with the unix time I just found for every row of my csv file
import pymongo
import datetime
import re
import csv
import calendar
X = []
OBD = []
Y = []
csv_in = open('FakeAPData.csv', 'rb')
for row in reader:
date = datetime.datetime.strptime(row[0], '%Y-%m-%d %H:%M:%S')
datet = unicode(datetime.datetime.strptime(row[0], '%Y-%m-%d %H:%M:%S'))
datett = tuple(int(v) for v in re.findall("[0-9]+", datet))
y = calendar.timegm(datett)
Y.append(y)
So I create the list Y with the unixtime values but then how do I do the replacement so as to have an output like that:
1366097085,186744,3,2,2,1.89E-03

First of all, there are better ways to convert a textual date-time format into a UNIX timestamp. Direct use of the time module simplifies your code to:
import time
import calendar
# calendar.timegm() treats the parsed struct_time as UTC and returns the
# matching UNIX timestamp (whole seconds since the epoch).
timestamp = calendar.timegm(time.strptime(row[0], '%Y-%m-%d %H:%M:%S'))
but even the datetime object you created has .timetuple() and .utctimetuple() methods that would be miles more reliable at producing a struct_time tuple than parsing the string format of the datetime object back to a tuple of integers. You may as well do that directly on row[0] as the output of str(datetime.now()) is the same format as what you started with.
Next, write out a new file and replace the old one with it once done:
import csv
import time
import calendar
import os

# Python 2 idiom: the csv module expects binary-mode files there
# (on Python 3, open both files in text mode with newline='' instead).
with open('FakeAPData.csv', 'rb') as infile, open('FakeAPData.csv.new', 'wb') as outfile:
    writer = csv.writer(outfile)
    for row in csv.reader(infile):
        # timegm() reads the parsed struct_time as UTC and returns epoch seconds.
        timestamp = calendar.timegm(time.strptime(row[0], '%Y-%m-%d %H:%M:%S'))
        # Replace only the first column; keep the rest of the row unchanged.
        writer.writerow([timestamp] + row[1:])

# Both files are closed at this point, so the new copy can replace the original.
os.rename('FakeAPData.csv.new', 'FakeAPData.csv')

Each row is just a list. You can modify it in-place, or create a new list with the value you want substituted out:
row[0] = y # or row = [y] + row[1:], or ...
If you want to write it back to a file, you need to use a csv.writer for that. For example:
os.rename('FakeAPData.csv', 'FakeAPData.csv.bak')
csv_in = open('FakeAPData.csv.bak', 'rb')
csv_out = open('FakeAPData.csv', 'wb')
writer = csv.writer(csv_out)
for row in csv.reader(csv_in):
date = datetime.datetime.strptime(row[0], '%Y-%m-%d %H:%M:%S')
datet = unicode(datetime.datetime.strptime(row[0], '%Y-%m-%d %H:%M:%S'))
datett = tuple(int(v) for v in re.findall("[0-9]+", datet))
y = calendar.timegm(datett)
row[0] = y
writer.writerow(row)
Of course you'll also want to close your files, and clean up all the repeated and unused code. While we're at it, I'd factor out the date-transforming code into a function. And use functions that make it easy, instead of ones that make it difficult and fragile.
So:
def transform_date(date):
    """Convert a 'YYYY-MM-DD HH:MM:SS' string to a UNIX timestamp.

    The string is read as UTC; the result is whole seconds since the epoch.
    Raises ValueError if the string does not match the expected format.
    """
    parsed = datetime.datetime.strptime(date, '%Y-%m-%d %H:%M:%S')
    # calendar.timegm() is the UTC inverse of time.gmtime().
    return calendar.timegm(parsed.timetuple())


def transform_row(row):
    """Return a new row whose first field is replaced by its UNIX timestamp."""
    return [transform_date(row[0])] + row[1:]
name = 'FakeAPData.csv'
bakname = name + '.bak'
# Keep the original as a .bak file and write the converted rows under the old name.
os.rename(name, bakname)
# Binary modes are the Python 2 csv idiom; the with-statement closes both files.
with open(bakname, 'rb') as csv_in, open(name, 'wb') as csv_out:
    writer = csv.writer(csv_out)
    # The generator streams rows, so the whole file is never held in memory.
    writer.writerows(transform_row(row) for row in csv.reader(csv_in))

Related

ValueError: time data 'LEGACY SYSTEM' does not match format '%H:%M:%S'

I see this is a popular question, so hopefully someone can help me out. I, however, am stumped. I have CSV file that contains a timestamp in the first column, such as
18:49:45
19:50:31
20:51:26
My code is below. I think I am using the proper formatting for 24-hours, minutes, and seconds.
import csv
import time
with open('file.csv', 'rb')as csvfile:
filereader = csv.reader(csvfile, delimiter=',')
for row in filereader:
date = row[0]
parsed = datetime.datetime.strptime(date, '%H:%M:%S')
Error: ValueError: time data 'LEGACY SYSTEM' does not match format '%H:%M:%S'
You have a header row. You can use next to retrieve (and then discard) the first line from your csv.reader iterator:
from datetime import datetime
import csv
from io import StringIO

# In-memory stand-in for the real file; its first line is the header row.
file = StringIO("""LEGACY SYSTEM
18:49:45
19:50:31
20:51:26""")

# replace file with open('file.csv', newline='')
# (on Python 3 csv.reader needs a text-mode file, not 'rb')
with file as csvfile:
    filereader = csv.reader(csvfile, delimiter=',')
    next(filereader)  # retrieve and discard the header row
    for row in filereader:
        parsed = datetime.strptime(row[0], '%H:%M:%S')
        print(parsed)
# 1900-01-01 18:49:45
# 1900-01-01 19:50:31
# 1900-01-01 20:51:26
If you have multiple, say two, header rows, you can use a for loop to ignore them:
for _ in range(2):
next(filereader)

converting datetime object to float in python reading csv

I want to put datetime values into an array. Is there a solution? I am a newbie.
import csv
from datetime import datetime
date = []
price = []
tdate = []
tprice = []
with open('TSLA.csv', 'r') as csvfile:
csvR = csv.reader(csvfile)
next(csvR) # skipping column names
for i,row in enumerate(csvR):
row_date = datetime.strptime(row[0], "%m/%d/%Y")
date.append(float(row_date))
price.append(float(row[5]))
if you want to see the error:
File "csvtest.py", line 14, in <module>
date.append(float(row_date))
TypeError: float() argument must be a string or a number, not 'datetime.datetime'
update
with open('TSLA.csv', 'r') as csvfile:
csvR = csv.reader(csvfile)
next(csvR) # skipping column names
for i,row in enumerate(csvR):
ts = time.strptime(row[0], "%m/%d/%Y")
time.mktime(ts)
date.append(float(ts))
price.append(float(row[5]))
error:
TypeError: float() argument must be a string or a number, not 'time.struct_time'
Conversion to float is not very informative here. You can however convert the datetime object to a timestamp object.
Considering that row[0] holds a datetime object, something like below should work:
import time
# time.mktime() takes a local-time struct_time and returns seconds since the epoch as a float.
timestamp = time.mktime(row[0].timetuple())
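timestamp would be a POSIX timestamp (seconds since the epoch, as a float) generated from your datetime object; note that time.mktime() interprets the time tuple as local time.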
Update:
Observed that row[0] holds a date in string format.
>>> import time
>>> ts = time.strptime("10/10/2018", "%m/%d/%Y")
>>> time.mktime(ts)
1539109800.0
Solved:
with open('TSLA.csv', 'r') as csvfile:
csvR = csv.reader(csvfile)
next(csvR) # skipping column names
for i,row in enumerate(csvR):
date.append(datetime.strptime(row[0],'%m/%d/%Y'))
price.append(float(row[5]))
if(i >= 25):
tdate.append(float(row[7]))
tprice.append(float(row[5]))
break

Python- MM/DD/YY Sorting

So I'm getting this error:
time data '6/28/18' does not match format '%b/%d/%y'
I have a csv file with the 4th column having the dates and want to sort the data by date... Any suggestions or possible solutions? I'm not so familiar with the datetime feature of Python...
import csv
from datetime import datetime
with open('example.csv', newline='') as f:
reader = csv.reader(f)
data = sorted(reader, key = lambda row: datetime.strptime(row[4], '%b/%d/%y'))
print (data)
Use "%m/%d/%y" instead of "%b/%d/%y"
>>> x = '6/28/18'
>>> datetime.strptime(x, '%m/%d/%y')
datetime.datetime(2018, 6, 28, 0, 0)
Your datetime.strptime format string should be '%m/%d/%y'.
The %b option would work if your month was an abbreviated name like 'Jun'
For more on Python's datetime formatting options see this link:
https://docs.python.org/2/library/datetime.html#strftime-and-strptime-behavior

Convert any Date String Format to a specific date format string

I am making a generic tool which can take any CSV file. I have a CSV file which looks something like this. The first row is the column name and the second row is the type of variable.
sam.csv
Time,M1,M2,M3,CityName
temp,num,num,num,city
20-06-13,19,20,0,aligarh
20-02-13,25,42,7,agra
20-03-13,23,35,4,aligarh
20-03-13,21,32,3,allahabad
20-03-13,17,27,1,aligarh
20-02-13,16,40,5,aligarh
Other CSV file looks like:
Time,M1,M2,M3,CityName
temp,num,num,num,city
20/8/16,789,300,10,new york
12/6/17,464,67,23,delhi
12/6/17,904,98,78,delhi
So, there could be any date format, or it could be a timestamp. I want to convert it to a "20-May-13" ("%d-%b-%y") format string every time and sort the column from the oldest date to the newest date. I have been able to find the column whose type is "temp" and tried to convert it to the required format, but all the methods require me to specify the original format, which is not possible in my case.
Code--
import csv
import time
from datetime import datetime,date
import pandas as pd
import dateutil
from dateutil.parser import parse
filename = 'sam.csv'
data_date = pd.read_csv(filename)
column_name = data_date.ix[:, data_date.loc[0] == "temp"]
column_work = column_name.iloc[1:]
column_some = column_work.iloc[:,0]
default_date = datetime.combine(date.today(), datetime.min.time()).replace(day=1)
for line in column_some:
print(parse(line[0], default=default_date).strftime("%d-%b-%y"))
In "sam.csv", the dates are in 2013. But in my output it gives the correct format but all the 6 dates as 2-Mar-2018
You can use the dateutil library for converting any date format to your required format.
Ex:
import csv
from dateutil.parser import parse
p = "PATH_TO_YOUR_CSV.csv" #I have used your sample data to test.
with open(p, "r") as infile:
reader = csv.reader(infile)
next(reader) #Skip Header
next(reader) #Skip Header
for line in reader:
print(parse(line[0]).strftime("%d-%B-%y")) #Parse Date and convert it to date-month-year
Output:
20-June-13
20-February-13
20-March-13
20-March-13
20-March-13
20-February-13
20-August-16
06-December-17
06-December-17
MoreInfo on Dateutil

How can I change date format in csv in Python 3

I'm new to Python, and I have a set of data in a CSV file that I would like to change the format from
'%Y-%m-%dT%H:%MZ' to '%m/%d/%Y'
I'm running Python 3 on Windows. I've searched S.O. (and other sites) several times but none of the examples/solutions seem to actually convert the format of the output. I've read the Python online documentation but was unable to take anything meaningful away from it.
Here's the code I just tried, and it doesn't change the formatting on any of the entries in the column:
with open('some_file', 'r') as source:
with open('some_other_file', 'w') as result:
writer = csv.writer(result, lineterminator='\n')
reader = csv.reader(source)
source.readline()
for row in reader:
ts = row[17]
ts = datetime.strptime(ts, '%Y-%m-%dT%H:%MZ').strftime("%m/%d/%Y")
if ts != "":
writer.writerow(row)
source.close()
result.close()
I get no errors, but I get no change in the format of the timestamp either.
Suppose you have a date x:
x = "2017-07-01T15:55Z"
You can convert it into a datetime.datetime with your format %Y-%m-%dT%H:%MZ:
from datetime import datetime
d = datetime.strptime(x, '%Y-%m-%dT%H:%MZ')
Then format it:
d.strftime("%m/%d/%Y")
You'll get:
'07/01/2017'
The complete code is:
from datetime import datetime
x = "2017-07-01T15:55Z"
x = datetime.strptime(x, '%Y-%m-%dT%H:%MZ').strftime("%m/%d/%Y")
======= EDIT =======
For your follow up question:
you need to change row after formatting ts:
ts = row[17]
ts = datetime.strptime(ts, '%Y-%m-%dT%H:%MZ').strftime("%m/%d/%Y")
if ts != "":
row[17] = ts # this is what you miss
writer.writerow(row)
If I understand your question correctly, you have a string in your CSV file that looks like '2017-08-10T20:47Z'. You should convert this to a datetime.datetime instance with
from datetime import datetime
dt = datetime.strptime('2017-08-10T20:47Z', '%Y-%m-%dT%H:%MZ')
This will give you a datetime.datetime object: datetime.datetime(2017, 8, 10, 20, 47). You can then reformat it as required with
dts = dt.strftime('%m/%d/%Y')
giving the result '08/10/2017' in dts to write to your updated CSV file.
import csv
from datetime import datetime
with open('some_file.csv', 'r') as source:
with open('some_other_file.csv', 'w') as result:
writer = csv.writer(result, lineterminator='\n')
reader = csv.reader(source)
source.readline()
for row in reader:
ts = datetime.strptime(row[0], '%m/%d/%y %H:%M').strftime("%Y-%m-%d %H:%M:00")
print(ts)
row[0]=ts
if ts != "":
writer.writerow(row)
source.close()
result.close()

Categories