Reading an Asterix file from JSON output - python

I'm trying to convert a radar data file, which was sent to me in JSON format, into a manageable DataFrame.
The first three lines of the file look like this:
{"id":1,"length":43,"crc":"D81B2DB5","timestamp":1617,"hexdata":"30002EFFD7021483000069E03BF78BE702A001E0FE2104B51D21020234269604D174E75DA008A50312B0620620B6","CAT048":{"I010":{"SAC":20,"SIC":131},"I140":{"ToD":0.8203125},"I020":{"TYP":7,"SIM":0,"RDP":0,"SPI":0,"RAB":0,"FX":0},"I040":{"RHO":59.9648438,"THETA":196.7376709},"I070":{"V":0,"G":0,"L":0,"spare":0,"Mode3A":"1240"},"I090":{"V":0,"G":0,"FL":120},"I130":{"SRLP":{"SRL":1.4501953},"SRRP":{"SRR":4},"SAMP":{"SAM":-75},"PRLP":{"PRL":1.2744141},"PAMP":{"PAM":33},"RPDP":{"RPD":0.0078125},"APDP":{"APD":0.0439453}},"I220":{"ACAddr":"342696"},"I240":{"TId":"AME4956 "},"I161":{"Tn":2213},"I200":{"CGS":172.92,"CHdg":248.0383301},"I170":{"CNF":0,"RAD":0,"DOU":0,"MAH":0,"CDM":3,"FX":0},"I230":{"COM":1,"STAT":0,"SI":0,"spare":0,"ModeSSSC":1,"ARC":0,"AIC":1,"BDS16":1,"BDS37":6}},"lat":38.585666818124,"lon":2.3784905351223,"h":3658.0244306503}
{"id":1,"length":40,"crc":"065756DA","timestamp":2468,"hexdata":"30002BFBB70214830000D2A000C8C0510A38E01804EA34239701803000000000004008BE00369EAE4624A0","CAT048":{"I010":{"SAC":20,"SIC":131},"I140":{"ToD":1.640625},"I020":{"TYP":5,"SIM":0,"RDP":0,"SPI":0,"RAB":0,"FX":0},"I040":{"RHO":0.78125,"THETA":270.4449463},"I070":{"V":0,"G":0,"L":0,"spare":0,"Mode3A":"5070"},"I130":{"SRLP":{"SRL":1.0546875},"SRRP":{"SRR":4},"SAMP":{"SAM":-22}},"I220":{"ACAddr":"342397"},"I250":[{"MCP_ALT_STATUS":1,"MCP_ALT":96,"FMS_ALT_STATUS":0,"FMS_ALT":0,"BP_STATUS":0,"BP":0,"res":0,"MODE_STATUS":0,"VNAV":0,"ALT_HOLD":0,"APP":0,"TARGET_ALT_STATUS":0,"TARGET_ALT_SOURCE":0,"BDS":"40"}],"I161":{"Tn":2238},"I200":{"CGS":11.88,"CHdg":223.1433105},"I170":{"CNF":0,"RAD":2,"DOU":0,"MAH":0,"CDM":3,"FX":0},"I230":{"COM":1,"STAT":1,"SI":0,"spare":0,"ModeSSSC":1,"ARC":0,"AIC":1,"BDS16":0,"BDS37":0}},"lat":39.543535327942,"lon":2.7284206653891,"h":4.2666605189443}
{"id":2,"length":64,"crc":"A45FA0D0","timestamp":2468,"hexdata":"300043FFF7021483000115A0896BE1B70AC105C8E01403BC4BB184508672CB482003C8480030A4018040FFD3C13A7FFCEC509E1A1F342037FF6008C1081E3CF54620F5","CAT048":{"I010":{"SAC":20,"SIC":131},"I140":{"ToD":2.1640625},"I020":{"TYP":5,"SIM":0,"RDP":0,"SPI":0,"RAB":0,"FX":0},"I040":{"RHO":137.4179688,"THETA":317.411499},"I070":{"V":0,"G":0,"L":0,"spare":0,"Mode3A":"5301"},"I090":{"V":0,"G":0,"FL":370},"I130":{"SRLP":{"SRL":0.8789062},"SRRP":{"SRR":3},"SAMP":{"SAM":-68}},"I220":{"ACAddr":"4BB184"},"I240":{"TId":"THY224 "},"I250":[{"MCP_ALT_STATUS":1,"MCP_ALT":37008,"FMS_ALT_STATUS":0,"FMS_ALT":0,"BP_STATUS":1,"BP":213,"res":0,"MODE_STATUS":1,"VNAV":1,"ALT_HOLD":0,"APP":0,"TARGET_ALT_STATUS":0,"TARGET_ALT_SOURCE":0,"BDS":"40"},{"RA_STATUS":1,"RA":-0.3515625,"TTA_STATUS":1,"TTA":84.375,"GS_STATUS":1,"GS":466,"TAR_STATUS":1,"TAR":-0.03125,"TAS_STATUS":1,"TAS":472,"BDS":"50"},{"HDG_STATUS":1,"HDG":84.5507812,"IAS_STAT":1,"IAS":271,"MACH_STATUS":1,"MACH":0.832,"BAR_STATU
I can see these lines contain the info I need, like the callsign ("TId": "AME4956 "), the heading, and so on.
Is there a nice Pythonic way to get these values into a DataFrame?

This is almost valid JSON, except the final line seems to be truncated.
Pandas can import dictionaries with almost no pain:
import json
import pandas as pd

infos = []
with open(infofile) as fid:  # infofile: path to the JSON-lines file
    for ln in fid:
        infos.append(json.loads(ln))
df = pd.DataFrame(infos)
print(df)
prints:
id length crc timestamp hexdata CAT048 lat lon h
0 1 43 D81B2DB5 1617 30002EFFD7021483000069E03BF78BE702A001E0FE2104... {'I010': {'SAC': 20, 'SIC': 131}, 'I140': {'To... 38.585667 2.378491 3658.024431
1 1 40 065756DA 2468 30002BFBB70214830000D2A000C8C0510A38E01804EA34... {'I010': {'SAC': 20, 'SIC': 131}, 'I140': {'To... 39.543535 2.728421 4.266661
2 2 64 A45FA0D0 2468 300043FFF7021483000115A0896BE1B70AC105C8E01403... {'I010': {'SAC': 20, 'SIC': 131}, 'I140': {'To... NaN NaN NaN
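As an aside, if every line were complete, valid JSON, pandas could read the file directly as JSON Lines. A sketch (the filename is illustrative; the truncated final line above would raise a ValueError, so the line-by-line loop is more forgiving):
import pandas as pd

# Read a JSON-lines file (one JSON object per line) in a single call
df = pd.read_json('radar.json', lines=True)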

I got it working with the following code:
import json
import pandas as pd

infos = []
with open('C:/Users/jobbr/Downloads/211201-est-000001/211201-est-000001.json') as fid:
    for ln in fid:
        infos.append(json.loads(ln))
df1 = pd.DataFrame(infos)
#print(df1)

# unravel this dataframe
c = df1['CAT048'].to_dict()  # the CAT048 field holds the important parameters
for i in range(len(df1)):
    #print(i, c[i]['I200']['CGS'], c[i]['I200']['CHdg'])
    df1.at[i, 'ground_speed'] = c[i]['I200']['CGS']
    df1.at[i, 'heading'] = c[i]['I200']['CHdg']
    df1.at[i, 'ACAddr'] = c[i]['I220']['ACAddr']
    df1.at[i, 'ToD'] = c[i]['I140']['ToD']
    try:  # not all parameters are always present
        df1.at[i, 'flight_level'] = c[i]['I090']['FL']
    except KeyError:
        df1.at[i, 'flight_level'] = 0
    try:
        df1.at[i, 'callsign'] = c[i]['I240']['TId']
    except KeyError:
        df1.at[i, 'callsign'] = 'Unknown'
Thanks for the help!
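Update: for completeness, pd.json_normalize can flatten the nested CAT048 dictionaries in one call instead of the manual loop. A sketch, assuming the infos list parsed above (list-valued fields such as I250 are left as-is):
# Nested dicts become dotted column names, e.g. 'CAT048.I200.CGS'
flat = pd.json_normalize(infos)
flat = flat.rename(columns={
    'CAT048.I200.CGS': 'ground_speed',
    'CAT048.I200.CHdg': 'heading',
    'CAT048.I220.ACAddr': 'ACAddr',
    'CAT048.I140.ToD': 'ToD',
})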

Related

Python - looping through rows and concating rows until a certain value is encountered

I am getting very confused over a problem I am encountering with a short Python script I am trying to put together. I am trying to iterate through a dataframe, appending rows to a new dataframe, until a certain value is encountered.
import pandas as pd

# This function will take a raw AGS file (saved as a CSV) and convert it to a
# dataframe.
# It will take the AGS CSV and print the top 5 header lines.
def AGS_raw(file_loc):
    raw_df = pd.read_csv(file_loc)
    #print(raw_df.head())
    return raw_df

import_df = AGS_raw('test.csv')

def AGS_snip(raw_df):
    for i in raw_df.iterrows():
        df_new_row = pd.DataFrame(i)
        cut_df = pd.DataFrame(raw_df)
        if "**PROJ" == True:
            cut_df = cut_df.concat([cut_df,df_new_row],ignore_index=True, sort=False)
        elif "**ABBR" == True:
            break
        print(raw_df)
        return cut_df
I don't need to get into specifics, but the values (**PROJ and **ABBR) in this data occur as single cells at the top of tables. So I want to loop row-wise through the data, appending rows until **ABBR is encountered.
When I call AGS_snip(import_df), nothing happens. Previous incarnations just spat out the whole dataframe, and I'm confused by the logic of the loops. Any assistance much appreciated.
EDIT: raw text of the CSV
**PROJ,
1,32
1,76
32,56
,
**ABBR,
1,32
1,76
32,56
The reason that "nothing happens" is likely because of the conditions you're using in if and elif.
Neither "**PROJ" == True nor "**ABBR" == True will ever be True, because neither the string "**PROJ" nor "**ABBR" is equal to True. Your code is equivalent to:
def AGS_snip(raw_df):
    for i in raw_df.iterrows():
        df_new_row = pd.DataFrame(i)
        cut_df = pd.DataFrame(raw_df)
        if False:
            cut_df = cut_df.concat([cut_df,df_new_row],ignore_index=True, sort=False)
        elif False:
            break
        print(raw_df)
        return cut_df
Which is the same as:
def AGS_snip(raw_df):
    for i in raw_df.iterrows():
        df_new_row = pd.DataFrame(i)
        cut_df = pd.DataFrame(raw_df)
        print(raw_df)
        return cut_df
You also always return from inside the loop and df_new_row isn't used for anything, so it's equivalent to:
def AGS_snip(raw_df):
    first_row = next(raw_df.iterrows(), None)
    if first_row:
        cut_df = pd.DataFrame(raw_df)
        print(raw_df)
        return cut_df
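For reference, what the original loop presumably intended is to compare each row's first cell against the marker strings. A minimal sketch that collects rows until **ABBR is encountered (assuming the markers sit in the first column):
rows = []
for idx, row in raw_df.iterrows():
    if row.iloc[0] == "**ABBR":  # stop once the end marker is reached
        break
    rows.append(row)
cut_df = pd.DataFrame(rows)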
Here's how to parse your CSV file into multiple separate dataframes based on a row condition. Each dataframe is stored in a Python dictionary, with titles as keys and dataframes as values.
import pandas as pd

df = pd.read_csv('ags.csv', header=None)

# Drop rows which consist of all NaN (Not a Number) / missing values.
# Reset index order from 0 to the end of dataframe.
df = df.dropna(axis='rows', how='all').reset_index(drop=True)

# Grab indices of rows beginning with "**", and append an "end" index.
idx = df.index[df[0].str.startswith('**')].append(pd.Index([len(df)]))

# Dictionary of { dataframe titles : dataframes }.
dfs = {}
for k in range(len(idx) - 1):
    table_name = df.iloc[idx[k], 0]
    dfs[table_name] = df.iloc[idx[k]+1:idx[k+1]].reset_index(drop=True)

# Print the titles and tables.
for k, v in dfs.items():
    print(k)
    print(v)
# **PROJ
# 0 1
# 0 1 32.0
# 1 1 76.0
# 2 32 56.0
# **ABBR
# 0 1
# 0 1 32.0
# 1 1 76.0
# 2 32 56.0
# Access each dataframe by indexing the dictionary "dfs", for example:
print(dfs['**ABBR'])
# 0 1
# 0 1 32.0
# 1 1 76.0
# 2 32 56.0
# You can rename column names with for example this code:
dfs['**PROJ'].set_axis(['data1', 'data2'], axis='columns', inplace=True)
print(dfs['**PROJ'])
# data1 data2
# 0 1 32.0
# 1 1 76.0
# 2 32 56.0
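Note: newer pandas versions (2.0+) removed the inplace argument from set_axis, so a version-independent way is to assign the column labels directly:
dfs['**PROJ'].columns = ['data1', 'data2']
# or equivalently, reassign the result:
# dfs['**PROJ'] = dfs['**PROJ'].set_axis(['data1', 'data2'], axis='columns')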

Python add column names and split rows to columns

I need help with coding.
I wrote code to get the last 2 rows of a CSV file and then save them to another file.
The code looks like this:
import csv
import pandas as pd

with open(outputFileName, "r") as f:
    reader = csv.reader(f, delimiter=",")
    data = list(reader)
    row_count = len(data)

df = pd.read_csv(outputFileName, skiprows=row_count - 2)
df.to_csv('D:\koreguoti.csv', index=False)
Data in the file now looks like this (but without the names Column1 and Column2; I just want to show you that the information is in different columns):
Column1 | Column2
2021.03.17 12:00:00 P+ 0 | 644.0
0 2021.03.17 12:00:00 P- 0 | 6735.0
So I need to have it in this format (with column names):
Date | Time | P | Value
0 2021.03.17 | 12:00:00 | P+| 644.0
1 2021.03.17 | 12:00:00 | P-| 6735.0
Could anybody help me?
I'd split the file contents, keep only what I want, and then reorganise them.
For example, if you have:
Column 1 Column 2
12 15
you do text = csv_file.split(), which gives you text = ["Column", "1", "Column", "2", "12", "15"].
And you just take the last two and do:
print("Month : Day :\n{} {}".format(text[4], text[5]))
and that's it.
Of course you need to change some things until it works for you.
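Applied to the rows in the question, a concrete version of this idea might look like the following (a sketch; the raw line below is a guess at the file's actual layout):
line = "2021.03.17 12:00:00 P+ 0 644.0"  # hypothetical raw row from the file
text = line.split()
print("Date: {} Time: {} P: {} Value: {}".format(text[0], text[1], text[2], text[4]))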
Solved, by working around it:
df['0'] = ['no']
df['1'] = ['no']
df['2'] = ['no']
df.to_csv('D:\koreguoti1.csv', index=False)
#---------------------------------------------------------------------------
#Rename column names
df = pd.read_csv('D:\koreguoti1.csv', header=None)
df.rename(columns={0: 'Data',1: 'Laikas', 2: 'P', 3: 'Nulis', 4: 'Verte'}, inplace=True)
# Copy values from one column to another
df['Verte'] = df['Laikas']
# Split first columns to 4 columns
split_data = df["Data"].str.split(" ")
data = split_data.to_list()
names = ["Data", "Laikas", "P", "Nulis"]
new_df = pd.DataFrame(data, columns=names)
new_df.insert(4, "Verte", 0)
# adding needed column
new_df['Verte'] = df['Laikas']
# Deleting not needed column "Nulis"
del new_df['Nulis']
#print(new_df)
# Save everything to new file
new_df.to_csv('D:\sutvarkyti.csv', index=False)
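For reference, the same reshaping can be done more directly with str.split(expand=True), which splits one column into several new columns in a single step. A sketch, assuming the two-column layout written by the earlier code (file paths and column names are illustrative):
import pandas as pd

df = pd.read_csv('D:/koreguoti.csv', header=None, names=['Data', 'Verte'])

# Split the combined first column on whitespace into four columns at once
parts = df['Data'].str.split(expand=True)
parts.columns = ['Date', 'Time', 'P', 'Nulis']

# Drop the unneeded zero column and attach the value column
result = parts.drop(columns='Nulis').assign(Value=df['Verte'])
result.to_csv('D:/sutvarkyti.csv', index=False)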

How can I append a list to a dataframe via a for loop in a for loop

I have made a for loop which uses a list of stock tickers to get day closing prices. Once collected, I ask the code to store the data in a dataframe. This works fine, but I am having trouble creating a way to append the dataframe over and over again, such that I am left with one large dataframe. Can anybody help with that? Please note that the API connection allows a certain number of calls per minute, so there should be a time extension if the call fails - I have tried to account for this. Please see the code below:
import time
import requests
import pandas as pd

C20 = ['AMBU-B.CPH', 'MAERSK-B.CPH']
df = pd.DataFrame()

def getdata(symbol_input):
    for i in symbol_input:
        try:
            API_KEY = 'XXXXXXXXX' #MY API KEY
            symbol = i  # search Google for the company name and "stock price"; the ticker is the one to use
            r = requests.get('https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=' + i + '&apikey=' + API_KEY)
            result = r.json()
            AllData = result['Time Series (Daily)']
            alldays = list(AllData.keys())
            alldays.sort()
            timeinterval = 10
            days = alldays[len(alldays)-timeinterval:len(alldays)]
            #print(days)
            SymbolList = []
            for i in range(timeinterval):
                SymbolList.append(symbol)
            #print(SymbolList)
            StockPriceList = []
            if (r.status_code == 200):
                for i, day in enumerate(days):
                    result = r.json()
                    dataForAllDays = result['Time Series (Daily)']
                    dataForSingleDate = dataForAllDays[days[i]]
                    #print(days[i], dataForSingleDate['4. close'])
                    StockPriceList.append(dataForSingleDate['4. close'])
                #print(StockPriceList)
                combined_lists = list(zip(days, StockPriceList, SymbolList))  # create tuples to feed into dataframe from multiple lists
                df1 = pd.DataFrame(combined_lists, columns=['Date', 'Price', 'Stock Ticker'])
                print(df1)
                time.sleep(10)
        except:
            print('could not get data for: ' + i)
            time.sleep(1)  # wait for 1 second before trying to fetch the data again
            continue

print(getdata(C20))
You can use pd.concat, collecting each temporary dataframe and then joining everything into one final dataframe.
You can use this code as an example for concatenating two different dataframes into a single final dataframe.
dataset1 = pd.DataFrame([[1,2],[2,3],[3,4]],columns=['A','B'])
dataset2 = pd.DataFrame([[4,5],[5,6],[6,7]],columns=['A','B'])
full_dataset = pd.concat([dataset1,dataset2])
full_dataset
A B
0 1 2
1 2 3
2 3 4
0 4 5
1 5 6
2 6 7
Reference: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.concat.html
Let me know if you require anything else. Have a great day!
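Applied to the loop in the question, the usual pattern is to collect each per-ticker frame in a list and concatenate once after the loop. A sketch of just the relevant skeleton (the placeholder frame stands in for the fetched data):
frames = []  # one small DataFrame per ticker
for symbol in C20:
    # build df1 for this ticker exactly as in the question's loop body
    df1 = pd.DataFrame({'Date': [], 'Price': [], 'Stock Ticker': []})  # placeholder
    frames.append(df1)

df = pd.concat(frames, ignore_index=True)  # one large DataFrame at the end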

Specify length (padding) of each column in a csv

I am trying to re-arrange a file to match a BACS bank format. In order for it to work the columns in the csv below need to be of a specific length. I have figured out the abcdabcd column as it's a repeating pattern (as are a couple more in the file), but several columns have random numbers that I cannot easily target.
Is there a way for me to target either (ideally) a specific column based on its header, or alternatively target everything up to a comma to butcher something that could work?
In my example file below, you'll see three columns where the value changes. If targeting everything up to a specific character is the solution, I was thinking of using .ljust to fill the column up to the specified length (and then sorting it out manually in excel).
Original File
a,b,c,d,e,f,g,h,i,j,k
12345,1234567,0,11,123456,12345678,1234567,abcdabcd,A ABCD
123456,12345678,0,11,123456,12345678,12345678,abcdabcd,A ABCD
123456,1234567,0,11,123456,12345678,12345,abcdabcd,A ABCD
12345,1234567,0,11,123456,12345678,1234567,abcdabcd,A ABCD
123456,12345678,0,11,123456,12345678,123456789,abcdabcd,A ABCD
Ideal output
a,b,c,d,e,f,g,h,i,j,k
123450,12345670,0,11,123456,12345678,123456700,abcdabcd,A ABCD
123456,12345678,0,11,123456,12345678,123456780,abcdabcd,A ABCD
123456,12345670,0,11,123456,12345678,123450000,abcdabcd,A ABCD
123450,12345670,0,11,123456,12345678,123456700,abcdabcd,A ABCD
123456,12345678,0,11,123456,12345678,123456789,abcdabcd,A ABCD
Code
with open('file.txt', 'r') as file :
filedata = file.read()
filedata = filedata.replace('12345', '12345'.ljust(6, '0'))
with open('file.txt', 'w') as file:
file.write(filedata)
EDIT:
Something similar to this: Python - How to add zeros to an integer/string?, but either targeting a specific column per its header, or at least the first one.
EDIT2:
I am using the below to rearrange my columns, could this be modified to work with string lengths?
import pandas as pd
## Read csv / tab-delimited in this example
df = pd.read_csv('test.txt', sep='\t')
## Reorder columns
df = df[['h','i','c','g','a','b','e','d','f','j','k']]
## Write csv / tab-delimited
df.to_csv('test', sep='\t')
Using pandas, you can convert the column to str and then use .str.pad. You can make a dict with the requested lengths:
lengths = {
    "a": 6,
    "b": 8,
    "c": 3,
    "d": 6,
    "e": 8,
}
and use it like this:
result = pd.DataFrame(
    {
        column_name: column.str.pad(
            lengths.get(column_name, 0), side="right", fillchar="0"
        )
        for column_name, column in df.astype(str).items()
    }
)
If the fillchar is different per column, you can get that from a dict as well
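For example, a sketch with a second (hypothetical) lookup table for the fill character:
fillchars = {"a": "0", "b": "0", "c": " "}  # hypothetical per-column fill characters

result = pd.DataFrame(
    {
        column_name: column.str.pad(
            lengths.get(column_name, 0),
            side="right",
            fillchar=fillchars.get(column_name, "0"),  # default to "0"
        )
        for column_name, column in df.astype(str).items()
    }
)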
>>> print('{:0>5}'.format(4))
00004
>>> print('{:0<5}'.format(4))
40000
>>> print('{:0^5}'.format(4))
00400
Example:
#--------------DEFs------------------
def number_zero_right(number, len_number):
    return ('{:0<' + str(len_number) + '}').format(number)
#--------------MAIN------------------
a = 12345
b = 1234567
c = 0
d = 11
e = 123456
f = 12345678
g = 1234567
h = 'abcdabcd'
i = 'A'
j = 'ABCD'
print(a,b,c,d,e,f,g,h,i,j)
# > 12345 1234567 0 11 123456 12345678 1234567 abcdabcd A ABCD
a = number_zero_right(a,6)
b = number_zero_right(b,8)
c = number_zero_right(c,1)
d = number_zero_right(d,2)
e = number_zero_right(e,6)
f = number_zero_right(f,8)
g = number_zero_right(g,9)
print(a,b,c,d,e,f,g,h,i,j)
#> 123450 12345670 0 11 123456 12345678 123456700 abcdabcd A ABCD
Managed to get there, so thought I'd post in case someone has a similar issue. This only works on one column, but that's enough for me now.
#import pandas
import pandas as pd
#open file and convert data to str
data = pd.read_csv('Test.CSV', dtype = str)
# width of output string
width = 6
# fillchar
char ="_"
#Change the contents of column named ColumnID
data["ColumnID"]= data["ColumnID"].str.ljust(width, char)
#print output
print(data)

Extracting text data into a meaningful table for analysis using Python (or R)

I'm working on an engineering project in which I'm using machine performance data from archives. The machine produces one data set approximately every 5s and this data is then available date-wise in a number of .txt files with each file containing data in the following format. The data shown below is from the 2013_04_17.txt file which has all the performance data for that particular date.
2013-04-27 00:00:05.011
V_1 100 V_2 26695 V_3 33197 V_4 c681 V_5 29532
V_6 4600 V_7 4606 V_8 4f55 V_9 5a V_10 8063 V_11 4300 V_12 4700
V_13 4504 V_14 4400 V_15 4202 V_16 255 V_17 4300 V_18 91 V_19 6f
V_20 300 V_21 14784
V_22 5.085 V_23 7.840 V_24 -8.061 V_25 36.961
2013-04-27 00:00:10.163
V_1 100 V_2 26695 V_3 33199 V_4 c681 V_5 29872
V_6 4600 V_7 4606 V_8 4f55 V_9 5a V_10 8063 V_11 4300 V_12 4700
V_13 4504 V_14 4400 V_15 4202 V_16 255 V_17 4300 V_18 91 V_19 6f
V_20 300 V_21 14790
V_22 5.085 V_23 7.840 V_24 -8.061 V_25 37.961
..........
I need to view this data in a tabular format or as a CSV in order to be able to produce performance plots and detect any anomalies. However, I do not have enough experience with programming in Python to be able to parse this text file.
I've looked into pandas and regular expressions for some ideas but have been failing to achieve the desired result. I'm hoping to have the data in tabular form or a CSV file, with the header as the variables Date, Time, V_1, V_2, V_3, etc., and the subsequent rows as all the values obtained every 5s.
You can start by reading the tokens one at a time from the file:
with open('2013_04_17.txt') as infile:
    for line in infile:
        for token in line.split():
            print(token)
After that you just need to create a state machine to remember which section you're in, and process each section when you find its end:
def process_record(timestamp, values):
    """print CSV format"""
    print(','.join([timestamp] + values))

with open('t.txt') as infile:
    timestamp = None
    values = []
    for line in infile:
        line = line.strip()
        if timestamp is None:
            timestamp = line
        elif not line:  # blank line is separator
            process_record(timestamp, values)
            timestamp = None
            values = []
        else:
            values.extend(line.split()[1::2])
    if timestamp is not None:  # process last record, no separator after it
        process_record(timestamp, values)
That gives you CSV output:
2013-04-27 00:00:05.011,100,26695,33197,c681,29532,4600,4606,4f55,5a,8063,4300,4700,4504,4400,4202,255,4300,91,6f,300,14784,5.085,7.840,-8.061,36.961
2013-04-27 00:00:10.163,100,26695,33199,c681,29872,4600,4606,4f55,5a,8063,4300,4700,4504,4400,4202,255,4300,91,6f,300,14790,5.085,7.840,-8.061,37.961
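If a header row is wanted in the CSV, it can be printed once before the loop. A sketch, assuming the 25 V_ fields shown in the sample (note that the timestamp lands in a single column here):
print(','.join(['timestamp'] + ['V_%d' % i for i in range(1, 26)]))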
In R (and this would be very specific to your case), you can try tossing all the .txt files into a new folder, for example call it date_data. Assuming all the files are in this same format, try running this:
library(purrr)
library(tidyverse)
setwd("./date_data")
odd_file_reader <- function(x){
  as.data.frame(matrix(scan(x, what="character", sep=NULL), ncol = 52, byrow = TRUE)[,-seq(3,51,2)])
}
binded_data <- tibble(filenames = list.files()) %>%
  mutate(yearly_sat = map(filenames, odd_file_reader)) %>%
  unnest()
There's a much easier way. Assuming this data appears in columns in the .txt file (i.e. the data is in a fixed-width format), you can use the pandas function pandas.read_fwf() and pass in tuples containing the extents of the fixed-width fields of each line.
import pandas
colspecs = [(0,10), (11, 23), (28,31), (37, 42), (48, 54), (59, 63), (70, 75), ...]
data = pandas.read_fwf(TXT_PATH, colspecs = colspecs, header=None)
data.columns = ['date', 'time', 'V_1', 'V_2', 'V_3', 'V_4', 'V_5', ...]
print(data)
date time V_1 V_2 V_3 V_4 V_5
0 2013-04-27 00:00:05.011 100 26695 33197 c681 29532
1 2013-04-27 00:00:10.163 100 26695 33199 c681 29872
And from there, you can save that formatted data to file with the command
data.to_csv('filename.csv', index=False)
Try my simple code; I used pandas:
import pandas as pd
import numpy as np

with open('2013_04_17.txt', 'r') as f:
    large_list = [word for line in f for word in line.split() if 'V_' not in word]
    print(large_list)

# 27 columns per record: date, time, and the 25 V_ readings
col_titles = ['date', 'time'] + ['V_%d' % i for i in range(1, 26)]
data = np.array(large_list).reshape((len(large_list) // 27, 27))
pd.DataFrame(data, columns=col_titles).to_csv("output3.csv", index=False)
Edit:
You can achieve the same results without regex as follows.
Note: we assume that the file format is always the same, so we expect the date and time at the beginning of the file.
# reading data from a file, for example log.txt
with open('log.txt', 'r') as f:
    data = f.read()

data = data.split()
v_readings = dict()
v_readings['date'] = data.pop(0)
v_readings['time'] = data.pop(0)

i = 0
while i < len(data):
    v_readings[data[i]] = data[i+1]
    i += 2
exporting to csv file:
csv = '\n'
csv += ','.join(v_readings.keys())
csv += '\n'
csv += ','.join(v_readings.values())
print(csv)
with open('out.csv', 'w') as f:
    f.write(csv)
output:
date,time,V_1,V_2,V_3,V_4,V_5,V_6,V_7,V_8,V_9,V_10,V_11,V_12,V_13,V_14,V_15,V_16,V_17,V_18,V_19,V_20,V_21,V_22,V_23,V_24,V_25
2013-04-27,00:00:05.011,100,26695,33197,c681,29532,4600,4606,4f55,5a,8063,4300,4700,4504,4400,4202,255,4300,91,6f,300,14784,5.085,7.840,-8.061,36.961
with regex:
This is how you extract these data into variables and a dictionary in Python using regex.
This is a starting point; then you can do whatever you like with them afterwards.
import re
string = """
2013-04-27 00:00:05.011 V_1 100 V_2 26695 V_3 33197 V_4 c681 V_5 29532 V_6 4600 V_7 4606 V_8 4f55 V_9 5a V_10 8063 V_11 4300 V_12 4700 V_13 4504 V_14 4400 V_15 4202 V_16 255 V_17 4300 V_18 91 V_19 6f V_20 300 V_21 14784 V_22 5.085 V_23 7.840 V_24 -8.061 V_25 36.961
"""
# extract date
match = re.search(r'\d{4}-\d\d-\d\d', string)
my_date = match.group()
# extract time
match = re.search(r'\d\d:\d\d:\d\d\.\d+', string)
my_time = match.group()
#getting V's into a dictionary
match = re.findall(r'V_\d+ \d+', string)
v_readings = dict()
for item in match:
    k, v = item.split()
    v_readings[k] = v
# print output
print(my_date)
print(my_time)
print(v_readings)
output:
2013-04-27
00:00:05.011
{'V_1': '100', 'V_2': '26695', 'V_3': '33197', 'V_5': '29532', 'V_6': '4600', 'V_7': '4606', 'V_8': '4', 'V_9': '5', 'V_10': '8063', 'V_11': '4300', 'V_12': '4700', 'V_13': '4504', 'V_14': '4400', 'V_15': '4202', 'V_16': '255', 'V_17': '4300', 'V_18': '91', 'V_19': '6', 'V_20': '300', 'V_21': '14784', 'V_22': '5', 'V_23': '7', 'V_25': '36'}
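Note that the output above silently drops or truncates the hexadecimal (c681, 4f55), decimal (5.085) and negative (-8.061) readings, because the pattern \d+ only matches plain digit runs. A sketch of a more forgiving pattern captures any non-space token as the value:
match = re.findall(r'(V_\d+) (\S+)', string)
v_readings = dict(match)  # hex, decimal and negative values survive intact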
