I have written below function in Python:
df = pd.DataFrame({'age': [32, 33, 33, 34, 44]})


def PROC_FREQ(dataset, arg1):
    """SAS PROC FREQ lookalike: print a centered frequency table.

    dataset : DataFrame to tabulate.
    arg1    : list of column names to group by (the first one is counted).

    Prints frequency, percent, and their cumulative versions, centered
    in the current terminal. Returns None.
    """
    # Named aggregation replaces the dict-of-renamers form
    # ({'Frequency': 'count'}), which was deprecated and later removed
    # from pandas; it also makes the follow-up rename unnecessary.
    x = dataset.groupby(arg1)[arg1[0]].agg(Freq='count')
    x['Pct'] = round((x['Freq'] / x['Freq'].sum()) * 100, 2)
    x['Freq Acum'] = x['Freq'].cumsum()
    x['Cumm Percent'] = x['Pct'].cumsum()
    x.sort_values(arg1, ascending=True, inplace=True)
    pd.set_option('display.max_columns', 500)
    x = x.reset_index()
    lines = x.to_string(index=False, justify='center').splitlines()
    lines.insert(1, "-" * len(lines[0]))
    # Bug fix: centering each line independently shifts lines of unequal
    # length relative to each other (the reported "last line not aligned"
    # problem). Pad every line to one common width first, then center.
    width = max(len(line) for line in lines)
    term_cols = shutil.get_terminal_size().columns
    for line in lines:
        print(line.ljust(width).center(term_cols))
and below is the code to call the function:
PROC_FREQ(df,['age'])
and below is the output of the function:
age Freq Pct Freq Acum Cumm Percent
-----------------------------------------
32 1 16.67 1 16.67
33 2 33.33 3 50.00
34 1 16.67 4 66.67
44 2 33.33 6 100.00
The last line of the output is not aligned correctly.
Related
I have a pandas data frame that looks like this:
id age weight group
1 12 45 [10-20]
1 18 110 [10-20]
1 25 25 [20-30]
1 29 85 [20-30]
1 32 49 [30-40]
1 31 70 [30-40]
1 37 39 [30-40]
I am looking for a data frame that would look like this: (sd=standard deviation)
group group_mean_weight group_sd_weight rest_mean_weight rest_sd_weight
[10-20]
[20-30]
[30-40]
Here the second/third columns are mean and SD for that group. columns third and fourth are mean and SD for the rest of the groups combined.
Here's a way to do it:
# One row per distinct group; the count column itself is never used,
# the frame only supplies the group index to fill in below.
res = df[['group']].groupby('group').count()
for grp in res.index:
    in_group = df.group == grp
    weights_in = df.loc[in_group, 'weight']
    weights_out = df.loc[~in_group, 'weight']
    res.loc[grp, ['group_mean_weight', 'group_sd_weight',
                  'rest_mean_weight', 'rest_sd_weight']] = [
        weights_in.mean(), weights_in.std(),
        weights_out.mean(), weights_out.std()]
res = res.reset_index()
Output:
group group_mean_weight group_sd_weight rest_mean_weight rest_sd_weight
0 [10-20] 77.500000 45.961941 53.60 24.016661
1 [20-30] 55.000000 42.426407 62.60 28.953411
2 [30-40] 52.666667 15.821926 66.25 38.378596
An alternative way to get the same result is:
# Same result in one pass: apply over the distinct groups, then wrap
# the raw values back into a labelled frame.
uniq_groups = df.group.drop_duplicates()
stat_cols = ['group_mean_weight', 'group_sd_weight',
             'rest_mean_weight', 'rest_sd_weight']
stats = uniq_groups.to_frame().apply(
    lambda x: [df.loc[df.group == x.group, 'weight'].mean(),
               df.loc[df.group == x.group, 'weight'].std(),
               df.loc[df.group != x.group, 'weight'].mean(),
               df.loc[df.group != x.group, 'weight'].std()],
    axis=1, result_type='expand')
res = (pd.DataFrame(stats.to_numpy(), index=list(uniq_groups), columns=stat_cols)
       .reset_index()
       .rename(columns={'index': 'group'}))
Output:
group group_mean_weight group_sd_weight rest_mean_weight rest_sd_weight
0 [10-20] 77.500000 45.961941 53.60 24.016661
1 [20-30] 55.000000 42.426407 62.60 28.953411
2 [30-40] 52.666667 15.821926 66.25 38.378596
UPDATE:
OP asked in a comment: "what if I have more than one weight column? what if I have around 10 different weight columns and I want sd for all weight columns?"
To illustrate below, I have created two weight columns (weight and weight2) and have simply provided all 4 aggregates (mean, sd, mean of other, sd of other) for each weight column.
wgtCols = ['weight', 'weight2']
# Build one stats frame per weight column, then lay them side by side.
per_col_frames = []
for wgtCol in wgtCols:
    stats = df.group.drop_duplicates().to_frame().apply(
        lambda x: [df.loc[df.group == x.group, wgtCol].mean(),
                   df.loc[df.group == x.group, wgtCol].std(),
                   df.loc[df.group != x.group, wgtCol].mean(),
                   df.loc[df.group != x.group, wgtCol].std()],
        axis=1, result_type='expand')
    per_col_frames.append(pd.DataFrame(
        stats.to_numpy(),
        index=list(df.group.drop_duplicates()),
        columns=[f'group_mean_{wgtCol}', f'group_sd_{wgtCol}',
                 f'rest_mean_{wgtCol}', f'rest_sd_{wgtCol}']))
res = (pd.concat(per_col_frames, axis=1)
       .reset_index()
       .rename(columns={'index': 'group'}))
Input:
id age weight weight2 group
0 1 12 45 55 [10-20]
1 1 18 110 120 [10-20]
2 1 25 25 35 [20-30]
3 1 29 85 95 [20-30]
4 1 32 49 59 [30-40]
5 1 31 70 80 [30-40]
6 1 37 39 49 [30-40]
Output:
group group_mean_weight group_sd_weight rest_mean_weight rest_sd_weight group_mean_weight2 group_sd_weight2 rest_mean_weight2 rest_sd_weight2
0 [10-20] 77.500000 45.961941 53.60 24.016661 87.500000 45.961941 63.60 24.016661
1 [20-30] 55.000000 42.426407 62.60 28.953411 65.000000 42.426407 72.60 28.953411
2 [30-40] 52.666667 15.821926 66.25 38.378596 62.666667 15.821926 76.25 38.378596
I am working with 2 files, oldFile.txt and newFile.txt, and compute some changes between them. The newFile.txt is updated constantly, and any updates will be written to oldFile.txt.
I am trying to improve the snippet below by saving previous computed values and add it to a finalOutput.txt. Any idea will be very helpful to accomplish the needed output. Thank you in advance.
import pandas as pd
from time import sleep
def read_file(fn):
    """Parse 'pname cnt cat' lines of *fn* into a dict keyed by pname."""
    data = {}
    with open(fn, 'r') as f:
        for raw_line in f:
            # cat may itself contain spaces, so split at most twice.
            pname, cnt, cat = raw_line.rstrip().split(maxsplit=2)
            data[pname] = {'pname': pname, 'cnt': int(cnt), 'cat': cat}
    return data
def process_data(oldfn, newfn):
    """Diff two snapshot files and publish the merged result.

    Reads both files, computes the percent change per product since the
    previous reading, writes the table to finalOutput.txt, overwrites
    oldFile.txt with the same table (so the next cycle diffs against this
    reading), and prints a numbered copy to the console. Returns None.
    """
    old = read_file(oldfn)
    new = read_file(newfn)
    u_data = {}
    for ko, vo in old.items():
        if ko in new:
            n = new[ko]
            old_cnt = vo['cnt']
            new_cnt = n['cnt']
            # Guard against division by zero when the old count is 0.
            base = 1 if old_cnt == 0 else old_cnt
            cnt_change = 100 * (new_cnt - base) / base
            u_data[ko] = {'pname': n['pname'], 'cnt': new_cnt, 'cat': n['cat'],
                          'curr_change%': round(cnt_change, 0)}
    for kn, vn in new.items():
        if kn not in old:
            # Bug fix: brand-new entries previously stored their change
            # under the key 'cnt_change', creating a second, mostly-NaN
            # column alongside 'curr_change%'. Use the same key so the
            # output has a single, consistent change column.
            vn['curr_change%'] = round(0, 0)
            u_data[kn] = vn
    pd.options.display.float_format = "{:,.0f}".format
    df = pd.DataFrame(list(u_data.values()))
    df = df.sort_values(by=['cnt'], ascending=False)
    # Render once; the same table goes to both files.
    table = df.to_string(header=None, index=False)
    # Save to text file.
    with open('finalOutput.txt', 'w') as w:
        w.write(table)
    # Overwrite oldFile.txt.
    with open('oldFile.txt', 'w') as w:
        w.write(table)
    # Print in console with a running row number.
    df.insert(0, '#', range(1, 1 + len(df)))
    print(df.to_string(index=False, header=True))
oldfn = './oldFile.txt'
newfn = './newFile.txt'
# Re-run the comparison once a minute, forever.
while True:
    process_data(oldfn, newfn)
    sleep(60)
oldFile.txt
e6c76e4810a464bc 1 Hello(HLL)
65b66cc4e81ac81d 2 CryptoCars (CCAR)
c42d0c924df124ce 3 GoldNugget (NGT)
ee70ad06df3d2657 4 BabySwap (BABY)
e5b7ebc589ea9ed8 8 Heroes&E... (HE)
7e7e9d75f5da2377 3 Robox (RBOX)
newfile.txt #-- content during 1st reading
e6c76e4810a464bc 34 Hello(HLL)
65b66cc4e81ac81d 43 CryptoCars (CCAR)
c42d0c924df124ce 95 GoldNugget (NGT)
ee70ad06df3d2657 15 BabySwap (BABY)
e5b7ebc589ea9ed8 37 Heroes&E... (HE)
7e7e9d75f5da2377 23 Robox (RBOX)
755507d18913a944 49 CharliesFactory
newfile.txt #-- content during 2nd reading
924dfc924df1242d 35 AeroDie (ADie)
e6c76e4810a464bc 34 Hello(HLL)
65b66cc4e81ac81d 73 CryptoCars (CCAR)
c42d0c924df124ce 15 GoldNugget (NGT)
ee70ad06df3d2657 5 BabySwap (BABY)
e5b7ebc589ea9ed8 12 Heroes&E... (HE)
7e7e9d75f5da2377 19 Robox (RBOX)
755507d18913a944 169 CharliesFactory
newfile.txt # content during 3rd reading
924dfc924df1242d 45 AeroDie (ADie)
e6c76e4810a464bc 2 Hello(HLL)
65b66cc4e81ac81d 4 CryptoCars (CCAR)
c42d0c924df124ce 7 GoldNugget (NGT)
ee70ad06df3d2657 5 BabySwap (BABY)
e5b7ebc589ea9ed8 3 Heroes&E... (HE)
7e7e9d75f5da2377 6 Robox (RBOX)
755507d18913a944 9 CharliesFactory
oldFile.txt #-- Current output that needs improvement
# pname cnt cat curr_change%
1 924dfc924df1242d 35 AeroDie (ADie) 29
2 755507d18913a944 9 CharliesFactory -95
3 c42d0c924df124ce 7 GoldNugget (NGT) -53
4 7e7e9d75f5da2377 6 Robox (RBOX) -68
5 ee70ad06df3d2657 5 BabySwap (BABY) 0
6 65b66cc4e81ac81d 4 CryptoCars (CCAR) -95
7 e5b7ebc589ea9ed8 3 Heroes&E... (HE) -75
8 e6c76e4810a464bc 2 Hello(HLL) -94
finalOutput.txt #-- Needed Improved Output with additional columns r1, r2 and so on depending on how many update readings
# curr_change% is the latest 3rd reading
# r2% is based on the 2nd reading
# r1% is based on the 1st reading
# pname cnt cat curr_change% r2% r1%
1 924dfc924df1242d 35 AeroDie (ADie) 29 0 0
2 755507d18913a944 9 CharliesFactory -95 245 0
3 c42d0c924df124ce 7 GoldNugget (NGT) -53 -84 3,067
4 7e7e9d75f5da2377 6 Robox (RBOX) -68 -17 667
5 ee70ad06df3d2657 5 BabySwap (BABY) 0 -67 275
6 65b66cc4e81ac81d 4 CryptoCars (CCAR) -95 70 2,050
7 e5b7ebc589ea9ed8 3 Heroes&E... (HE) -75 -68 362
8 e6c76e4810a464bc 2 Hello(HLL) -94 0 3,300
Updated for feedback, I made adjustments so that it would handle data that was fed to it live. Whenever new data is loaded, load the file name into process_new_file() function, and it will update the 'finalOutput.txt'.
For simplicity, I named the different files file1, file2, file3, and file4.
I'm doing most of the operations using the pandas Dataframe. I think working with Pandas DataFrames will make the task a lot easier for you.
Overall, I created one function to read the file and return a properly formatted DataFrame. I created a second function that compares the old and the new file and does the calculation you were looking for. I merge together the results of these calculations. Finally, I merge all of these calculations with the last file's data to get the output you're looking for.
import pandas as pd
# Shared state for process_new_file(). A bare `global` statement at module
# scope is a no-op (names assigned at top level are already global), so the
# original three `global ...` lines are dropped.
global_old_df = None          # DataFrame from the previous reading, or None
results_df = pd.DataFrame()   # accumulated rN percent-change columns
count = 0                     # number of file-to-file comparisons so far
def read_file(file_name):
    """Load a file of 'pname cnt cat' lines into a typed DataFrame."""
    parsed = []
    with open(file_name) as fh:
        for ln in fh:
            # Split on the first two spaces only: cat keeps its spaces.
            parsed.append(ln.split(" ", 2))
    frame = pd.DataFrame(parsed, columns=['pname', 'cnt', 'cat'])
    frame['cat'] = frame['cat'].str.strip()
    frame['cnt'] = frame['cnt'].astype(float)
    return frame
def compare_dfs(df_old, df_new, count):
    """Return a one-column frame 'r<count>' of percent change per pname."""
    merged = df_old.merge(df_new, on=['pname', 'cat'], how='outer')
    col = 'r%s' % count
    # cnt_x is the old count, cnt_y the new one.
    merged[col] = (merged['cnt_y'] / merged['cnt_x'] - 1) * 100
    return merged[['pname', col]].set_index('pname')
def process_new_file(file):
    """Compare *file* with the previous reading and refresh finalOutput.txt."""
    global global_old_df
    global results_df
    global count
    df_new = read_file(file)
    if global_old_df is None:
        # First file ever seen: nothing to compare against yet.
        global_old_df = df_new
        return
    count += 1
    delta = compare_dfs(global_old_df, df_new, count)
    # Newest column first, older rN columns keep shifting right.
    results_df = pd.concat([delta, results_df], axis=1)
    global_old_df = df_new
    output_df = df_new.merge(results_df, left_on='pname', right_index=True)
    output_df.to_csv('finalOutput.txt')
    pd.options.display.float_format = "{:,.1f}".format
    print(output_df.to_string())
# Feed the four sample readings through in order.
files = ['file1.txt', 'file2.txt', 'file3.txt', 'file4.txt']
for current in files:
    process_new_file(current)
This gives the output:
pname cnt cat r3 r2 r1
0 924dfc924df1242d 45.0 AeroDie (ADie) 28.6 NaN NaN
1 e6c76e4810a464bc 2.0 Hello(HLL) -94.1 0.0 3,300.0
2 65b66cc4e81ac81d 4.0 CryptoCars (CCAR) -94.5 69.8 2,050.0
3 c42d0c924df124ce 7.0 GoldNugget (NGT) -53.3 -84.2 3,066.7
4 ee70ad06df3d2657 5.0 BabySwap (BABY) 0.0 -66.7 275.0
5 e5b7ebc589ea9ed8 3.0 Heroes&E... (HE) -75.0 -67.6 362.5
6 7e7e9d75f5da2377 6.0 Robox (RBOX) -68.4 -17.4 666.7
7 755507d18913a944 9.0 CharliesFactory -94.7 244.9 NaN
So, to run it live, you'd just replace that last section with:
while True:
newfn = './newFile.txt'
process_new_file(newfn)
sleep(60)
I have a dataset where column a represents the total number of values held in columns e, i, d and t, which are strings separated by a "-"
a e i d t
0 4 40-80-120-150 0.5-0.3-0.2-0.2 30-32-30-32 1-1-1-1
1 4 40-40-40-40 0.1-0.1-0.1-0.1 18-18-18-18 1-2-3-4
3 4 40-80-120-150 0.5-0.3-0.2-0.2 30-32-30-32 1-1-1-1
5 4 40-40-40-40 0.1-0.1-0.1-0.1 18-18-18-18 1-2-3-4
I want to create 8 new columns, 4 representing the SUM of (e-i-d-t), 4 the product.
For example:
def funct_two_outputs(E, I, d, t, d_calib=50):
    """Return (sum, product) of the four inputs.

    Bug fix: the original body referenced lowercase ``i``, which is not a
    parameter (the parameter is ``I``), so it either raised NameError or
    silently picked up an unrelated global ``i``.
    """
    return E + I + d + t, E * I * d * t
OUT first 2 values:
SUM_0, row0 = 40+0.5+30+1 SUM_1 = 80+0.3+32+1
The sum and product are example functions substituting my functions which are a bit more complicated.
I have written a function **expand_on_col** that separates all the e, i, d, t values into new columns:
def expand_on_col(df_, col_to_split="namecol", sep='-', prefix="this"):
    '''
    Split *col_to_split* on *sep* into new prefixed columns and return
    the original frame with those columns appended; gaps left by ragged
    splits come back as '-'.
    '''
    expanded = df_[col_to_split].str.split(sep, expand=True).add_prefix(prefix)
    combined = pd.concat([df_, expanded], axis=1).replace(np.nan, '-')
    return combined
Now I need to create 4 new columns that are the sum of e, i, d, t, and 4 that are the product.
Example output for SUM:
index a e i d t a-0 e-0 e-1 e-2 e-3 i-0 i-1 i-2 i-3 d-0 d-1 d-2 d-3 t-0 t-1 t-2 t-3 sum-0 sum-1 sum-2 sum-3
0 0 4 40-80-120-150 0.5-0.3-0.2-0.2 30-32-30-32 1-1-1-1 4 40 80 120 150 0.5 0.3 0.2 0.2 30 32 30 32 1 1 1 1 71 114 153 186
1 1 4 40-40-40-40 0.1-0.1-0.1-0.1 18-18-18-18 1-2-3-4 4 40 40 40 40 0.1 0.1 0.1 0.1 18 18 18 18 1 2 3 4 59 61 63 65
2 3 4 40-80-120-150 0.5-0.3-0.2-0.2 30-32-30-32 1-1-1-1 4 40 80 120 150 0.5 0.3 0.2 0.2 30 32 30 32 1 1 1 1 71 114 153 186
3 5 4 40-40-40-40 0.1-0.1-0.1-0.1 18-18-18-18 1-2-3-4 4 40 40 40 40 0.1 0.1 0.1 0.1 18 18 18 18 1 2 3 4 59 61 63 65
If I run the code with funct_one_outputs (which only returns the sum) it works, but with funct_two_outputs (sum and product) I get an error.
Here is the code:
import pandas as pd
def expand_on_col(df_, col_to_split="namecol", sep='-', prefix="this"):
    '''
    Split the chosen column on *sep* and append the pieces as new
    prefixed columns; NaNs from uneven splits are shown as '-'.
    '''
    pieces = df_[col_to_split].str.split(sep, expand=True).add_prefix(prefix)
    return pd.concat([df_, pieces], axis=1).replace(np.nan, '-')
def funct_two_outputs(E, I, d, t, d_calib=50):  # the function i want to pass
    """Return (sum, product) of E, I, d, t.

    Bug fix: the body used lowercase ``i`` although the parameter is
    capital ``I`` — a NameError (or a stale global) at call time.
    """
    return E + I + d + t, E * I * d * t
def funct_one_outputs(E, I, d, t, d_calib=50):
    """Return the sum of E, I, d, t (single-output variant).

    Bug fix: the body referenced lowercase ``i``, which is not a
    parameter — the parameter is capital ``I``.
    """
    return E + I + d + t
for col in columns:
    df = expand_on_col(df_=df, col_to_split=col, sep='-', prefix=f"{col}-")
cols_ = df.columns.drop(columns)
df[cols_] = df[cols_].apply(pd.to_numeric, errors="coerce")
df["a"] = df["a"].apply(pd.to_numeric, errors="coerce")
df.reset_index(inplace=True)
for i in range(max(df["a"])):
    name_1, name_2 = f"sum-{i}", f"mult-{i}"
    # Bug fix: apply(axis=1) on a function returning a tuple yields a
    # Series of tuples, which cannot be assigned to two columns
    # ("Must have equal len keys and value ..."). result_type='expand'
    # spreads the 2-tuple into two columns, matching the left-hand side.
    # The redundant single-output assignment to name_1 (immediately
    # overwritten here) is dropped.
    df[[name_1, name_2]] = df.apply(
        lambda row: funct_two_outputs(E=row[f'e-{i}'], I=row[f'i-{i}'],
                                      d=row[f'd-{i}'], t=row[f"t-{i}"]),
        axis=1, result_type='expand')
OUT:
ValueError Traceback (most recent call last)
<ipython-input-306-85157b89d696> in <module>()
68 df[name_1] = df.apply(lambda row: funct_one_outputs(E= row[f'e-{i}'], I=row[f'i-{i}'], d=row[f'd-{i}'], t=row[f"t-{i}"]), axis=1)
69 #if i try and fill 2 outputs it wont work
---> 70 df[[name_1, name_2]] = df.apply(lambda row: funct_two_outputs(E= row[f'e-{i}'], I=row[f'i-{i}'], d=row[f'd-{i}'], t=row[f"t-{i}"]), axis=1)
71
72
2 frames
/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py in __setitem__(self, key, value)
3039 self._setitem_frame(key, value)
3040 elif isinstance(key, (Series, np.ndarray, list, Index)):
-> 3041 self._setitem_array(key, value)
3042 else:
3043 # set column
/usr/local/lib/python3.7/dist-packages/pandas/core/frame.py in _setitem_array(self, key, value)
3074 )[1]
3075 self._check_setitem_copy()
-> 3076 self.iloc._setitem_with_indexer((slice(None), indexer), value)
3077
3078 def _setitem_frame(self, key, value):
/usr/local/lib/python3.7/dist-packages/pandas/core/indexing.py in _setitem_with_indexer(self, indexer, value)
1751 if len(ilocs) != len(value):
1752 raise ValueError(
-> 1753 "Must have equal len keys and value "
1754 "when setting with an iterable"
1755 )
ValueError: Must have equal len keys and value when setting with an iterable
Don't Use apply
If you can help it
# Long Series: one numeric value per (row, column, position-in-split).
s = pd.to_numeric(
    df[['e', 'i', 'd', 't']]
    .stack()
    .str.split('-', expand=True)
    .stack()
)
# Fix for modern pandas: Series.sum(level=...) / prod(level=...) were
# deprecated in 1.3 and removed in 2.0 — group on the MultiIndex levels
# (original row, split position) explicitly instead.
sums = s.groupby(level=[0, 2]).sum().rename('Sum')
prods = s.groupby(level=[0, 2]).prod().rename('Prod')
sums_prods = pd.concat([sums, prods], axis=1).unstack()
sums_prods.columns = [f'{o}-{i}' for o, i in sums_prods.columns]
df.join(sums_prods)
a e i d t Sum-0 Sum-1 Sum-2 Sum-3 Prod-0 Prod-1 Prod-2 Prod-3
0 4 40-80-120-150 0.5-0.3-0.2-0.2 30-32-30-32 1-1-1-1 71.5 113.3 151.2 183.2 600.0 768.0 720.0 960.0
1 4 40-40-40-40 0.1-0.1-0.1-0.1 18-18-18-18 1-2-3-4 59.1 60.1 61.1 62.1 72.0 144.0 216.0 288.0
3 4 40-80-120-150 0.5-0.3-0.2-0.2 30-32-30-32 1-1-1-1 71.5 113.3 151.2 183.2 600.0 768.0 720.0 960.0
5 4 40-40-40-40 0.1-0.1-0.1-0.1 18-18-18-18 1-2-3-4 59.1 60.1 61.1 62.1 72.0 144.0 216.0 288.0
If my data looks like this
Index Country ted_Val1 sam_Val1 ... ted_Val10 sam_Val10
1 Australia 1 3 ... 20 5
2 Bambua 12 33 ... 15 56
3 Tambua 14 34 ... 10 58
# Sample frame: one row per country, paired ted_/sam_ value columns.
df = pd.DataFrame(
    [["Australia", 1, 3, 20, 5],
     ["Bambua", 12, 33, 15, 56],
     ["Tambua", 14, 34, 10, 58]],
    columns=["Country", "ted_Val1", "sam_Val1", "ted_Val10", "sam_Val10"],
)
I'd like to subtract each 'sam_' column from its matching 'ted_' column using a list, creating a new column starting with 'dif_' such that:
Index Country ted_Val1 sam_Val1 diff_Val1 ... ted_Val10 sam_Val10 diff_val10
1 Australia 1 3 -2 ... 20 5 -15
2 Bambua 12 33 12 ... 15 56 -41
3 Tambua 14 34 14... 10 58 -48
so far I've got:
calc_vars = ['ted_Val1',
             'sam_Val1',
             'ted_Val10',
             'sam_Val10']
# Bug fix: `df.['ted_' + str(i)]` is a syntax error, and since i already
# is the full column name ('ted_Val1', ...), prepending 'ted_'/'sam_'
# doubled the prefix. Walk the (ted, sam) pairs instead and name the
# result after the shared ValN suffix, as intended.
for ted_col, sam_col in zip(calc_vars[0::2], calc_vars[1::2]):
    df['dif_' + ted_col.split('_', 1)[1]] = df[ted_col] - df[sam_col]
but I'm getting errors, not sure where to go from here. As a warning this is dummy data and there can be several underscores in the names
IIUC you can use filter to choose the columns for subtraction (assuming your columns are properly sorted like your sample):
# Column-wise difference of the ted_ block minus the sam_ block.
diff_frame = pd.DataFrame(
    df.filter(like="ted").to_numpy() - df.filter(like="sam").to_numpy(),
    columns=["diff" + c.split("_")[-1] for c in df.columns if "ted_Val" in c],
)
# Fix for modern pandas: concat's axis argument is keyword-only in
# pandas >= 2.0, so the positional `, 1)` form raises TypeError.
print(pd.concat([df, diff_frame], axis=1))
Country ted_Val1 sam_Val1 ted_Val10 sam_Val10 diff1 diff10
0 Australia 1 3 20 5 -2 15
1 Bambua 12 33 15 56 -21 -41
2 Tambua 14 34 10 58 -20 -48
try this,
calc_vars = ['ted_Val1', 'sam_Val1', 'ted_Val10', 'sam_Val10']
# Pair even entries (ted_*) with odd entries (sam_*):
# ['ted_Val1', 'ted_Val10'] with ['sam_Val1', 'sam_Val10']
pairs = zip(calc_vars[0::2], calc_vars[1::2])
for ted_col, sam_col in pairs:
    df['diff_' + ted_col.split("_")[-1]] = df[ted_col] - df[sam_col]
Edit: if columns are not sorted,
# Fix: regex patterns are raw strings — "ted_Val\d+" contains the invalid
# escape '\d' (a SyntaxWarning on Python 3.12+, an error in the future).
# Sort both sides by the shared ValN suffix so the pairs line up.
ted_cols = sorted(df.filter(regex=r"ted_Val\d+"), key=lambda x: x.split("_")[-1])
sam_cols = sorted(df.filter(regex=r"sam_Val\d+"), key=lambda x: x.split("_")[-1])
for ted, sam in zip(ted_cols, sam_cols):
    df['diff_' + ted.split("_")[-1]] = df[ted] - df[sam]
Country ted_Val1 sam_Val1 ted_Val10 sam_Val10 diff_Val1 diff_Val10
0 Australia 1 3 20 5 -2 15
1 Bambua 12 33 15 56 -21 -41
2 Tambua 14 34 10 58 -20 -48
I have written below function in python:
def proc_summ(df, var_names_in, var_names_group):
    """SAS PROC SUMMARY lookalike: print grouped sums plus a Total row.

    df              : input frame (a 'Freq' column of ones is added to it).
    var_names_in    : columns to sum.
    var_names_group : columns to group by.
    """
    df['Freq'] = 1
    summed = pd.pivot_table(
        df,
        index=var_names_group,
        values=var_names_in,
        aggfunc=[np.sum],
        fill_value=0,
        margins=True,
        margins_name='Total',
    ).reset_index()
    # Flatten the ('sum', col) MultiIndex headers down to plain names.
    summed.columns = summed.columns.map(''.join)
    summed.columns = [x.strip().replace('sum', '') for x in summed.columns]
    table_lines = summed.to_string(index=False, justify='center').splitlines()
    rule = "-" * len(table_lines[0])
    table_lines.insert(1, rule)
    # Second rule sits just above the Total margin row.
    table_lines.insert(len(summed.index) + 1, rule)
    print('\n'.join(table_lines))
And below is the code I am using to call the function:
# Summarize Freq and sal for every (name, age) combination.
proc_summ(
    df,
    var_names_in=["Freq", "sal"],
    var_names_group=["name", "age"],
)
and below is the output:
name age Freq sal
--------------------
Arik 32 1 100
David 44 2 260
John 33 1 200
John 34 1 300
Peter 33 1 100
--------------------
Total 6 960
Please let me know how can I print the data to the center of the screen like :
name age Freq sal
--------------------
Arik 32 1 100
David 44 2 260
John 33 1 200
John 34 1 300
Peter 33 1 100
--------------------
Total 6 960
If you are using Python3 you can try something like this
import shutil

# Width of the current terminal window, in characters.
columns = shutil.get_terminal_size().columns
message = "hello world"
print(message.center(columns))
Since you are using a DataFrame, you can try something like this:
import shutil
import pandas as pd

data = {'col1': [1, 2], 'col2': [3, 4]}
df = pd.DataFrame(data)
# convert DataFrame to string
df_string = df.to_string()
df_split = df_string.split('\n')
columns = shutil.get_terminal_size().columns
# Bug fix: ranging over len(df) counts data rows only, while the rendered
# text also contains a header line — so the frame's last line was never
# printed. Iterate the rendered lines themselves.
for line in df_split:
    print(line.center(columns))