How to add hyphens in between a string using Python

import csv

x = []
y = []
with open('x_wind.txt', 'r') as csvfile:
    plots = csv.reader(csvfile, delimiter=',')
    for row in plots:
        x.append(int(row[0]))
        y.append(float(row[1]))
I wrote the above code to extract data from a file.
Now I want my data to print as 2018-06-14:1.
The sample data is:
2018061402,6.8750
2018061403,8.0000
2018061404,7.7500
2018061405,7.3750
2018061406,6.7500
2018061407,6.1250
2018061408,5.7500
2018061409,5.6250
2018061410,5.5000
2018061411,5.5000
2018061412,5.3750
2018061413,5.1250
2018061414,4.6250
2018061415,3.8750
2018061416,3.5000
2018061417,3.1250
2018061418,3.6250
2018061419,4.2500
2018061420,4.7500
2018061421,5.8750
2018061422,6.2500
2018061423,6.6250
2018061500,6.7500
2018061501,6.7500
2018061502,7.3750
2018061503,7.1250
2018061504,6.1250
2018061505,5.2500
2018061506,4.7500
2018061507,4.1250
2018061508,4.0000
2018061509,3.8750
2018061510,3.8750
2018061511,4.1250
2018061512,4.5000
2018061513,4.3750
2018061514,3.5000
2018061515,3.1250
2018061516,3.1250
2018061517,3.0000
2018061518,3.0000
2018061519,3.5000
2018061520,3.8750
2018061521,4.1250
2018061522,4.3750
2018061523,4.6250
2018061600,5.1250
2018061601,4.8750
2018061602,6.0000
2018061603,5.5000
2018061604,4.7500
2018061605,3.8750
2018061606,3.3750
2018061607,2.7500
2018061608,2.3750
2018061609,2.5000
2018061610,2.7500
2018061611,3.1250
2018061612,3.3750
2018061613,3.6250
2018061614,3.2500
2018061615,2.7500
2018061616,3.1250
2018061617,2.8750
2018061618,1.5000
2018061619,1.5000
2018061620,1.6250
2018061621,1.8750
2018061622,2.6250
2018061623,3.3750
2018061700,4.1250
2018061701,4.7500
2018061702,6.1250
2018061703,6.1250
2018061704,5.5000
2018061705,5.0000
2018061706,4.2500
2018061707,4.0000
2018061708,3.8750
2018061709,4.0000
2018061710,4.3750
2018061711,4.5000
2018061712,4.5000
2018061713,4.0000
2018061714,3.5000
2018061715,3.0000
2018061716,2.7500
2018061717,2.2500
2018061718,0.3750
2018061719,1.5000
2018061720,2.1250
2018061721,2.1250
2018061722,2.2500
2018061723,3.1250
2018061800,4.2500
2018061801,5.5000
2018061802,7.1250
2018061803,7.1250
2018061804,6.3750
2018061805,5.7500
2018061806,5.3750
2018061807,5.0000
2018061808,5.1250
2018061809,5.0000
2018061810,5.0000
2018061811,4.7500
2018061812,4.6250
2018061813,4.5000
2018061814,4.2500
2018061815,3.7500
2018061816,3.3750
2018061817,3.2500
2018061818,3.1250
2018061819,3.0000
2018061820,3.1250
2018061821,3.2500
2018061822,3.3750
2018061823,3.3750
2018061900,3.5000
2018061901,3.3750
2018061902,4.3750
2018061903,5.8750
2018061904,5.8750
2018061905,5.3750
2018061906,4.7500
2018061907,3.6250
2018061908,3.5000
2018061909,2.6250
2018061910,3.0000
2018061911,2.5000
2018061912,2.0000
2018061913,1.3750
2018061914,0.5000
2018061915,-0.2500
2018061916,-0.5000
2018061917,0.6250
2018061918,2.2500
2018061919,2.0000
2018061920,2.1250
2018061921,2.1250
2018061922,2.7500
2018061923,3.1250
2018062000,3.1250
2018062001,3.0000
2018062002,3.5000
2018062003,5.0000
2018062004,5.1250
2018062005,4.5000
2018062006,3.7500
2018062007,3.2500
2018062008,3.3750
2018062009,2.8750
2018062010,2.7500
2018062011,2.5000
2018062012,1.8750
2018062013,1.2500
2018062014,0.3750
2018062015,0.1250
2018062016,0.5000
2018062017,1.8750

It looks like you have a list of date strings.
You can use the datetime module to convert them to your required format.
Ex:
import datetime
data = "2018061402"
print( datetime.datetime.strptime(data[:-2], "%Y%m%d").strftime("%Y-%m-%d") )
Output:
2018-06-14
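Applied to the reading loop from the question, here is a minimal sketch that builds date:hour labels such as 2018-06-14:2 (it assumes the last two digits of the first column are the hour, and reuses the same x_wind.txt file):
import csv
import datetime

labels = []
values = []
with open('x_wind.txt', 'r') as csvfile:
    plots = csv.reader(csvfile, delimiter=',')
    for row in plots:
        # "2018061402" -> datetime(2018, 6, 14, 2)
        stamp = datetime.datetime.strptime(row[0], "%Y%m%d%H")
        # Build a label such as "2018-06-14:2" (hour without a leading zero)
        labels.append(stamp.strftime("%Y-%m-%d") + ":" + str(stamp.hour))
        values.append(float(row[1]))
print(labels[0], values[0])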

Related

Extra quotes appearing in my CSV using Python code

I wrote some code to generate multiple IP addresses and export them to a CSV file:
import csv
import ipaddress
import random
from random import shuffle
LAN = ipaddress.ip_network('192.168.0.0/16')
WAN1 = ipaddress.ip_network('11.10.8.0/22')
WAN2 = ipaddress.ip_network('12.10.8.0/22')
LAN_IP_Adresses = [ IP_LAN for IP_LAN in LAN.hosts()]
WAN1_IP_Adresses = [ IP_WAN1 for IP_WAN1 in WAN1.hosts()]
WAN2_IP_Adresses = [ IP_WAN2 for IP_WAN2 in WAN2.hosts()]
index_IP_GW = len(WAN1_IP_Adresses)-1
locations_list=['Abidjan','Abu Dhabi','Adana','Adelaide', 'Ahmadabad','Algiers','Amsterdam','Ankara','Anshan','Athens','BANGKOK','BUCHAREST','BUDAPEST','Bagdad','Bandung','Bangalore','Bangkok','Barcelona','Beirut','Belgrade','Bern','Bogota','Brasilia','Brazzaville','Brussels','Bursa','CAIRO','CARACAS','CONAKRY','Canberra','Casablanca','Changchun','Chengdu','Chicago','Copenhagen','Dakar','MINSK','Madrid','Medina','Nairobi','Napoli','Montreal',
'Odessa','Omdurman','Osaka','Ottawa','PYONGYANG','Paris','Pekin', 'Perth','Philadelphia','Phoenix','Pimpri Chinchwad','Porto','Porto Alegre','QUITO','Qingdao','Rabat','Rajkot','Riadh','Rio de Janeiro','Rome','SANTIAGO','Salvador','Samara','San Antonio','San Francisco','Sao Paulo','Sendai','Seongnam','Seoul','Shanghai','Singapore','Sydney','Taiyuan','Tehran','Tijuana','Tokyo','Toronto','Moscou','Moscow','Mumbai (Bombay)','Munich','México','Milan',
'Tripoli','Tunis','Vienna','Warsaw','Wuhan','Xian','Yaounde','Yokohama', 'Zapopan','hong kong','Dallas','Delhi','Doha','Dublin','Durban','Ecatepec','Frankfurt','Fukuoka','Giza','Hamburg','Havana','Helsinki','Houston','Hyderabad','Istanbul','Jaipur','Jakarta','Jeddah','Johannesburg','KIEV','Kaduna','Kano','Kazan','Kuala Lumpur''Kyoto','LUANDA','Lahore','Lanzhou','Le Caire','Leon','Lima','Lisbon','London','Los Angeles','Lyon','MANILA','Melbourne','New York']
#Site_Nmb=1
def initial_Sites_list_generator(filename='SITES_CI.csv', Number_of_Sites=1000):
    file_to_output = open(filename, 'w', newline='')
    csv_writer = csv.writer(file_to_output, delimiter=',')
    Site_Nbr = 1
    index = 0
    csv_writer.writerow(["SITE_NAME", "SERIAL_NUMBER",'"LAN_IP_ADDRESS"','"WAN_IP_ADDRESS1"','"WAN_IP_ADDRESS2"','"GATEWAY_IP_ADDRESS1"','"GATEWAY_IP_ADDRESS2"','"ROUTE_REFLECTOR"','"LOCATIONS"','"HARDWAREMODEL"','"LANINT"','"WANINT1"','"WANINT2"','"BW_OUT"','"BW_IN"'])
    for i in range(1, Number_of_Sites+1):
        shuffle(locations_list)
        location = random.choice(locations_list)
        csv_writer.writerow(['"SITE'+ str(Site_Nbr)+'"',"2e70129bde9c4426b9213d4408c300",f'"{(LAN_IP_Adresses[index])}"',f'"{str(WAN1_IP_Adresses[index])}"',f'"{str(WAN2_IP_Adresses[index])}"',f'"{str(WAN1_IP_Adresses[index_IP_GW])}"',f'"{str(WAN2_IP_Adresses[index_IP_GW])}"','"False"',f'"{location}"','"ONEv600"','"gigabitethernet0/2"','"gigabitethernet0/0"','"gigabitethernet0/1"','"10"','"20"'])
        Site_Nbr = Site_Nbr + 1
        index = index + 1
    file_to_output.close()

initial_Sites_list_generator('SITES_OVP.csv', 1000)
But I got unnecessary quotes added to my CSV.
You are adding the extra quotes yourself. In your for loop, change this line:
csv_writer.writerow(['"SITE'+ str(Site_Nbr)+'"',"2e70129bde9c4426b9213d4408c300",f'"{(LAN_IP_Adresses[index])}"',f'"{str(WAN1_IP_Adresses[index])}"',f'"{str(WAN2_IP_Adresses[index])}"',f'"{str(WAN1_IP_Adresses[index_IP_GW])}"',f'"{str(WAN2_IP_Adresses[index_IP_GW])}"','"False"',f'"{location}"','"ONEv600"','"gigabitethernet0/2"','"gigabitethernet0/0"','"gigabitethernet0/1"','"10"','"20"'])
to this:
csv_writer.writerow(['SITE' + str(Site_Nbr),
                     "2e70129bde9c4426b9213d4408c300",
                     f'{LAN_IP_Adresses[index]}',
                     f'{WAN1_IP_Adresses[index]}',
                     f'{WAN2_IP_Adresses[index]}',
                     f'{WAN1_IP_Adresses[index_IP_GW]}',
                     f'{WAN2_IP_Adresses[index_IP_GW]}',
                     'False',
                     f'{location}',
                     'ONEv600',
                     'gigabitethernet0/2',
                     'gigabitethernet0/0',
                     'gigabitethernet0/1',
                     '10',
                     '20'])
The CSV writer already adds quotes to strings as appropriate.
I did
csv_writer = csv.writer(file_to_output, delimiter=",", quoting=csv.QUOTE_ALL)
and it worked!
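For reference, here is a minimal sketch of how the two quoting modes differ (demo.csv is just a throwaway file name used for illustration):
import csv

row = ['SITE1', '192.168.0.1', 'False', '10']
with open('demo.csv', 'w', newline='') as f:
    # Default QUOTE_MINIMAL: only fields containing the delimiter, a quote
    # character, or a newline get quoted -- none of these do.
    csv.writer(f, delimiter=',').writerow(row)
    # QUOTE_ALL: every field is wrapped in double quotes on output.
    csv.writer(f, delimiter=',', quoting=csv.QUOTE_ALL).writerow(row)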

Error in retrieving financial data for large list of tickers from yahoo finance into a dataframe using for loop

In this particular problem, I have a very long list of tickers for which I want to retrieve some financial information from the Yahoo Finance website using Python.
Here is the list:
tickers = ["OMKAR.BO", "KCLINFRA.BO", "MERMETL.BO", "PRIMIND.BO", "VISIONCO.BO", "PANAFIC.BO", "KARANWO.BO", "SOURCEIND.BO", "WELCURE.BO", "NAVKETAN.BO", "CUBIFIN.BO", "IMPEXFERRO.BO", "MISHTANN.BO", "SUMERUIND.BO", "MISHTANN.BO", "MADHUVEER.BO", "TNTELE.BO", "JMGCORP.BO", "GSLSEC.BO", "DEVKI.BO", "MINAXI.BO", "INNOCORP.BO", "SURYACHAKRA.BO", "ANKITMETAL.BO", "HAVISHA.BO", "SHIVA.BO", "COMFINTE.BO", "KONNDOR.BO", "PAZEL.BO", "SHARPINV.BO", "MIDINFRA.BO", "UNIVPRIM.BO", "ATHARVENT.BO", "FGP.BO", "BKV.BO", "VIVIDHA.BO", "FISCHER.BO", "ADITRI.BO", "GLFL.BO", "RAJOIL.BO", "ALFL.BO", "PURITY.BO", "ARCEEIN.BO", "INTECH.BO", "MIDEASTP.BO", "STANCAP.BO", "OCTAVE.BO", "TRIJAL.BO", "SREEJAYA.BO", "4THGEN.BO", "RICHIRICH.BO", "VIRTUALS.BO", "SAVINFOCO.BO", "TTIENT.BO", "OONE.BO", "TILAK.BO", "XTGLOBAL.BO", "MANGIND.BO", "ROYALIND.BO", "ASHUTPM.BO", "SMPL.BO", "BGPL.BO", "NYSSACORP.BO", "BILENERGY.BO", "YOGISUNG.BO", "DOLPHMED.BO", "PRATIK.BO", "IPOWER.BO", "BIHSPONG.BO", "CAPFIN.BO", "MCLTD.BO", "KGL.BO", "OMNIAX.BO", "HEERAISP.BO", "VISIONCINE.BO", "SWORDEDGE.BO", "AARVINFRA.BO", "ADVENT.BO", "UVDRHOR.BO", "SUNGOLD.BO", "USHDI.BO", "HINDAPL.BO", "IMEC.BO", "ARAVALIS.BO", "SERVOTEACH.BO", "SCAGRO.BO", "UMESLTD.BO", "CHARMS.BO", "NCLRESE.BO", "SYMBIOX.BO", "PRADIP.BO", "INTEGFD.BO", "CLIOINFO.BO", "RRSECUR.BO", "MUKATPIP.BO", "SYNCOMF.BO", "DYNAMICP.BO", "TRABI.BO", "RADAAN.BO", "KIRANSY-B.BO", "RAMSARUP.BO", "UNIMOVR.BO", "MELSTAR.BO", "OMANSH.BO", "VERTEX.BO", "VENTURA.BO", "GEMSPIN.BO", "EXPLICITFIN.BO", "PASARI.BO", "BABA.BO", "MAHAVIRIND.BO", "BAMPSL.BO", "GAJRA.BO", "SUNRAJDI.BO", "ACCEL.BO", "SIMPLXPAP.BO", "PHARMAID.BO", "JATALIA.BO", "TWINSTAR.BO", "CINDRELL.BO", "SHRGLTR.BO", "EUROMULTI.BO", "CRESSAN.BO", "SEVENHILL.BO", "QUADRANT.BO", "PHTRADING.BO", "SIPTL.BO", "HOTELRUGBY.BO", "KAUSHALYA.BO", "YASHRAJC.BO", "ASHAI.BO", "BERYLSE.BO", "LLOYDSTEEL.BO", "SCANPRO.BO", "HBLEAS.BO", "ASHCAP.BO", "SUNSHINE.BO", "AREALTY.BO", "MSCTC.BO", "HARIAEXPO.BO", "CNIRESLTD.BO", "KABRADG.BO", "CLFL.BO", "TRANSASIA.BO", "KACL.BO", "JAIHINDS.BO", "SANBLUE.BO", "DHENUBUILD.BO", "DHENUBUILD.BO", "ODYCORP.BO", "SAWABUSI.BO", "KAKTEX.BO", "GANONPRO.BO", "GENUSPRIME.BO", "EUREKAI.BO", "CHROMATIC.BO", "ISHWATR.BO", "INTEGRA.BO", "KACL.BO", "SSLFINANCE.BO", "ORIENTTR.BO", "ZHINUDYP.BO", "SWADEIN.BO", "SHKALYN.BO", "BAPACK.BO", "MARUTISE.BO", "PMTELELIN.BO", "SPARCSYS.BO", "GOLKONDA.BO", "DECPO.BO", "NATHUEC.BO", "INDOCITY.BO", "IOSYSTEM.BO", "ADVIKCA.BO", "JRFOODS.BO", "INFOMEDIA.BO", "INDRANIB.BO", "REGTRUS.BO", "RAGHUNAT.BO", "DCMFINSERV.BO", "RRIL.BO", "FILATFASH.BO", "ISWL.BO", "ASINPET.BO", "KORE.BO", "UNIOFFICE.BO", "GUJINV.BO", "QUEST.BO", "GLITTEKG.BO", "AMFORG.BO", "LGBFORGE.BO", "MAL.BO", "CYBERMAT.BO", "AGRIMONY.BO", "METKORE.BO", "SKYLMILAR.BO", "KIRANPR.BO", "RAJSPTR.BO", "SHVFL.BO", "MPFSL.BO", "AMITINT.BO", "KREONFIN.BO", "GRAVITY.BO", "KACHCHH.BO", "STELLANT.BO", "DEVINE.BO", "ICSL.BO", "STELLAR.BO", "CORAGRO.BO", "ARCFIN.BO", "GAMMNINFRA.BO", "EMMSONS.BO", "OSCARGLO.BO", "HARIAAPL.BO", "CORNE.BO", "FACORALL.BO", "KANELIND.BO", "INDOASIAF.BO", "BHANDHOS.BO", "GAGANPO.BO", "SELMCL.BO", "VENLONENT.BO", "KBSINDIA.BO", "RAMAPETRO.BO", "UTIQUE.BO", "GUJSTATFIN.BO", "COUNCODOS.BO", "JDORGOCHEM.BO", "ANSHNCO.BO", "SILVERO.BO", "CONSTRONIC.BO", "SIPIND.BO", "ESARIND.BO", "GUJCOTEX.BO", "HILIKS.BO", "MINFY.BO", "LEENEE.BO", "DUGARHOU.BO", "JHACC.BO", "CINERAD.BO", "GCMCAPI.BO", "GCMCOMM.BO", "CHENFERRO.BO", "MANCREDIT.BO", "TRICOMFRU.BO", "VEGETABLE.BO", "JSHL.BO", "HATHWAYB.BO", "JAYIND.BO", "ROYALCU.BO", 
"DHANADACO.BO", "ELCIDIN.BO", "RAGHUTOB.BO", "GISOLUTION.BO", "RAGHUTOB.BO", "CONTICON.BO", "NETWORK.BO", "BANASFN.BO", "CRANESSOFT.BO", "RSCINT.BO", "JPTRLES.BO", "ALOKTEXT.BO", "PRAGBOS.BO", "WELTI.BO", "EKAMLEA.BO", "MASL.BO", "SAFFRON.BO", "SRDAPRT.BO", "FFPL.BO", "RITESHIN.BO", "BLOIN.BO", "YARNSYN.BO", "OISL.BO", "POLYTEX.BO", "SPSINT.BO", "GCMCOMM.BO", "FRONTCAP.BO", "SEZAL.BO", "CITYMAN.BO", "AJEL.BO", "ESCORTSFIN.BO", "ABHIINFRA.BO", "PRATIKSH.BO", "JCTLTD.BO", "GENESIS.BO", "HINDSECR.BO", "GKCONS.BO", "MODWOOL.BO", "ROHITFERRO.BO", "NMSRESRC.BO", "VARIMAN.BO", "WAGEND.BO", "INDLEASE.BO", "APOORVA.BO", "HITTCO.BO", "PREMPIPES.BO", "SRMENERGY.BO", "KEDIACN.BO", "TOYAMIND.BO", "EPSOMPRO.BO", "RICHUNV.BO", "CITYONLINE.BO", "ELANGO.BO", "AMITSEC.BO", "CTL.BO", "LPDC.BO", "CONTCHM.BO", "NTL.BO", "SYBLY.BO", "ELEFLOR.BO", "KMFBLDR.BO", "TRIVIKRAMA.BO", "RUCHINFRA.BO", "PROMACT.BO", "USHAKIRA.BO", "ARUNAHTEL.BO", "CIL.BO", "MOUNTSHIQ.BO", "SPTRSHI.BO", "SEATV.BO", "SWASTIVI.BO", "SUNDARAM.BO", "CREATIVEYE.BO", "EUROASIA.BO", "ANJANIFIN.BO", "ADARSH.BO", "GLOBALCA.BO", "INDERGR.BO", "USGTECH.BO", "RASIELEC.BO", "SHEETAL.BO", "SYLPH.BO", "GOYALASS.BO", "KANSAFB.BO", "ANERI.BO", "DRL.BO", "OSWALOR.BO", "SWAGRUHA.BO", "SARTHAKIND.BO", "GALADA.BO", "OSWAYRN.BO", "TRINITYLEA.BO", "GOLCA.BO", "SODFC.BO", "LEADFIN.BO", "KAYPOWR.BO", "PANELEC.BO", "TARAI.BO", "SANJIVIN.BO", "MKTCREAT.BO", "ECOBOAR.BO", "SUNRINV.BO", "MAYURFL.BO", "GARWAMAR.BO", "SURYAKR.BO", "BESTAGRO.BO", "INDCEMCAP.BO", "EASTSILK.BO", "MPAGI.BO", "HRMNYCP.BO", "RUBRAME.BO", "INCON.BO", "AMRAPLIN.BO", "RESPONSINF.BO", "BACPHAR.BO", "KRISHNACAP.BO", "SHBHAWPA.BO", "TOWASOK.BO", "PADMALAYAT.BO", "MHSGRMS.BO", "JMTAUTOLTD.BO", "WELCON.BO", "UNITEDTE.BO", "MNPLFIN.BO", "PARSHINV.BO", "UNISHIRE.BO", "RAJINFRA.BO", "MMLF.BO", "ALCHCORP.BO", "CHMBBRW.BO", "NOGMIND.BO", "SHRMFGC.BO", "SAMTEX.BO", "SUPERTEX.BO", "JAIHINDPRO.BO", "CENTEXT.BO", "BCG.BO", "GENNEX.BO", "EDUCOMP.BO", "SHIVAGR.BO", "ADINATH.BO", "MINID.BO", "SURANAT&P.BO", "GYANDEV.BO", "AVTIL.BO", "ZSWASTSA.BO", "JINDCAP.BO", "NBFOOT.BO", "SHESHAINDS.BO", "UTLINDS.BO", "MADHUSE.BO", "THAMBBI.BO", "KKPLASTICK.BO", "VAGHANI.BO", "SOLIDCO.BO", "HIMFIBP.BO", "KKFIN.BO", "CSL.BO", "GOPAIST.BO", "BALTE.BO", "ETIL.BO", "PAOS.BO", "RAINBOWDQ.BO", "JAGSONFI.BO", "REGENTRP.BO", "AFEL.BO", "BRIPORT.BO", "SURATEX.BO", "INFRAIND.BO", "SPENTEX.BO", "TITANSEC.BO", "ALPSINDUS.BO", "UNISTRMU.BO", "SPECMKT.BO", "SAENTER.BO", "TOKYOFIN.BO", "TRANSFD.BO", "BSELINFRA.BO", "WELSPLSOL.BO", "SONALAD.BO", "CRIMSON.BO", "UNITY.BO", "VIKASPROP.BO", "VELHO.BO", "SYNCOM.BO", "CYBELEIND.BO", "VANICOM.BO", "THAKRAL.BO", "INDOEURO.BO", "ALAN SCOTT.BO", "SALSTEEL.BO", "ADITYA.BO", "HASTIFIN.BO", "NIBE.BO", "JOINTECAED.BO", "GANGAPHARM.BO", "SBECSUG.BO", "EASTBUILD.BO", "LORDSHOTL.BO", "IYKOTHITE.BO", "URJAGLOBA.BO", "DHRUVCA.BO", "RAP.BO", "LAHL.BO", "MONNETIN.BO", "SETUINFRA.BO", "RRMETAL.BO", "GTLINFRA.BO", "ECOM.BO", "TTML.BO", "ARNOLD.BO", "FLORATX.BO", "GARODCH.BO", "PUROHITCON.BO", "KAMRLAB.BO", "MILESTONE.BO", "NETLINK.BO", "MARSONS.BO", "SESL.BO", "OBRSESY.BO", "VRWODAR.BO", "NUWAY.BO", "CJGEL.BO", "REDEXPR.BO", "AISHWARYA.BO", "PICTUREHS.BO", "BAGFILMS.BO", "WOODSVILA.BO", "MEHSECU.BO", "MBPARIKH.BO", "SICLTD.BO", "GITARENEW.BO", "DESHRAK.BO", "SENINFO.BO", "TELECANOR.BO", "STLSTRINF.BO", "JRELTD.BO", "OROSMITHS.BO", "MUNOTHFI.BO", "AVAILFC.BO", "NITINFIRE.BO", "PIFL.BO", "BLBLIMITED.BO", "SRECR.BO", "NAGTECH.BO", "ARISE.BO", "FRONTBUSS.BO", "PAEL.BO", "ROLLT.BO", "VALLABH.BO", "RANASUG.BO", 
"STRATMONT.BO", "SANTOSHF.BO", "SVAINDIA.BO", "PARKERAC.BO", "VSFPROJ.BO", "AUROCOK.BO", "HKG.BO", "CASTEXTECH.BO", "HOWARHO.BO", "RTNPOWER.BO", "SHRIBCL.BO", "GARWSYN.BO", "MEHSECU.BO", "PRAVEG.BO", "MEHTAHG.BO", "RTNINFRA.BO", "MMWL.BO", "GAGAN.BO", "WWALUM.BO", "HEMANG.BO", "DOLAT.BO", "SUPTANERY.BO", "EUROCERA.BO", "SURFI.BO", "TTIL.BO", "VARDHMAN.BO", "SUPERBAK.BO", "ESHAMEDIA.BO", "CONTILI.BO", "CESL.BO", "DAULAT.BO", "RAJATH.BO", "SURYVANSP.BO", "KUWERIN.BO", "SVARTCORP.BO", "SKRABUL.BO", "WSIND.BO", "DELTA.BO", "SIPL.BO", "RMCHEM.BO", "STDBAT.BO", "PICCASUG.BO", "AGIOPAPER.BO", "SHREYASI.BO", "CCCL.BO", "GAL.BO", "GOLECHA.BO", "RAAJMEDI.BO", "KINETRU.BO", "ZKHANDEN.BO", "LAKHOTIA.BO", "SANINFRA.BO", "KABSON.BO", "ENTRINT.BO", "SIROHIA.BO", "3IINFOTECH.BO", "MEHIF.BO", "BASANTGL.BO", "MAITRI.BO", "CEENIK.BO", "MAXIMAA.BO", "STANPACK.BO", "CRANEINFRA.BO", "CHITRTX.BO", "CAPRICORN.BO", "TAVERNIER.BO", "JPPOWER.BO", "PATIDAR.BO", "BANSTEA.BO", "NEWMKTADV.BO", "DANUBE.BO", "MAHALXSE.BO", "SARDAPPR.BO", "KZLFIN.BO", "ABHIFIN.BO", "AVI.BO", "GAYATRIBI.BO", "VXLINSTR.BO", "ADITYASP.BO", "OMKARPH.BO", "ESSARSEC.BO", "SALSAIN.BO", "NDASEC.BO", "PARABDRUGS.BO", "EPIC.BO", "HIGHSTREE.BO", "TRIMURTHI.BO", "DBSTOCKBRO.BO", "ADARSHPL.BO", "SONAL.BO", "FRASER.BO", "BRIDGESE.BO", "GBGLOBAL.BO", "UNRYLMA.BO", "ANNAINFRA.BO", "RTEXPO.BO", "FUNDVISER.BO", "LIBORD.BO", "HYPERSOFT.BO", "JTAPARIA.BO", "ANUBHAV.BO", "MEGFI.BO", "ACTIONFI.BO", "BCLENTERPR.BO", "RAMSONS.BO", "GUJARATPOLY.BO", "SBFL.BO", "CHDCHEM.BO", "MONEYBOXX.BO", "ALSL.BO", "DEVHARI.BO", "NARPROP.BO", "PIONAGR.BO", "JAYBHCR.BO", "QGO.BO", "KRIFILIND.BO", "GOLDCOINHF.BO", "GALLOPENT.BO", "MIC.BO", "INTELLCAP.BO", "ABIRAFN.BO", "OLPCL.BO", "ZSHERAPR.BO", "CELLA.BO", "ZSANMCOM.BO", "STEELCO.BO", "VFL.BO", "MODAIRY.BO", "ZSANMCOM.BO", "STEELCO.BO", "SHUKJEW.BO", "JAYKAY.BO", "MIC.BO", "MODAIRY.BO", "RGIL.BO", "GSBFIN.BO", "OLPCL.BO", "HINDMOTORS.BO", "GAJANANSEC.BO", "MKEXIM.BO", "BERLDRG.BO", "KUBERJI.BO", "ADDIND.BO", "INDOSOLAR.BO", "GOLDCOINHF.BO", "ACIIN.BO", "UNITINT.BO", "SDC.BO", "RAJKSYN.BO", "CHAMAK.BO", "BHILSPIN.BO", "PANORAMA.BO", "REGAL.BO", "KRRAIL.BO", "AMS.BO", "PARIKSHA.BO", "SURYAINDIA.BO", "ADHARSHILA.BO", "AMARNATH.BO", "JAYATMA.BO", "CANOPYFIN.BO", "FMEC.BO", "CITL.BO", "DAL.BO", "YORKEXP.BO", "MEWATZI.BO"]
Then I do the following, where I want to get the market capitalization for each of the tickers in the above list:
from pandas_datareader import data
import pandas as pd
tickers = tickers[0:30]
dat = data.get_quote_yahoo(tickers)['marketCap']
print(dat)
I am able to fetch 20-30 tickers using the above code, but if I try to pull all of them, the code throws errors such as "request timed out" and "list out of range".
Then I tried to fetch the data one by one using a for loop:
df = pd.DataFrame(columns=["Ticker", "MarketCapINR"])
columns = list(df)
for tick in tickers:
    dat = data.get_quote_yahoo(tick)['marketCap']
    zipped = zip(columns, dat)
    a_dictionary = dict(zipped)
    df = df.append(a_dictionary, ignore_index=True)
This returned two errors: one was the list-out-of-bounds error, and the other (when I tried to shorten the list using slicing) was "request timed out".
Is there a way to get all the data, with ticker names in the first column and market-cap values in the second column of a pandas DataFrame?
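One common workaround for the timeouts is to request the tickers in small batches with a pause between calls. A rough sketch, assuming the same pandas_datareader get_quote_yahoo call used above (the batch size and pause are arbitrary):
import time
import pandas as pd
from pandas_datareader import data

def fetch_market_caps(symbols, batch_size=25, pause=2):
    # Collect the marketCap series batch by batch instead of in one request.
    frames = []
    for start in range(0, len(symbols), batch_size):
        batch = symbols[start:start + batch_size]
        try:
            frames.append(data.get_quote_yahoo(batch)['marketCap'])
        except Exception as exc:
            print('batch starting at', start, 'failed:', exc)  # skip and continue
        time.sleep(pause)  # go easy on the endpoint
    caps = pd.concat(frames)
    return caps.rename_axis('Ticker').reset_index(name='MarketCapINR')

# caps_df = fetch_market_caps(tickers)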
Here's a solution using a package called yahooquery. Disclaimer: I am the author of the package.
from yahooquery import Ticker
import pandas as pd
tickers = [...] # Use your list above
t = Ticker(tickers)
data = t.quotes
df = pd.DataFrame(data).T
df['marketCap']
OMKAR.BO 4750000
KCLINFRA.BO 26331000
MERMETL.BO 11472136
PRIMIND.BO 22697430
VISIONCO.BO 14777874
...
CANOPYFIN.BO 93859304
FMEC.BO 10542380
CITL.BO NaN
YORKEXP.BO 57503880
MEWATZI.BO 51200000
Name: marketCap, Length: 632, dtype: object
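To get the two-column layout asked for (ticker names and market caps), one more step on that result works; a small usage sketch:
caps = df['marketCap'].reset_index()   # the index holds the ticker symbols
caps.columns = ['Ticker', 'MarketCapINR']
print(caps.head())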

Looking over a csv file for various conditions

00,0,6098
00,1,6098
00,2,6098
00,3,6098
00,4,6094
00,5,6094
01,0,8749
01,1,8749
01,2,8749
01,3,88609
01,4,88609
01,5,88609
01,6,88611
01,7,88611
01,8,88611
02,0,9006
02,1,9006
02,2,4355
02,3,9013
02,4,9013
02,5,9013
02,6,4341
02,7,4341
02,8,4341
02,9,4341
03,0,6285
03,1,6285
03,2,6285
03,3,6285
03,4,6278
03,5,6278
03,6,6278
03,7,6278
03,8,8960
I have a CSV file, part of which is shown above.
What I want to do is: whenever column 0 has the same value, build an array of the column 2 values and print it. For example, for 00 it makes the array
a = [6098,6098,6098,6098,6094,6094]
and for 01 it makes the array
a = [8749,8749,88609,88609,88609,88611,88611,88611]
I don't know how to loop over this file.
This solution assumes that the first column will appear in sorted order in the file.
def main():
    import csv
    from itertools import groupby

    with open("csv.csv") as file:
        reader = csv.reader(file)
        rows = [[row[0]] + [int(item) for item in row[1:]] for row in reader]

    groups = {}
    for key, group in groupby(rows, lambda row: row[0]):
        groups[key] = [row[2] for row in group]

    print(groups["00"])
    print(groups["01"])
    print(groups["02"])
    print(groups["03"])
    return 0

if __name__ == "__main__":
    import sys
    sys.exit(main())
Output:
[6098, 6098, 6098, 6098, 6094, 6094]
[8749, 8749, 8749, 88609, 88609, 88609, 88611, 88611, 88611]
[9006, 9006, 4355, 9013, 9013, 9013, 4341, 4341, 4341, 4341]
[6285, 6285, 6285, 6285, 6278, 6278, 6278, 6278, 8960]
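If the first column might not already be grouped in the file, sorting the rows first keeps groupby correct. A minimal sketch of the extra step, reusing the rows list built above:
from itertools import groupby

rows.sort(key=lambda row: row[0])  # make equal keys adjacent before grouping
groups = {key: [row[2] for row in group]
          for key, group in groupby(rows, key=lambda row: row[0])}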
The idea is to use a dictionary in which 00, 01, etc. are the keys and each value is a list. So you need to iterate through the CSV data and append each value to the corresponding key.
import csv

result = {}
with open("your csv file", "r") as csvfile:
    data = csv.reader(csvfile)
    for row in data:
        if row[0] in result:
            result[row[0]].append(row[2])
        else:
            result[row[0]] = [row[2]]
print(result)
Here is another approach, using defaultdict:
from collections import defaultdict
txt = '''00,0,6098
00,1,6098
00,2,6098
00,3,6098
00,4,6094
00,5,6094
01,0,8749
01,1,8749
01,2,8749
01,3,88609
01,4,88609
01,5,88609
01,6,88611
01,7,88611
01,8,88611
02,0,9006
02,1,9006
02,2,4355
02,3,9013
02,4,9013
02,5,9013
02,6,4341
02,7,4341
02,8,4341
02,9,4341
03,0,6285
03,1,6285
03,2,6285
03,3,6285
03,4,6278
03,5,6278
03,6,6278
03,7,6278
03,8,8960'''
data_holder = defaultdict(list)
lines = txt.split('\n')
for line in lines:
    fields = line.split(',')
    data_holder[fields[0]].append(fields[2])

for k, v in data_holder.items():
    print('{} -> {}'.format(k, v))
Output:
02 -> ['9006', '9006', '4355', '9013', '9013', '9013', '4341', '4341', '4341', '4341']
03 -> ['6285', '6285', '6285', '6285', '6278', '6278', '6278', '6278', '8960']
00 -> ['6098', '6098', '6098', '6098', '6094', '6094']
01 -> ['8749', '8749', '8749', '88609', '88609', '88609', '88611', '88611', '88611']
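If the data lives in a file rather than a pasted string, the same pattern works with csv.reader. A minimal sketch, assuming the file is named data.csv:
import csv
from collections import defaultdict

data_holder = defaultdict(list)
with open('data.csv', newline='') as f:
    for fields in csv.reader(f):
        data_holder[fields[0]].append(fields[2])

for k, v in data_holder.items():
    print('{} -> {}'.format(k, v))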

Python - How to count specific section in a list

I'm brand new to Python and I'm struggling with how to add up certain sections of a CSV file in Python. I'm not allowed to use "import csv".
I'm importing the TipJoke CSV file from https://vincentarelbundock.github.io/Rdatasets/datasets.html
This is the only code I have so far that works, and I'm at a total loss as to where to go from here.
if __name__ == '__main__':
    from pprint import pprint
    from string import punctuation

    f = open("TipJoke.csv", "r")
    tipList = []
    for line in f:
        # delete the quotes
        line = line.replace('"', '')
        tipList.append(line)
    pprint(tipList)
Output:
[',Card,Tip,Ad,Joke,None\n',
'1,None,1,0,0,1\n',
'2,Joke,1,0,1,0\n',
'3,Ad,0,1,0,0\n',
'4,None,0,0,0,1\n',
'5,None,1,0,0,1\n',
'6,None,0,0,0,1\n',
'7,Ad,0,1,0,0\n',
'8,Ad,0,1,0,0\n',
'9,None,0,0,0,1\n',
'10,None,0,0,0,1\n',
'11,None,1,0,0,1\n',
'12,Ad,0,1,0,0\n',
'13,None,0,0,0,1\n',
'14,Ad,1,1,0,0\n',
'15,Joke,1,0,1,0\n',
'16,Joke,0,0,1,0\n',
'17,Joke,1,0,1,0\n',
'18,None,0,0,0,1\n',
'19,Joke,0,0,1,0\n',
'20,None,0,0,0,1\n',
'21,Ad,1,1,0,0\n',
'22,Ad,1,1,0,0\n',
'23,Ad,0,1,0,0\n',
'24,Joke,0,0,1,0\n',
'25,Joke,1,0,1,0\n',
'26,Joke,0,0,1,0\n',
'27,None,1,0,0,1\n',
'28,Joke,1,0,1,0\n',
'29,Joke,1,0,1,0\n',
'30,None,1,0,0,1\n',
'31,Joke,0,0,1,0\n',
'32,None,1,0,0,1\n',
'33,Joke,1,0,1,0\n',
'34,Ad,0,1,0,0\n',
'35,Joke,0,0,1,0\n',
'36,Ad,1,1,0,0\n',
'37,Joke,0,0,1,0\n',
'38,Ad,0,1,0,0\n',
'39,Joke,0,0,1,0\n',
'40,Joke,0,0,1,0\n',
'41,Joke,1,0,1,0\n',
'42,None,0,0,0,1\n',
'43,None,0,0,0,1\n',
'44,Ad,0,1,0,0\n',
'45,None,0,0,0,1\n',
'46,None,0,0,0,1\n',
'47,Ad,0,1,0,0\n',
'48,Joke,0,0,1,0\n',
'49,Joke,1,0,1,0\n',
'50,None,1,0,0,1\n',
'51,None,0,0,0,1\n',
'52,Joke,1,0,1,0\n',
'53,Joke,1,0,1,0\n',
'54,Joke,0,0,1,0\n',
'55,None,1,0,0,1\n',
'56,Ad,0,1,0,0\n',
'57,Joke,0,0,1,0\n',
'58,None,0,0,0,1\n',
'59,Ad,0,1,0,0\n',
'60,Joke,1,0,1,0\n',
'61,Ad,0,1,0,0\n',
'62,None,1,0,0,1\n',
'63,Joke,0,0,1,0\n',
'64,Ad,0,1,0,0\n',
'65,Joke,0,0,1,0\n',
'66,Ad,0,1,0,0\n',
'67,Ad,0,1,0,0\n',
'68,Ad,0,1,0,0\n',
'69,None,0,0,0,1\n',
'70,Joke,1,0,1,0\n',
'71,None,1,0,0,1\n',
'72,None,0,0,0,1\n',
'73,None,0,0,0,1\n',
'74,Joke,0,0,1,0\n',
'75,Ad,1,1,0,0\n',
'76,Ad,0,1,0,0\n',
'77,Ad,1,1,0,0\n',
'78,Joke,0,0,1,0\n',
'79,Joke,0,0,1,0\n',
'80,Ad,1,1,0,0\n',
'81,Ad,0,1,0,0\n',
'82,None,0,0,0,1\n',
'83,Ad,0,1,0,0\n',
'84,Joke,0,0,1,0\n',
'85,Joke,0,0,1,0\n',
'86,Ad,1,1,0,0\n',
'87,None,1,0,0,1\n',
'88,Joke,1,0,1,0\n',
'89,Ad,0,1,0,0\n',
'90,None,0,0,0,1\n',
'91,None,0,0,0,1\n',
'92,Joke,0,0,1,0\n',
'93,Joke,0,0,1,0\n',
'94,Ad,0,1,0,0\n',
'95,Ad,0,1,0,0\n',
'96,Ad,0,1,0,0\n',
'97,Joke,1,0,1,0\n',
'98,None,0,0,0,1\n',
'99,None,0,0,0,1\n',
'100,None,1,0,0,1\n',
'101,Joke,0,0,1,0\n',
'102,Joke,0,0,1,0\n',
'103,Ad,1,1,0,0\n',
'104,Ad,0,1,0,0\n',
'105,Ad,0,1,0,0\n',
'106,Ad,1,1,0,0\n',
'107,Ad,0,1,0,0\n',
'108,None,0,0,0,1\n',
'109,Ad,0,1,0,0\n',
'110,Joke,1,0,1,0\n',
'111,None,0,0,0,1\n',
'112,Ad,0,1,0,0\n',
'113,Ad,0,1,0,0\n',
'114,None,0,0,0,1\n',
'115,Ad,0,1,0,0\n',
'116,None,0,0,0,1\n',
'117,None,0,0,0,1\n',
'118,Ad,0,1,0,0\n',
'119,None,1,0,0,1\n',
'120,Ad,1,1,0,0\n',
'121,Ad,0,1,0,0\n',
'122,Ad,1,1,0,0\n',
'123,None,0,0,0,1\n',
'124,None,0,0,0,1\n',
'125,Joke,1,0,1,0\n',
'126,Joke,1,0,1,0\n',
'127,Ad,0,1,0,0\n',
'128,Joke,0,0,1,0\n',
'129,Joke,0,0,1,0\n',
'130,Ad,0,1,0,0\n',
'131,None,0,0,0,1\n',
'132,None,0,0,0,1\n',
'133,None,0,0,0,1\n',
'134,Joke,1,0,1,0\n',
'135,Ad,0,1,0,0\n',
'136,None,0,0,0,1\n',
'137,Joke,0,0,1,0\n',
'138,Ad,0,1,0,0\n',
'139,Ad,0,1,0,0\n',
'140,None,0,0,0,1\n',
'141,Joke,0,0,1,0\n',
'142,None,0,0,0,1\n',
'143,Ad,0,1,0,0\n',
'144,None,1,0,0,1\n',
'145,Joke,0,0,1,0\n',
'146,Ad,0,1,0,0\n',
'147,Ad,0,1,0,0\n',
'148,Ad,0,1,0,0\n',
'149,Joke,1,0,1,0\n',
'150,Ad,1,1,0,0\n',
'151,Joke,1,0,1,0\n',
'152,None,0,0,0,1\n',
'153,Ad,0,1,0,0\n',
'154,None,0,0,0,1\n',
'155,None,0,0,0,1\n',
'156,Ad,0,1,0,0\n',
'157,Ad,0,1,0,0\n',
'158,Joke,0,0,1,0\n',
'159,None,0,0,0,1\n',
'160,Joke,1,0,1,0\n',
'161,None,1,0,0,1\n',
'162,Ad,1,1,0,0\n',
'163,Joke,0,0,1,0\n',
'164,Joke,0,0,1,0\n',
'165,Ad,0,1,0,0\n',
'166,Joke,1,0,1,0\n',
'167,Joke,1,0,1,0\n',
'168,Ad,0,1,0,0\n',
'169,Joke,1,0,1,0\n',
'170,Joke,0,0,1,0\n',
'171,Ad,0,1,0,0\n',
'172,Joke,0,0,1,0\n',
'173,Joke,0,0,1,0\n',
'174,Ad,0,1,0,0\n',
'175,None,0,0,0,1\n',
'176,Joke,1,0,1,0\n',
'177,Ad,0,1,0,0\n',
'178,Joke,0,0,1,0\n',
'179,Joke,0,0,1,0\n',
'180,None,0,0,0,1\n',
'181,None,0,0,0,1\n',
'182,Ad,0,1,0,0\n',
'183,None,0,0,0,1\n',
'184,None,0,0,0,1\n',
'185,None,0,0,0,1\n',
'186,None,0,0,0,1\n',
'187,Ad,0,1,0,0\n',
'188,None,1,0,0,1\n',
'189,Ad,0,1,0,0\n',
'190,Ad,0,1,0,0\n',
'191,Ad,0,1,0,0\n',
'192,Joke,1,0,1,0\n',
'193,Joke,0,0,1,0\n',
'194,Ad,0,1,0,0\n',
'195,None,0,0,0,1\n',
'196,Joke,1,0,1,0\n',
'197,Joke,0,0,1,0\n',
'198,Joke,1,0,1,0\n',
'199,Ad,0,1,0,0\n',
'200,None,0,0,0,1\n',
'201,Joke,1,0,1,0\n',
'202,Joke,0,0,1,0\n',
'203,Joke,0,0,1,0\n',
'204,Ad,0,1,0,0\n',
'205,None,0,0,0,1\n',
'206,Ad,0,1,0,0\n',
'207,Ad,0,1,0,0\n',
'208,Joke,0,0,1,0\n',
'209,Ad,0,1,0,0\n',
'210,Joke,0,0,1,0\n',
'211,None,0,0,0,1\n']
I'm currently trying to find the total number of entries for a specified card type and the percentage of tips given for that card type, with two decimal places of precision. The Tip column is the 0 or 1 right after the card type (None, Ad, Joke).
If you are allowed to use the pandas library, then:
import pandas as pd
df = pd.read_csv("TipJoke.csv")
df is a pandas DataFrame object on which you can perform whatever filtering you need.
For example, if you want to get the data for Joke, you can filter like this:
print(df[df["Card"] == "Joke"])
Though I'm just providing the direction here, not the whole logic for your question.
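Building on that direction, here is a hedged sketch of the full calculation with pandas (it assumes the Card and Tip column names from the file header shown above):
import pandas as pd

df = pd.read_csv("TipJoke.csv")

# Entries and tips per card type, with the tip percentage rounded to two decimals.
summary = df.groupby("Card")["Tip"].agg(total="count", tips="sum")
summary["tip_pct"] = (100 * summary["tips"] / summary["total"]).round(2)
print(summary)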
This works:
from pprint import pprint
from string import punctuation

counts = {"Joke": 0, "Ad": 0, "None": 0}
with open("TipJoke.csv", "r") as f:
    for line in f:
        line_clean = line.replace('"', "").replace("\n", "").split(",")
        try:
            counts[line_clean[1]] += int(line_clean[2])
        except:
            pass
print(counts)
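That dictionary holds the tip totals per card type; the percentages still need the entry counts as well. A small follow-on sketch without the csv module, assuming the same file layout:
counts = {"Joke": 0, "Ad": 0, "None": 0}  # entries per card type
tips = {"Joke": 0, "Ad": 0, "None": 0}    # tips per card type

with open("TipJoke.csv", "r") as f:
    next(f)  # skip the header row
    for line in f:
        fields = line.replace('"', '').strip().split(',')
        card, tip = fields[1], int(fields[2])
        counts[card] += 1
        tips[card] += tip

for card in counts:
    pct = 100 * tips[card] / counts[card]
    print(f"{card}: {counts[card]} entries, {pct:.2f}% tipped")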

How to load a CSV with nested arrays

I came across a dataset of Twitter users (Kaggle Source), but I found that the dataset has a rather strange format. It contains a row with column headers, and then rows in which some fields are essentially JSON arrays. The dataset is also quite large, which makes it very difficult to convert the entire file into JSON objects.
What is a good way to load this data into Python, preferably as a pandas DataFrame?
Example of Data
id,screenName,tags,avatar,followersCount,friendsCount,lang,lastSeen,tweetId,friends
"1969527638","LlngoMakeEmCum_",[ "#nationaldogday" ],"http://pbs.twimg.com/profile_images/534286217882652672/FNmiQYVO_normal.jpeg",319,112,"en",1472271687519,"769310701580083200",[ "1969574754", "1969295556", "1969284056", "1969612214", "1970067476", "1969797386", "1969430539", "1969840064", "1969698176", "1970005154", "283011644", "1969901029", "1969563175", "1969302314", "1969978662", "1969457936", "1969667533", "1969547821", "1969943478", "1969668032", "283006529", "1969809440", "1969601096", "1969298856", "1969331652", "1969385498", "1969674368", "1969565263", "1970144676", "1969745390", "1969947438", "1969734134", "1969801326", "1969324008", "1969259820", "1969535827", "1970072989", "1969771688", "1969437804", "1969507394", "1969509972", "1969751588", "283012808", "1969302888", "1970224440", "1969603532", "283011244", "1969501046", "1969887518", "1970153138", "1970267527", "1969941955", "1969421654", "1970013110", "1969544905", "1969839590", "1969876500", "1969674625", "1969337952", "1970046536", "1970090934", "1969419133", "1969517215", "1969787869", "1969298065", "1970149771", "1969422638", "1969504268", "1970025554", "1969776001", "1970138611", "1969316186", "1969547558", "1969689272", "283009727", "283015491", "1969526874", "1969662210", "1969536164", "1969320008", "1969893793", "1970158393", "1969365936", "1970194418", "1969942094", "1969631580", "1969704756", "1969920092", "1969712882", "1969791680", "1969408164", "1969754851", "1970205480", "1969840267", "1969443211", "1969706762", "1969692698", "1969751576", "1969486796", "1969286630", "1969686674", "1969833492", "1969294814", "1969472719", "1969685018", "283008559", "283011243", "1969680078", "1969545697", "1969646412", "1969442725", "1969692529" ]
"51878493","_notmichelle",[ "#nationaldogday" ],"http://pbs.twimg.com/profile_images/761977602173046786/4_utEHsD_normal.jpg",275,115,"en",1472270622663,"769309490038439936",[ "60789485", "2420931980", "2899776756", "127410795", "38747286", "1345516880", "236076395", "1242946609", "2567887488", "280777286", "2912446303", "1149916171", "3192577639", "239569380", "229974168", "389097282", "266336410", "1850301204", "2364414805", "812302213", "2318240348", "158634793", "542282350", "569664772", "766573472", "703551325", "168564432", "261054460", "402980453", "562547390", "539630318", "165167145", "22216387", "427568285", "61033129", "213519434", "373092437", "170762012", "273601960", "322108757", "1681816280", "357843027", "737471496", "406541143", "1084122632", "633477616", "537821327", "793079732", "2386380799", "479015607", "783354019", "365171478", "625002575", "2326207404", "1653286842", "1676964216", "2296617326", "1583692190", "1315393903", "377660026", "2235123476", "792779641", "351222527", "444993309", "588396446", "377629159", "469383424", "1726612471", "415230430", "942443390", "360924168", "318593248", "565022085", "319679735", "632508305", "377638254", "1392782078", "584483723", "377703135", "180463340", "564978577", "502517645", "1056960042", "285097108", "410245879", "159121042", "570399371", "502348447", "960927356", "377196638", "478142245", "335043809", "73546116", "11348282", "901302409", "53255593", "515983155", "391774800", "62351523", "724792351", "346296289", "152520627", "559053427", "508019115", "349996133", "378859519", "65120103", "190070557", "339868374", "417355200", "256729771", "16171898", "45266183", "16143507", "165258639" ]
We could start with something like this:
(We might need to rethink the use of | though; we could go for something more exotic, like ╡.)
import pandas as pd
import io
import json
data = '''\
id,screenName,tags,avatar,followersCount,friendsCount,lang,lastSeen,tweetId,friends
"1969527638","LlngoMakeEmCum_",[ "#nationaldogday" ],"http://pbs.twimg.com/profile_images/534286217882652672/FNmiQYVO_normal.jpeg",319,112,"en",1472271687519,"769310701580083200",[ "1969574754", "1969295556", "1969284056", "1969612214", "1970067476", "1969797386", "1969430539", "1969840064", "1969698176", "1970005154", "283011644", "1969901029", "1969563175", "1969302314", "1969978662", "1969457936", "1969667533", "1969547821", "1969943478", "1969668032", "283006529", "1969809440", "1969601096", "1969298856", "1969331652", "1969385498", "1969674368", "1969565263", "1970144676", "1969745390", "1969947438", "1969734134", "1969801326", "1969324008", "1969259820", "1969535827", "1970072989", "1969771688", "1969437804", "1969507394", "1969509972", "1969751588", "283012808", "1969302888", "1970224440", "1969603532", "283011244", "1969501046", "1969887518", "1970153138", "1970267527", "1969941955", "1969421654", "1970013110", "1969544905", "1969839590", "1969876500", "1969674625", "1969337952", "1970046536", "1970090934", "1969419133", "1969517215", "1969787869", "1969298065", "1970149771", "1969422638", "1969504268", "1970025554", "1969776001", "1970138611", "1969316186", "1969547558", "1969689272", "283009727", "283015491", "1969526874", "1969662210", "1969536164", "1969320008", "1969893793", "1970158393", "1969365936", "1970194418", "1969942094", "1969631580", "1969704756", "1969920092", "1969712882", "1969791680", "1969408164", "1969754851", "1970205480", "1969840267", "1969443211", "1969706762", "1969692698", "1969751576", "1969486796", "1969286630", "1969686674", "1969833492", "1969294814", "1969472719", "1969685018", "283008559", "283011243", "1969680078", "1969545697", "1969646412", "1969442725", "1969692529" ]
"51878493","_notmichelle",[ "#nationaldogday" ],"http://pbs.twimg.com/profile_images/761977602173046786/4_utEHsD_normal.jpg",275,115,"en",1472270622663,"769309490038439936",[ "60789485", "2420931980", "2899776756", "127410795", "38747286", "1345516880", "236076395", "1242946609", "2567887488", "280777286", "2912446303", "1149916171", "3192577639", "239569380", "229974168", "389097282", "266336410", "1850301204", "2364414805", "812302213", "2318240348", "158634793", "542282350", "569664772", "766573472", "703551325", "168564432", "261054460", "402980453", "562547390", "539630318", "165167145", "22216387", "427568285", "61033129", "213519434", "373092437", "170762012", "273601960", "322108757", "1681816280", "357843027", "737471496", "406541143", "1084122632", "633477616", "537821327", "793079732", "2386380799", "479015607", "783354019", "365171478", "625002575", "2326207404", "1653286842", "1676964216", "2296617326", "1583692190", "1315393903", "377660026", "2235123476", "792779641", "351222527", "444993309", "588396446", "377629159", "469383424", "1726612471", "415230430", "942443390", "360924168", "318593248", "565022085", "319679735", "632508305", "377638254", "1392782078", "584483723", "377703135", "180463340", "564978577", "502517645", "1056960042", "285097108", "410245879", "159121042", "570399371", "502348447", "960927356", "377196638", "478142245", "335043809", "73546116", "11348282", "901302409", "53255593", "515983155", "391774800", "62351523", "724792351", "346296289", "152520627", "559053427", "508019115", "349996133", "378859519", "65120103", "190070557", "339868374", "417355200", "256729771", "16171898", "45266183", "16143507", "165258639" ]'''
# Create new separator (|) after 9th comma (',')
data = '\n'.join(['|'.join(row.split(',',9)) for row in data.split('\n')])
# REPLACE WITH THIS FOR A REAL FILE
#with open('path/to/file') as f:
#    data = '\n'.join(['|'.join(row.split(',',9)) for row in f.read().split('\n')])
# Read dataframe
df = pd.read_csv(io.StringIO(data), sep='|')
# Convert strings to objects with json module:
df['friends'] = df['friends'].apply(lambda x: json.loads(x))
df['tags'] = df['tags'].apply(lambda x: json.loads(x))
Safer approach:
import pandas as pd
import io
import json
with open('path/to/file') as f:
    columns, *rows = [row.split(',',9) for row in f.read().split('\n')]

df = pd.DataFrame(rows, columns=columns)

# Convert strings to objects with json module:
df['friends'] = df['friends'].apply(lambda x: json.loads(x))
df['tags'] = df['tags'].apply(lambda x: json.loads(x))
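A quick way to confirm that the nested columns came through as Python lists rather than raw strings (a usage sketch on the DataFrame built above):
print(type(df.loc[0, 'friends']))  # expect <class 'list'>
print(df['friends'].apply(len))    # number of friend ids per row
print(df['tags'].head())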
