Running 1000 functions gracefully using python multi-processing - python
I'm trying to receive stock data for about 1000 stocks, to speed up the process I'm using multiprocessing, unfortunately due to the large amount of stock data I'm trying to receive python as a whole just crashes.
Is there a way to use multiprocessing without python crashing, I understand it would still take some time to do all of the 1000 stocks, but all I need is to do this process as fast as possible.
import threading
import yfinance as yf
from multiprocessing import Process
database = {}
mylock = threading.RLock()
stocks = ['AAU', 'ABEO', 'ABEV', 'ABIO', 'ABUS', 'ACCO', 'ACER', 'ACIU', 'ACOR', 'ACRX', 'ACST', 'ACTG', 'ADAP', 'ADIL', 'ADMA', 'ADMP', 'ADT', 'ADTX', 'ADXS', 'AEG', 'AEHL', 'AEHR', 'AEMD', 'AESE', 'AEY', 'AEZS', 'AFIN', 'AFMD', 'AGEN', 'AGI', 'AGRO', 'AGRX', 'AGS', 'AGTC', 'AHPI', 'AHT', 'AIHS', 'AIKI', 'AIM', 'AINC', 'AIRI', 'AIV', 'AKBA', 'AKER', 'AKTX', 'ALNA', 'ALRN', 'ALSK', 'AM', 'AMBO', 'AMC', 'AMPE', 'AMPY', 'AMRN', 'AMRS', 'AMRX', 'AMTX', 'ANCN', 'ANH', 'ANIX', 'ANPC', 'ANTE', 'ANY', 'APDN', 'APM', 'APRE', 'APRN', 'APTO', 'APTS', 'APTX', 'APWC', 'AQMS', 'AQST', 'AR', 'ARAY', 'ARC', 'ARCO', 'ARDX', 'AREC', 'ARKO', 'ARLO', 'ARLP', 'AROC', 'ARPO', 'ARTL', 'ASC', 'ASLN', 'ASM', 'ASMB', 'ASRT', 'ASTC', 'ASX', 'ATAX', 'ATHE', 'ATHX', 'ATIF', 'ATNF', 'ATNM', 'ATOS', 'ATRS', 'ATXI', 'AUMN', 'AUTO', 'AUVI', 'AUY', 'AVCO', 'AVDL', 'AVEO', 'AVGR', 'AVXL', 'AWH', 'AWX', 'AXAS', 'AXL', 'AXU', 'AYRO', 'AYTU', 'AZRX', 'BBAR', 'BBD', 'BBGI', 'BBI', 'BBIG', 'BBVA', 'BBW', 'BCDA', 'BCLI', 'BCRX', 'BCS', 'BDR', 'BDSI', 'BEST', 'BGCP', 'BGI', 'BHAT', 'BHR', 'BHTG', 'BIMI', 'BIOC', 'BIOL', 'BKCC', 'BKD', 'BKEP', 'BKYI', 'BLCM', 'BLCT', 'BLIN', 'BLRX', 'BLU', 'BMRA', 'BNED', 'BNTC', 'BORR', 'BOXL', 'BPT', 'BPTH', 'BQ', 'BREZR', 'BRFS', 'BRN', 'BRPAR', 'BRQS', 'BRY', 'BSBR', 'BSGM', 'BSM', 'BSMX', 'BTG', 'BTU', 'BVXV', 'BW', 'BWEN', 'BXRX', 'BYFC', 'CAAP', 'CAAS', 'CALA', 'CAN', 'CANF', 'CAPR', 'CARV', 'CASA', 'CASI', 'CATB', 'CBAT', 'CBAY', 'CBIO', 'CBLI', 'CCO', 'CCRC', 'CCRN', 'CDE', 'CDEV', 'CDTX', 'CDXC', 'CEI', 'CEIX', 'CEMI', 'CERC', 'CERS', 'CETX', 'CFMS', 'CGIX', 'CHEK', 'CHMA', 'CHNR', 'CHRA', 'CHS', 'CHU', 'CIDM', 'CIG', 'CIO', 'CJJD', 'CKPT', 'CLBS', 'CLIR', 'CLNC', 'CLNY', 'CLPS', 'CLRB', 'CLS', 'CLSD', 'CLSN', 'CLVR', 'CLVS', 'CLXT', 'CMCM', 'CMO', 'CMRE', 'CMRX', 'CNDT', 'CNET', 'CNFR', 'CNSL', 'CNSP', 'CNTY', 'COCP', 'COGT', 'COMS', 'CORR', 'CPG', 'CPHI', 'CPRX', 'CPSH', 'CRBP', 'CREG', 'CREX', 'CRIS', 'CRK', 'CRKN', 'CRMD', 'CRNT', 'CSCW', 'CSLT', 'CSPR', 'CTEK', 'CTIB', 'CTIC', 'CTK', 'CTMX', 'CTRM', 'CTSO', 'CTXR', 'CVE', 'CVGI', 'CWBR', 'CX', 'CXW', 'CYCC', 'CYCN', 'CYRN', 'CYTH', 'DARE', 'DBVT', 'DFFN', 'DGLY', 'DHC', 'DHT', 'DLPN', 'DNK', 'DNN', 'DNOW', 'DOGZ', 'DPW', 'DRH', 'DRRX', 'DRTT', 'DS', 'DSKE', 'DSS', 'DSSI', 'DSX', 'DTEA', 'DTSS', 'DUO', 'DVAX', 'DXF', 'DXLG', 'DYNT', 'EARS', 'EBON', 'EBR', 'ECOR', 'EDSA', 'EGY', 'EIGR', 'ELVT', 'ELYS', 'EMAN', 'EMKR', 'EMX', 'ENBL', 'ENDP', 'ENG', 'ENIA', 'ENIC', 'ENLC', 'ENSV', 'ENTX', 'ENVB', 'ENZ', 'EOLS', 'EQ', 'EQX', 'ERF', 'ERJ', 'ESGC', 'ESTE', 'ET', 'ETM', 'ETRN', 'ETTX', 'EURN', 'EVC', 'EVFM', 'EVGN', 'EVK', 'EVOK', 'EXK', 'EXPR', 'EXTR', 'EYEG', 'EYES', 'EZGO', 'EZPW', 'FAMI', 'FBIO', 'FBP', 'FENG', 'FI', 'FINV', 'FLDM', 'FLMN', 'FLNT', 'FLY', 'FORD', 'FPAY', 'FRBK', 'FRO', 'FRSX', 'FSM', 'FSP', 'FTEK', 'FTFT', 'FTK', 'FURY', 'GAU', 'GBS', 'GCI', 'GEL', 'GEN', 'GENE', 'GEO', 'GERN', 'GFI', 'GGAL', 'GGB', 'GHSI', 'GLBS', 'GLDG', 'GLG', 'GLOG', 'GLOP', 'GLUU', 'GLYC', 'GMBL', 'GMDA', 'GMLP', 'GNCA', 'GNK', 'GNLN', 'GNPX', 'GNUS', 'GNW', 'GOGL', 'GOL', 'GORO', 'GOSS', 'GOVX', 'GPL', 'GPMT', 'GPRO', 'GRIL', 'GRNQ', 'GSAT', 'GSKY', 'GSM', 'GSS', 'GSV', 'GTE', 'GTEC', 'GTT', 'GV', 'GVP', 'HAPP', 'HBM', 'HCDI', 'HCHC', 'HDSN', 'HEPA', 'HEXO', 'HGSH', 'HIL', 'HIMX', 'HJLI', 'HL', 'HLIT', 'HLX', 'HMHC', 'HMY', 'HNRG', 'HOFV', 'HOTH', 'HSTO', 'HT', 'HTBX', 'HUGE', 'HUSA', 'HUSN', 'HX', 'HYRE', 'IAG', 'IBIO', 'ICD', 'ICON', 'ID', 'IDEX', 'IDRA', 'IFMK', 'IFRX', 'IGC', 'IHT', 'IKT', 'IMAC', 'IMGN', 'IMMP', 'IMTE', 'IMV', 'INDO', 'INFI', 'ING', 'INN', 'INOD', 'INPX', 'INUV', 'IO', 'IPDN', 'IRIX', 'ISEE', 'ISIG', 'ISR', 'ITP', 'ITRM', 'ITUB', 'IVR', 'IZEA', 'JAGX', 'JE', 'JFIN', 'JFU', 'JG', 'JIH.W', 'JILL', 'JOB', 'JUPW', 'KALA', 'KBNT', 'KBSF', 'KDMN', 'KERN', 'KGC', 'KIN', 'KIQ', 'KMPH', 'KNDI', 'KODK', 'KOPN', 'KOS', 'KRKR', 'KRMD', 'KTRA', 'KUKE', 'KXIN', 'KZIA', 'LCI', 'LCTX', 'LEAF', 'LEE', 'LGHL', 'LIFE', 'LITB', 'LIVX', 'LIZI', 'LJPC', 'LKCO', 'LLIT', 'LLNW', 'LMFA', 'LMNL', 'LODE', 'LOMA', 'LPCN', 'LPTH', 'LPTX', 'LQDA', 'LSEA', 'LTBR', 'LTRPA', 'LX', 'LXRX', 'LYG', 'MACK', 'MARK', 'MBI', 'MBII', 'MBIO', 'MBRX', 'MBT', 'MCEP', 'MCF', 'MDGS', 'MDXG', 'MEIP', 'MESA', 'MESO', 'METX', 'MFA', 'MFG', 'MFGP', 'MFH', 'MGI', 'MGY', 'MHLD', 'MICT', 'MIN', 'MIND', 'MITO', 'MITT', 'MKD', 'MKGI', 'MLND', 'MLSS', 'MNKD', 'MOGO', 'MOGU', 'MOHO', 'MOSY', 'MOTS', 'MOXC', 'MPLN', 'MRC', 'MREO', 'MRIN', 'MRKR', 'MRO', 'MSN', 'MTA', 'MTC', 'MTL', 'MTNB', 'MTP', 'MTSL', 'MUFG', 'MUX', 'MVIS', 'MYSZ', 'MYT', 'NAK', 'NAKD', 'NAOV', 'NAT', 'NAVB', 'NBEV', 'NBRV', 'NBSE', 'NBY', 'NCMI', 'NCNA', 'NDRA', 'NEOS', 'NEPT', 'NERV', 'NES', 'NEW', 'NEX', 'NG', 'NGD', 'NGL', 'NH', 'NLY', 'NMCI', 'NMRK', 'NMTR', 'NNVC', 'NOK', 'NOVN', 'NR', 'NRZ', 'NSCO', 'NSPR', 'NTEC', 'NTN', 'NURO', 'NVCN', 'NVIV', 'NWG', 'NXE', 'NXTD', 'NYMT', 'OBLG', 'OBLN', 'OBSV', 'OCG', 'OCGN', 'OCSL', 'OCX', 'OEG', 'OGEN', 'OGI', 'OIBR.C', 'OII', 'OIIM', 'OIS', 'ONCT', 'ONCY', 'ONTX', 'OPGN', 'OPK', 'OPTN', 'OPTT', 'ORBC', 'ORC', 'ORMP', 'ORN', 'ORTX', 'OSMT', 'OSW', 'OTIC', 'OTLK', 'OVID', 'OXBR', 'OXLC', 'PAA', 'PAE', 'PAGP', 'PAVM', 'PAYS', 'PBF', 'PBI', 'PDSB', 'PED', 'PEI', 'PEIX', 'PFMT', 'PGEN', 'PGRE', 'PHAS', 'PHIO', 'PHUN', 'PIRS', 'PIXY', 'PLAG', 'PLG', 'PLIN', 'PLM', 'PLYA', 'PNNT', 'POAI', 'POWW', 'PPBT', 'PPR', 'PPSI', 'PPT', 'PROG', 'PRPO', 'PRQR', 'PRTK', 'PRTY', 'PSEC', 'PSTI', 'PSTV', 'PT', 'PTE', 'PTEN', 'PTMN', 'PTN', 'PULM', 'PUMP', 'PVL', 'PXLW', 'PXS', 'QD', 'QEP', 'QIWI', 'QLGN', 'QLI', 'QTNT', 'QTT', 'QUAD', 'QUOT', 'RAIL', 'RAVE', 'RBBN', 'RCON', 'RDHL', 'REED', 'REFR', 'REI', 'REPH', 'RES', 'RESN', 'RETO', 'RFP', 'RGLS', 'RGS', 'RHE', 'RIBT', 'RIG', 'RIGL', 'RKDA', 'RLH', 'RMED', 'RMTI', 'RNWK', 'RPAI', 'RPT', 'RRC', 'RRD', 'RTLR', 'RUHN', 'RWLK', 'RWT', 'RYAM', 'SALM', 'SAN', 'SAND', 'SB', 'SBBP', 'SBS', 'SCKT', 'SCOR', 'SCYX', 'SD', 'SDPI', 'SEAC', 'SEEL', 'SELB', 'SENS', 'SESN', 'SFET', 'SFL', 'SFT', 'SGBX', 'SGLB', 'SGOC', 'SHIP', 'SID', 'SIEN', 'SIF', 'SIFY', 'SILV', 'SINO', 'SINT', 'SIOX', 'SIRI', 'SLCA', 'SLDB', 'SLGG', 'SLRX', 'SLS', 'SM', 'SMFG', 'SMSI', 'SMTS', 'SMTX', 'SNCA', 'SNCR', 'SND', 'SNDE', 'SNDL', 'SNES', 'SNGX', 'SNMP', 'SNOA', 'SNR', 'SNSS', 'SOI', 'SOLO', 'SONM', 'SONN', 'SOS', 'SPCB', 'SPPI', 'SQFT', 'SQNS', 'SREV', 'SRGA', 'SSL', 'STAF', 'STCN', 'STON', 'STSA', 'SUP', 'SUPV', 'SVM', 'SVRA', 'SWN', 'SXC', 'SXTC', 'SYBX', 'SYN', 'SYPR', 'TACO', 'TALO', 'TANH', 'TAOP', 'TAST', 'TAT', 'TATT', 'TBLT', 'TCCO', 'TCDA', 'TCON', 'TEDU', 'TEF', 'TELL', 'TENX', 'TEO', 'TGA', 'TGB', 'TGC', 'THM', 'THMO', 'TK', 'TKAT', 'TKC', 'TLGT', 'TLMD', 'TLSA', 'TLYS', 'TMBR', 'TMDI', 'TMQ', 'TMST', 'TNAV', 'TNXP', 'TOPS', 'TOUR', 'TPRE', 'TRCH', 'TRIB', 'TRIT', 'TRST', 'TRUE', 'TRVG', 'TRVN', 'TRX', 'TRXC', 'TTI', 'TTNP', 'TTOO', 'TUSK', 'TV', 'TWI', 'TWO', 'TXMD', 'TYME', 'UAMY', 'UBX', 'UEC', 'UEPS', 'UGP', 'UMC', 'UONE', 'UONEK', 'URG', 'USAS', 'USAT', 'USEG', 'USIO', 'USWS', 'USX', 'UTSI', 'UUUU', 'UWMC', 'UXIN', 'VBIV', 'VBLT', 'VCNX', 'VEDL', 'VEON', 'VERB', 'VERO', 'VERU', 'VET', 'VGZ', 'VHC', 'VIOT', 'VIRI', 'VISL', 'VIV', 'VIVE', 'VKTX', 'VNTR', 'VRA', 'VRAY', 'VSTM', 'VTGN', 'VTNR', 'VTVT', 'VVOS', 'VXRT', 'VYGR', 'VYNE', 'WATT', 'WEI', 'WETF', 'WIMI', 'WISA', 'WIT', 'WKEY', 'WMC', 'WORX', 'WPRT', 'WPX', 'WRAP', 'WRN', 'WSR', 'WTER', 'WTI', 'WTRH', 'WTTR', 'WVE', 'WWR', 'XAIR', 'XAN', 'XBIO', 'XCUR', 'XELA', 'XELB', 'XERS', 'XNET', 'XPL', 'XSPA', 'XXII', 'YCBD', 'YGYI', 'YJ', 'YPF', 'YRCW', 'YTRA', 'YVR', 'ZAGG', 'ZIOP', 'ZIXI', 'ZKIN', 'ZNGA', 'ZOM', 'ZSAN', 'ZVO', 'ZYNE', '']
class info():
def __init__(self, name):
self.name = name
self.goo()
def goo(self):
self.x = {'ticker':self.name, 'name': str(yf.Ticker(self.name).info['longName'])}
print(self.x, )
with mylock:
database[self.name] = self.x
def run_in_p():
proc = []
for name_s in stocks:
p = Process(target=info, args=(name_s, ))
p.start()
proc.append(p)
for p in proc:
p.join()
print(database)
if __name__ == "__main__":
run_in_p()
Edit:
Here's the error window that pops up
Along with this error window I get another error in the python console
raise HTTPError(req.full_url, code, msg, hdrs, fp)
urllib.error.HTTPError: HTTP Error 404: Not Found
I'd like to offer a solution using a package called yahooquery. Disclaimer: I am the author of the package. You can get that same data in a few seconds with the following:
from yahooquery import Ticker
stocks = ['AAU', 'ABEO', 'ABEV', 'ABIO', 'ABUS', 'ACCO', 'ACER', 'ACIU', 'ACOR', 'ACRX', 'ACST', 'ACTG', 'ADAP', 'ADIL', 'ADMA', 'ADMP', 'ADT', 'ADTX', 'ADXS', 'AEG', 'AEHL', 'AEHR', 'AEMD', 'AESE', 'AEY', 'AEZS', 'AFIN', 'AFMD', 'AGEN', 'AGI', 'AGRO', 'AGRX', 'AGS', 'AGTC', 'AHPI', 'AHT', 'AIHS', 'AIKI', 'AIM', 'AINC', 'AIRI', 'AIV', 'AKBA', 'AKER', 'AKTX', 'ALNA', 'ALRN', 'ALSK', 'AM', 'AMBO', 'AMC', 'AMPE', 'AMPY', 'AMRN', 'AMRS', 'AMRX', 'AMTX', 'ANCN', 'ANH', 'ANIX', 'ANPC', 'ANTE', 'ANY', 'APDN', 'APM', 'APRE', 'APRN', 'APTO', 'APTS', 'APTX', 'APWC', 'AQMS', 'AQST', 'AR', 'ARAY', 'ARC', 'ARCO', 'ARDX', 'AREC', 'ARKO', 'ARLO', 'ARLP', 'AROC', 'ARPO', 'ARTL', 'ASC', 'ASLN', 'ASM', 'ASMB', 'ASRT', 'ASTC', 'ASX', 'ATAX', 'ATHE', 'ATHX', 'ATIF', 'ATNF', 'ATNM', 'ATOS', 'ATRS', 'ATXI', 'AUMN', 'AUTO', 'AUVI', 'AUY', 'AVCO', 'AVDL', 'AVEO', 'AVGR', 'AVXL', 'AWH', 'AWX', 'AXAS', 'AXL', 'AXU', 'AYRO', 'AYTU', 'AZRX', 'BBAR', 'BBD', 'BBGI', 'BBI', 'BBIG', 'BBVA', 'BBW', 'BCDA', 'BCLI', 'BCRX', 'BCS', 'BDR', 'BDSI', 'BEST', 'BGCP', 'BGI', 'BHAT', 'BHR', 'BHTG', 'BIMI', 'BIOC', 'BIOL', 'BKCC', 'BKD', 'BKEP', 'BKYI', 'BLCM', 'BLCT', 'BLIN', 'BLRX', 'BLU', 'BMRA', 'BNED', 'BNTC', 'BORR', 'BOXL', 'BPT', 'BPTH', 'BQ', 'BREZR', 'BRFS', 'BRN', 'BRPAR', 'BRQS', 'BRY', 'BSBR', 'BSGM', 'BSM', 'BSMX', 'BTG', 'BTU', 'BVXV', 'BW', 'BWEN', 'BXRX', 'BYFC', 'CAAP', 'CAAS', 'CALA', 'CAN', 'CANF', 'CAPR', 'CARV', 'CASA', 'CASI', 'CATB', 'CBAT', 'CBAY', 'CBIO', 'CBLI', 'CCO', 'CCRC', 'CCRN', 'CDE', 'CDEV', 'CDTX', 'CDXC', 'CEI', 'CEIX', 'CEMI', 'CERC', 'CERS', 'CETX', 'CFMS', 'CGIX', 'CHEK', 'CHMA', 'CHNR', 'CHRA', 'CHS', 'CHU', 'CIDM', 'CIG', 'CIO', 'CJJD', 'CKPT', 'CLBS', 'CLIR', 'CLNC', 'CLNY', 'CLPS', 'CLRB', 'CLS', 'CLSD', 'CLSN', 'CLVR', 'CLVS', 'CLXT', 'CMCM', 'CMO', 'CMRE', 'CMRX', 'CNDT', 'CNET', 'CNFR', 'CNSL', 'CNSP', 'CNTY', 'COCP', 'COGT', 'COMS', 'CORR', 'CPG', 'CPHI', 'CPRX', 'CPSH', 'CRBP', 'CREG', 'CREX', 'CRIS', 'CRK', 'CRKN', 'CRMD', 'CRNT', 'CSCW', 'CSLT', 'CSPR', 'CTEK', 'CTIB', 'CTIC', 'CTK', 'CTMX', 'CTRM', 'CTSO', 'CTXR', 'CVE', 'CVGI', 'CWBR', 'CX', 'CXW', 'CYCC', 'CYCN', 'CYRN', 'CYTH', 'DARE', 'DBVT', 'DFFN', 'DGLY', 'DHC', 'DHT', 'DLPN', 'DNK', 'DNN', 'DNOW', 'DOGZ', 'DPW', 'DRH', 'DRRX', 'DRTT', 'DS', 'DSKE', 'DSS', 'DSSI', 'DSX', 'DTEA', 'DTSS', 'DUO', 'DVAX', 'DXF', 'DXLG', 'DYNT', 'EARS', 'EBON', 'EBR', 'ECOR', 'EDSA', 'EGY', 'EIGR', 'ELVT', 'ELYS', 'EMAN', 'EMKR', 'EMX', 'ENBL', 'ENDP', 'ENG', 'ENIA', 'ENIC', 'ENLC', 'ENSV', 'ENTX', 'ENVB', 'ENZ', 'EOLS', 'EQ', 'EQX', 'ERF', 'ERJ', 'ESGC', 'ESTE', 'ET', 'ETM', 'ETRN', 'ETTX', 'EURN', 'EVC', 'EVFM', 'EVGN', 'EVK', 'EVOK', 'EXK', 'EXPR', 'EXTR', 'EYEG', 'EYES', 'EZGO', 'EZPW', 'FAMI', 'FBIO', 'FBP', 'FENG', 'FI', 'FINV', 'FLDM', 'FLMN', 'FLNT', 'FLY', 'FORD', 'FPAY', 'FRBK', 'FRO', 'FRSX', 'FSM', 'FSP', 'FTEK', 'FTFT', 'FTK', 'FURY', 'GAU', 'GBS', 'GCI', 'GEL', 'GEN', 'GENE', 'GEO', 'GERN', 'GFI', 'GGAL', 'GGB', 'GHSI', 'GLBS', 'GLDG', 'GLG', 'GLOG', 'GLOP', 'GLUU', 'GLYC', 'GMBL', 'GMDA', 'GMLP', 'GNCA', 'GNK', 'GNLN', 'GNPX', 'GNUS', 'GNW', 'GOGL', 'GOL', 'GORO', 'GOSS', 'GOVX', 'GPL', 'GPMT', 'GPRO', 'GRIL', 'GRNQ', 'GSAT', 'GSKY', 'GSM', 'GSS', 'GSV', 'GTE', 'GTEC', 'GTT', 'GV', 'GVP', 'HAPP', 'HBM', 'HCDI', 'HCHC', 'HDSN', 'HEPA', 'HEXO', 'HGSH', 'HIL', 'HIMX', 'HJLI', 'HL', 'HLIT', 'HLX', 'HMHC', 'HMY', 'HNRG', 'HOFV', 'HOTH', 'HSTO', 'HT', 'HTBX', 'HUGE', 'HUSA', 'HUSN', 'HX', 'HYRE', 'IAG', 'IBIO', 'ICD', 'ICON', 'ID', 'IDEX', 'IDRA', 'IFMK', 'IFRX', 'IGC', 'IHT', 'IKT', 'IMAC', 'IMGN', 'IMMP', 'IMTE', 'IMV', 'INDO', 'INFI', 'ING', 'INN', 'INOD', 'INPX', 'INUV', 'IO', 'IPDN', 'IRIX', 'ISEE', 'ISIG', 'ISR', 'ITP', 'ITRM', 'ITUB', 'IVR', 'IZEA', 'JAGX', 'JE', 'JFIN', 'JFU', 'JG', 'JIH.W', 'JILL', 'JOB', 'JUPW', 'KALA', 'KBNT', 'KBSF', 'KDMN', 'KERN', 'KGC', 'KIN', 'KIQ', 'KMPH', 'KNDI', 'KODK', 'KOPN', 'KOS', 'KRKR', 'KRMD', 'KTRA', 'KUKE', 'KXIN', 'KZIA', 'LCI', 'LCTX', 'LEAF', 'LEE', 'LGHL', 'LIFE', 'LITB', 'LIVX', 'LIZI', 'LJPC', 'LKCO', 'LLIT', 'LLNW', 'LMFA', 'LMNL', 'LODE', 'LOMA', 'LPCN', 'LPTH', 'LPTX', 'LQDA', 'LSEA', 'LTBR', 'LTRPA', 'LX', 'LXRX', 'LYG', 'MACK', 'MARK', 'MBI', 'MBII', 'MBIO', 'MBRX', 'MBT', 'MCEP', 'MCF', 'MDGS', 'MDXG', 'MEIP', 'MESA', 'MESO', 'METX', 'MFA', 'MFG', 'MFGP', 'MFH', 'MGI', 'MGY', 'MHLD', 'MICT', 'MIN', 'MIND', 'MITO', 'MITT', 'MKD', 'MKGI', 'MLND', 'MLSS', 'MNKD', 'MOGO', 'MOGU', 'MOHO', 'MOSY', 'MOTS', 'MOXC', 'MPLN', 'MRC', 'MREO', 'MRIN', 'MRKR', 'MRO', 'MSN', 'MTA', 'MTC', 'MTL', 'MTNB', 'MTP', 'MTSL', 'MUFG', 'MUX', 'MVIS', 'MYSZ', 'MYT', 'NAK', 'NAKD', 'NAOV', 'NAT', 'NAVB', 'NBEV', 'NBRV', 'NBSE', 'NBY', 'NCMI', 'NCNA', 'NDRA', 'NEOS', 'NEPT', 'NERV', 'NES', 'NEW', 'NEX', 'NG', 'NGD', 'NGL', 'NH', 'NLY', 'NMCI', 'NMRK', 'NMTR', 'NNVC', 'NOK', 'NOVN', 'NR', 'NRZ', 'NSCO', 'NSPR', 'NTEC', 'NTN', 'NURO', 'NVCN', 'NVIV', 'NWG', 'NXE', 'NXTD', 'NYMT', 'OBLG', 'OBLN', 'OBSV', 'OCG', 'OCGN', 'OCSL', 'OCX', 'OEG', 'OGEN', 'OGI', 'OIBR.C', 'OII', 'OIIM', 'OIS', 'ONCT', 'ONCY', 'ONTX', 'OPGN', 'OPK', 'OPTN', 'OPTT', 'ORBC', 'ORC', 'ORMP', 'ORN', 'ORTX', 'OSMT', 'OSW', 'OTIC', 'OTLK', 'OVID', 'OXBR', 'OXLC', 'PAA', 'PAE', 'PAGP', 'PAVM', 'PAYS', 'PBF', 'PBI', 'PDSB', 'PED', 'PEI', 'PEIX', 'PFMT', 'PGEN', 'PGRE', 'PHAS', 'PHIO', 'PHUN', 'PIRS', 'PIXY', 'PLAG', 'PLG', 'PLIN', 'PLM', 'PLYA', 'PNNT', 'POAI', 'POWW', 'PPBT', 'PPR', 'PPSI', 'PPT', 'PROG', 'PRPO', 'PRQR', 'PRTK', 'PRTY', 'PSEC', 'PSTI', 'PSTV', 'PT', 'PTE', 'PTEN', 'PTMN', 'PTN', 'PULM', 'PUMP', 'PVL', 'PXLW', 'PXS', 'QD', 'QEP', 'QIWI', 'QLGN', 'QLI', 'QTNT', 'QTT', 'QUAD', 'QUOT', 'RAIL', 'RAVE', 'RBBN', 'RCON', 'RDHL', 'REED', 'REFR', 'REI', 'REPH', 'RES', 'RESN', 'RETO', 'RFP', 'RGLS', 'RGS', 'RHE', 'RIBT', 'RIG', 'RIGL', 'RKDA', 'RLH', 'RMED', 'RMTI', 'RNWK', 'RPAI', 'RPT', 'RRC', 'RRD', 'RTLR', 'RUHN', 'RWLK', 'RWT', 'RYAM', 'SALM', 'SAN', 'SAND', 'SB', 'SBBP', 'SBS', 'SCKT', 'SCOR', 'SCYX', 'SD', 'SDPI', 'SEAC', 'SEEL', 'SELB', 'SENS', 'SESN', 'SFET', 'SFL', 'SFT', 'SGBX', 'SGLB', 'SGOC', 'SHIP', 'SID', 'SIEN', 'SIF', 'SIFY', 'SILV', 'SINO', 'SINT', 'SIOX', 'SIRI', 'SLCA', 'SLDB', 'SLGG', 'SLRX', 'SLS', 'SM', 'SMFG', 'SMSI', 'SMTS', 'SMTX', 'SNCA', 'SNCR', 'SND', 'SNDE', 'SNDL', 'SNES', 'SNGX', 'SNMP', 'SNOA', 'SNR', 'SNSS', 'SOI', 'SOLO', 'SONM', 'SONN', 'SOS', 'SPCB', 'SPPI', 'SQFT', 'SQNS', 'SREV', 'SRGA', 'SSL', 'STAF', 'STCN', 'STON', 'STSA', 'SUP', 'SUPV', 'SVM', 'SVRA', 'SWN', 'SXC', 'SXTC', 'SYBX', 'SYN', 'SYPR', 'TACO', 'TALO', 'TANH', 'TAOP', 'TAST', 'TAT', 'TATT', 'TBLT', 'TCCO', 'TCDA', 'TCON', 'TEDU', 'TEF', 'TELL', 'TENX', 'TEO', 'TGA', 'TGB', 'TGC', 'THM', 'THMO', 'TK', 'TKAT', 'TKC', 'TLGT', 'TLMD', 'TLSA', 'TLYS', 'TMBR', 'TMDI', 'TMQ', 'TMST', 'TNAV', 'TNXP', 'TOPS', 'TOUR', 'TPRE', 'TRCH', 'TRIB', 'TRIT', 'TRST', 'TRUE', 'TRVG', 'TRVN', 'TRX', 'TRXC', 'TTI', 'TTNP', 'TTOO', 'TUSK', 'TV', 'TWI', 'TWO', 'TXMD', 'TYME', 'UAMY', 'UBX', 'UEC', 'UEPS', 'UGP', 'UMC', 'UONE', 'UONEK', 'URG', 'USAS', 'USAT', 'USEG', 'USIO', 'USWS', 'USX', 'UTSI', 'UUUU', 'UWMC', 'UXIN', 'VBIV', 'VBLT', 'VCNX', 'VEDL', 'VEON', 'VERB', 'VERO', 'VERU', 'VET', 'VGZ', 'VHC', 'VIOT', 'VIRI', 'VISL', 'VIV', 'VIVE', 'VKTX', 'VNTR', 'VRA', 'VRAY', 'VSTM', 'VTGN', 'VTNR', 'VTVT', 'VVOS', 'VXRT', 'VYGR', 'VYNE', 'WATT', 'WEI', 'WETF', 'WIMI', 'WISA', 'WIT', 'WKEY', 'WMC', 'WORX', 'WPRT', 'WPX', 'WRAP', 'WRN', 'WSR', 'WTER', 'WTI', 'WTRH', 'WTTR', 'WVE', 'WWR', 'XAIR', 'XAN', 'XBIO', 'XCUR', 'XELA', 'XELB', 'XERS', 'XNET', 'XPL', 'XSPA', 'XXII', 'YCBD', 'YGYI', 'YJ', 'YPF', 'YRCW', 'YTRA', 'YVR', 'ZAGG', 'ZIOP', 'ZIXI', 'ZKIN', 'ZNGA', 'ZOM', 'ZSAN', 'ZVO', 'ZYNE', '']
# validate is optional but will go through your list and keep only valid symbols
t = Ticker(symbols, validate=True)
data = t.quotes
d = {k: v['longName'] for k, v in data.items()}
Ok,
here is one way to obtain what you want in about 2min.
Some tickers are bad, that's why it crashes.
Here's the code. I use joblib for threading or multiprocess since it doesn't work in my env. But, that's the spirit.
%%time
import joblib
from joblib import Parallel,delayed
WRONG_TICKERS = []
database = {}
stocks = ['AAU', 'ABEO', 'ABEV', 'ABIO', 'ABUS', 'ACCO', 'ACER', 'ACIU', 'ACOR', 'ACRX', 'ACST', 'ACTG', 'ADAP', 'ADIL', 'ADMA', 'ADMP', 'ADT', 'ADTX', 'ADXS', 'AEG', 'AEHL', 'AEHR', 'AEMD', 'AESE', 'AEY', 'AEZS', 'AFIN', 'AFMD', 'AGEN', 'AGI', 'AGRO', 'AGRX', 'AGS', 'AGTC', 'AHPI', 'AHT', 'AIHS', 'AIKI', 'AIM', 'AINC', 'AIRI', 'AIV', 'AKBA', 'AKER', 'AKTX', 'ALNA', 'ALRN', 'ALSK', 'AM', 'AMBO', 'AMC', 'AMPE', 'AMPY', 'AMRN', 'AMRS', 'AMRX', 'AMTX', 'ANCN', 'ANH', 'ANIX', 'ANPC', 'ANTE', 'ANY', 'APDN', 'APM', 'APRE', 'APRN', 'APTO', 'APTS', 'APTX', 'APWC', 'AQMS', 'AQST', 'AR', 'ARAY', 'ARC', 'ARCO', 'ARDX', 'AREC', 'ARKO', 'ARLO', 'ARLP', 'AROC', 'ARPO', 'ARTL', 'ASC', 'ASLN', 'ASM', 'ASMB', 'ASRT', 'ASTC', 'ASX', 'ATAX', 'ATHE', 'ATHX', 'ATIF', 'ATNF', 'ATNM', 'ATOS', 'ATRS', 'ATXI', 'AUMN', 'AUTO', 'AUVI', 'AUY', 'AVCO', 'AVDL', 'AVEO', 'AVGR', 'AVXL', 'AWH', 'AWX', 'AXAS', 'AXL', 'AXU', 'AYRO', 'AYTU', 'AZRX', 'BBAR', 'BBD', 'BBGI', 'BBI', 'BBIG', 'BBVA', 'BBW', 'BCDA', 'BCLI', 'BCRX', 'BCS', 'BDR', 'BDSI', 'BEST', 'BGCP', 'BGI', 'BHAT', 'BHR', 'BHTG', 'BIMI', 'BIOC', 'BIOL', 'BKCC', 'BKD', 'BKEP', 'BKYI', 'BLCM', 'BLCT', 'BLIN', 'BLRX', 'BLU', 'BMRA', 'BNED', 'BNTC', 'BORR', 'BOXL', 'BPT', 'BPTH', 'BQ', 'BREZR', 'BRFS', 'BRN', 'BRPAR', 'BRQS', 'BRY', 'BSBR', 'BSGM', 'BSM', 'BSMX', 'BTG', 'BTU', 'BVXV', 'BW', 'BWEN', 'BXRX', 'BYFC', 'CAAP', 'CAAS', 'CALA', 'CAN', 'CANF', 'CAPR', 'CARV', 'CASA', 'CASI', 'CATB', 'CBAT', 'CBAY', 'CBIO', 'CBLI', 'CCO', 'CCRC', 'CCRN', 'CDE', 'CDEV', 'CDTX', 'CDXC', 'CEI', 'CEIX', 'CEMI', 'CERC', 'CERS', 'CETX', 'CFMS', 'CGIX', 'CHEK', 'CHMA', 'CHNR', 'CHRA', 'CHS', 'CHU', 'CIDM', 'CIG', 'CIO', 'CJJD', 'CKPT', 'CLBS', 'CLIR', 'CLNC', 'CLNY', 'CLPS', 'CLRB', 'CLS', 'CLSD', 'CLSN', 'CLVR', 'CLVS', 'CLXT', 'CMCM', 'CMO', 'CMRE', 'CMRX', 'CNDT', 'CNET', 'CNFR', 'CNSL', 'CNSP', 'CNTY', 'COCP', 'COGT', 'COMS', 'CORR', 'CPG', 'CPHI', 'CPRX', 'CPSH', 'CRBP', 'CREG', 'CREX', 'CRIS', 'CRK', 'CRKN', 'CRMD', 'CRNT', 'CSCW', 'CSLT', 'CSPR', 'CTEK', 'CTIB', 'CTIC', 'CTK', 'CTMX', 'CTRM', 'CTSO', 'CTXR', 'CVE', 'CVGI', 'CWBR', 'CX', 'CXW', 'CYCC', 'CYCN', 'CYRN', 'CYTH', 'DARE', 'DBVT', 'DFFN', 'DGLY', 'DHC', 'DHT', 'DLPN', 'DNK', 'DNN', 'DNOW', 'DOGZ', 'DPW', 'DRH', 'DRRX', 'DRTT', 'DS', 'DSKE', 'DSS', 'DSSI', 'DSX', 'DTEA', 'DTSS', 'DUO', 'DVAX', 'DXF', 'DXLG', 'DYNT', 'EARS', 'EBON', 'EBR', 'ECOR', 'EDSA', 'EGY', 'EIGR', 'ELVT', 'ELYS', 'EMAN', 'EMKR', 'EMX', 'ENBL', 'ENDP', 'ENG', 'ENIA', 'ENIC', 'ENLC', 'ENSV', 'ENTX', 'ENVB', 'ENZ', 'EOLS', 'EQ', 'EQX', 'ERF', 'ERJ', 'ESGC', 'ESTE', 'ET', 'ETM', 'ETRN', 'ETTX', 'EURN', 'EVC', 'EVFM', 'EVGN', 'EVK', 'EVOK', 'EXK', 'EXPR', 'EXTR', 'EYEG', 'EYES', 'EZGO', 'EZPW', 'FAMI', 'FBIO', 'FBP', 'FENG', 'FI', 'FINV', 'FLDM', 'FLMN', 'FLNT', 'FLY', 'FORD', 'FPAY', 'FRBK', 'FRO', 'FRSX', 'FSM', 'FSP', 'FTEK', 'FTFT', 'FTK', 'FURY', 'GAU', 'GBS', 'GCI', 'GEL', 'GEN', 'GENE', 'GEO', 'GERN', 'GFI', 'GGAL', 'GGB', 'GHSI', 'GLBS', 'GLDG', 'GLG', 'GLOG', 'GLOP', 'GLUU', 'GLYC', 'GMBL', 'GMDA', 'GMLP', 'GNCA', 'GNK', 'GNLN', 'GNPX', 'GNUS', 'GNW', 'GOGL', 'GOL', 'GORO', 'GOSS', 'GOVX', 'GPL', 'GPMT', 'GPRO', 'GRIL', 'GRNQ', 'GSAT', 'GSKY', 'GSM', 'GSS', 'GSV', 'GTE', 'GTEC', 'GTT', 'GV', 'GVP', 'HAPP', 'HBM', 'HCDI', 'HCHC', 'HDSN', 'HEPA', 'HEXO', 'HGSH', 'HIL', 'HIMX', 'HJLI', 'HL', 'HLIT', 'HLX', 'HMHC', 'HMY', 'HNRG', 'HOFV', 'HOTH', 'HSTO', 'HT', 'HTBX', 'HUGE', 'HUSA', 'HUSN', 'HX', 'HYRE', 'IAG', 'IBIO', 'ICD', 'ICON', 'ID', 'IDEX', 'IDRA', 'IFMK', 'IFRX', 'IGC', 'IHT', 'IKT', 'IMAC', 'IMGN', 'IMMP', 'IMTE', 'IMV', 'INDO', 'INFI', 'ING', 'INN', 'INOD', 'INPX', 'INUV', 'IO', 'IPDN', 'IRIX', 'ISEE', 'ISIG', 'ISR', 'ITP', 'ITRM', 'ITUB', 'IVR', 'IZEA', 'JAGX', 'JE', 'JFIN', 'JFU', 'JG', 'JIH.W', 'JILL', 'JOB', 'JUPW', 'KALA', 'KBNT', 'KBSF', 'KDMN', 'KERN', 'KGC', 'KIN', 'KIQ', 'KMPH', 'KNDI', 'KODK', 'KOPN', 'KOS', 'KRKR', 'KRMD', 'KTRA', 'KUKE', 'KXIN', 'KZIA', 'LCI', 'LCTX', 'LEAF', 'LEE', 'LGHL', 'LIFE', 'LITB', 'LIVX', 'LIZI', 'LJPC', 'LKCO', 'LLIT', 'LLNW', 'LMFA', 'LMNL', 'LODE', 'LOMA', 'LPCN', 'LPTH', 'LPTX', 'LQDA', 'LSEA', 'LTBR', 'LTRPA', 'LX', 'LXRX', 'LYG', 'MACK', 'MARK', 'MBI', 'MBII', 'MBIO', 'MBRX', 'MBT', 'MCEP', 'MCF', 'MDGS', 'MDXG', 'MEIP', 'MESA', 'MESO', 'METX', 'MFA', 'MFG', 'MFGP', 'MFH', 'MGI', 'MGY', 'MHLD', 'MICT', 'MIN', 'MIND', 'MITO', 'MITT', 'MKD', 'MKGI', 'MLND', 'MLSS', 'MNKD', 'MOGO', 'MOGU', 'MOHO', 'MOSY', 'MOTS', 'MOXC', 'MPLN', 'MRC', 'MREO', 'MRIN', 'MRKR', 'MRO', 'MSN', 'MTA', 'MTC', 'MTL', 'MTNB', 'MTP', 'MTSL', 'MUFG', 'MUX', 'MVIS', 'MYSZ', 'MYT', 'NAK', 'NAKD', 'NAOV', 'NAT', 'NAVB', 'NBEV', 'NBRV', 'NBSE', 'NBY', 'NCMI', 'NCNA', 'NDRA', 'NEOS', 'NEPT', 'NERV', 'NES', 'NEW', 'NEX', 'NG', 'NGD', 'NGL', 'NH', 'NLY', 'NMCI', 'NMRK', 'NMTR', 'NNVC', 'NOK', 'NOVN', 'NR', 'NRZ', 'NSCO', 'NSPR', 'NTEC', 'NTN', 'NURO', 'NVCN', 'NVIV', 'NWG', 'NXE', 'NXTD', 'NYMT', 'OBLG', 'OBLN', 'OBSV', 'OCG', 'OCGN', 'OCSL', 'OCX', 'OEG', 'OGEN', 'OGI', 'OIBR.C', 'OII', 'OIIM', 'OIS', 'ONCT', 'ONCY', 'ONTX', 'OPGN', 'OPK', 'OPTN', 'OPTT', 'ORBC', 'ORC', 'ORMP', 'ORN', 'ORTX', 'OSMT', 'OSW', 'OTIC', 'OTLK', 'OVID', 'OXBR', 'OXLC', 'PAA', 'PAE', 'PAGP', 'PAVM', 'PAYS', 'PBF', 'PBI', 'PDSB', 'PED', 'PEI', 'PEIX', 'PFMT', 'PGEN', 'PGRE', 'PHAS', 'PHIO', 'PHUN', 'PIRS', 'PIXY', 'PLAG', 'PLG', 'PLIN', 'PLM', 'PLYA', 'PNNT', 'POAI', 'POWW', 'PPBT', 'PPR', 'PPSI', 'PPT', 'PROG', 'PRPO', 'PRQR', 'PRTK', 'PRTY', 'PSEC', 'PSTI', 'PSTV', 'PT', 'PTE', 'PTEN', 'PTMN', 'PTN', 'PULM', 'PUMP', 'PVL', 'PXLW', 'PXS', 'QD', 'QEP', 'QIWI', 'QLGN', 'QLI', 'QTNT', 'QTT', 'QUAD', 'QUOT', 'RAIL', 'RAVE', 'RBBN', 'RCON', 'RDHL', 'REED', 'REFR', 'REI', 'REPH', 'RES', 'RESN', 'RETO', 'RFP', 'RGLS', 'RGS', 'RHE', 'RIBT', 'RIG', 'RIGL', 'RKDA', 'RLH', 'RMED', 'RMTI', 'RNWK', 'RPAI', 'RPT', 'RRC', 'RRD', 'RTLR', 'RUHN', 'RWLK', 'RWT', 'RYAM', 'SALM', 'SAN', 'SAND', 'SB', 'SBBP', 'SBS', 'SCKT', 'SCOR', 'SCYX', 'SD', 'SDPI', 'SEAC', 'SEEL', 'SELB', 'SENS', 'SESN', 'SFET', 'SFL', 'SFT', 'SGBX', 'SGLB', 'SGOC', 'SHIP', 'SID', 'SIEN', 'SIF', 'SIFY', 'SILV', 'SINO', 'SINT', 'SIOX', 'SIRI', 'SLCA', 'SLDB', 'SLGG', 'SLRX', 'SLS', 'SM', 'SMFG', 'SMSI', 'SMTS', 'SMTX', 'SNCA', 'SNCR', 'SND', 'SNDE', 'SNDL', 'SNES', 'SNGX', 'SNMP', 'SNOA', 'SNR', 'SNSS', 'SOI', 'SOLO', 'SONM', 'SONN', 'SOS', 'SPCB', 'SPPI', 'SQFT', 'SQNS', 'SREV', 'SRGA', 'SSL', 'STAF', 'STCN', 'STON', 'STSA', 'SUP', 'SUPV', 'SVM', 'SVRA', 'SWN', 'SXC', 'SXTC', 'SYBX', 'SYN', 'SYPR', 'TACO', 'TALO', 'TANH', 'TAOP', 'TAST', 'TAT', 'TATT', 'TBLT', 'TCCO', 'TCDA', 'TCON', 'TEDU', 'TEF', 'TELL', 'TENX', 'TEO', 'TGA', 'TGB', 'TGC', 'THM', 'THMO', 'TK', 'TKAT', 'TKC', 'TLGT', 'TLMD', 'TLSA', 'TLYS', 'TMBR', 'TMDI', 'TMQ', 'TMST', 'TNAV', 'TNXP', 'TOPS', 'TOUR', 'TPRE', 'TRCH', 'TRIB', 'TRIT', 'TRST', 'TRUE', 'TRVG', 'TRVN', 'TRX', 'TRXC', 'TTI', 'TTNP', 'TTOO', 'TUSK', 'TV', 'TWI', 'TWO', 'TXMD', 'TYME', 'UAMY', 'UBX', 'UEC', 'UEPS', 'UGP', 'UMC', 'UONE', 'UONEK', 'URG', 'USAS', 'USAT', 'USEG', 'USIO', 'USWS', 'USX', 'UTSI', 'UUUU', 'UWMC', 'UXIN', 'VBIV', 'VBLT', 'VCNX', 'VEDL', 'VEON', 'VERB', 'VERO', 'VERU', 'VET', 'VGZ', 'VHC', 'VIOT', 'VIRI', 'VISL', 'VIV', 'VIVE', 'VKTX', 'VNTR', 'VRA', 'VRAY', 'VSTM', 'VTGN', 'VTNR', 'VTVT', 'VVOS', 'VXRT', 'VYGR', 'VYNE', 'WATT', 'WEI', 'WETF', 'WIMI', 'WISA', 'WIT', 'WKEY', 'WMC', 'WORX', 'WPRT', 'WPX', 'WRAP', 'WRN', 'WSR', 'WTER', 'WTI', 'WTRH', 'WTTR', 'WVE', 'WWR', 'XAIR', 'XAN', 'XBIO', 'XCUR', 'XELA', 'XELB', 'XERS', 'XNET', 'XPL', 'XSPA', 'XXII', 'YCBD', 'YGYI', 'YJ', 'YPF', 'YRCW', 'YTRA', 'YVR', 'ZAGG', 'ZIOP', 'ZIXI', 'ZKIN', 'ZNGA', 'ZOM', 'ZSAN', 'ZVO', 'ZYNE']
def get_name(_ticker):
try:
database[_ticker] = yf.Ticker(_ticker).info['longName']
except:
WRONG_TICKERS.append(_ticker)
pass
return(database)
number_of_cpu = joblib.cpu_count()
delayed_funcs = [delayed(get_name)(_ticker) for _ticker in stocks]
parallel_pool = Parallel(n_jobs=number_of_cpu,prefer="threads") # processes threads
parallel_pool(delayed_funcs)
OUTPUT:
Note that depending on your computer, it could be faster withe 'processes' instead of 'threads'. You have to test to know it.
Edit : When re-running the same code, everything is in 404 ERROR. Bust be a problem with yfinance
Edit 2 : It's okay again...
Related
How to calculate percentages from multiple columns
I want to create a table that looks like this: So far I have a table I created to get the value counts but I need help with creating a table that calculates the total value of row 0 and 1. I'm using this dataset: https://github.com/fivethirtyeight/data/tree/master/bob-ross Code: ross = bobross[['Apple frame', 'Aurora borealis', 'Barn', 'Beach', 'Boat', 'Bridge', 'Building', 'Bushes', 'Cabin', 'Cactus', 'Circle frame', 'Cirrus clouds', 'Cliff', 'Clouds', 'Coniferous tree', 'Cumulus clouds', 'Decidious tree', 'Diane andre', 'Dock', 'Double oval frame', 'Farm', 'Fence', 'Fire', 'Florida frame', 'Flowers', 'Fog', 'Framed', 'Grass', 'Guest', 'Half circle frame', 'Half oval frame', 'Hills', 'Lake', 'Lakes', 'Lighthouse', 'Mill', 'Moon', 'At least one mountain', 'At least two mountains', 'Nighttime', 'Ocean', 'Oval frame', 'Palm trees', 'Path', 'Person', 'Portrait', 'Rectangle 3d frame', 'Rectangular frame', 'River or stream', 'Rocks', 'Seashell frame', 'Snow', 'Snow-covered mountain', 'Split frame', 'Steve ross', 'Man-made structure', 'Sun', 'Tomb frame', 'At least one tree', 'At least two trees', 'Triple frame', 'Waterfall', 'Waves', 'Windmill', 'Window frame', 'Winter setting', 'Wood framed']].apply(pd.Series.value_counts) ross
IIUC, import pandas as pd import numpy as np df = pd.read_csv('https://raw.githubusercontent.com/fivethirtyeight/data/master/bob-ross/elements-by-episode.csv') dfi = df.set_index(['EPISODE', 'TITLE']) (dfi.sum()/np.sum(dfi.to_numpy())) Output: APPLE_FRAME 0.000310 AURORA_BOREALIS 0.000621 BARN 0.005278 BEACH 0.008382 BOAT 0.000621 ... WAVES 0.010556 WINDMILL 0.000310 WINDOW_FRAME 0.000310 WINTER 0.021422 WOOD_FRAMED 0.000310 Length: 67, dtype: float64
find words that can be made from a string in python
im fairly new to python and im not sure how to tackle my problem im trying to make a program that can take a string of 15 characters from a .txt file and find words that you can make from those characters with a dictionary file, than output those words to another text file. this is what i have tried: attempting to find words that don't contain the characters and removing them from the list various anagram solver type programs of git hub i tried this sudo pip3 install anagram-solverbut it has been 3 hours on 15 characters and it is still running im new so please tell me if im forgetting something
If you're looking for "perfect" anagrams, i.e. those that contain exactly the same number of characters, not a subset, it's pretty easy: take your word-to-find, sort it by its letters take your dictionary, sort each word by its letters if the sorted versions match, they're anagrams def find_anagrams(seek_word): sorted_seek_word = sorted(seek_word.lower()) for word in open("/usr/share/dict/words"): word = word.strip() # remove trailing newline sorted_word = sorted(word.lower()) if sorted_word == sorted_seek_word and word != seek_word: print(seek_word, word) if __name__ == "__main__": find_anagrams("begin") find_anagrams("nicer") find_anagrams("decor") prints (on my macOS machine – Windows machines won't have /usr/share/dict/words by default, and some Linux distributions need it installed separately) begin being begin binge nicer cerin nicer crine decor coder decor cored decor Credo EDIT A second variation that finds all words that are assemblable from the letters in the original word, using collections.Counter: import collections def find_all_anagrams(seek_word): seek_word_counter = collections.Counter(seek_word.lower()) for word in open("/usr/share/dict/words"): word = word.strip() # remove trailing newline word_counter = collections.Counter(word.strip()) if word != seek_word and all( n <= seek_word_counter[l] for l, n in word_counter.items() ): yield word if __name__ == "__main__": print("decoration", set(find_all_anagrams("decoration"))) Outputs e.g. decoration {'carte', 'drona', 'roit', 'oat', 'cantred', 'rond', 'rid', 'centroid', 'trine', 't', 'tenai', 'cond', 'toroid', 'recon', 'contra', 'dain', 'cootie', 'iao', 'arctoid', 'oner', 'indart', 'tine', 'nace', 'rident', 'cerotin', 'cran', 'eta', 'eoan', 'cardoon', 'tone', 'trend', 'trinode', 'coaid', 'ranid', 'rein', 'end', 'actine', 'ide', 'cero', 'iodate', 'corn', 'oer', 'retia', 'nidor', 'diter', 'drat', 'tec', 'tic', 'creat', 'arent', 'coon', 'doater', 'ornoite', 'terna', 'docent', 'tined', 'edit', 'octroi', 'eric', 'read', 'toned', 'c', 'tera', 'can', 'rocta', 'cortina', 'adonite', 'iced', 'no', 'natr', 'net', 'oe', 'rodeo', 'actor', 'otarine', 'on', 'cretin', 'ericad', 'dance', 'tornade', 'tinea', 'coontie', 'anerotic', 'acrite', 'ra', 'danio', 'inroad', 'inde', 'tied', 'tar', 'coronae', 'tid', 'rad', 'doc', 'derat', 'tea', 'acerin', 'ronde', 'recti', 'areito', 'drain', 'odontic', 'octoad', 'rio', 'actin', 'tread', 'rect', 'ariot', 'road', 'doctrine', 'enactor', 'indoor', 'toco', 'ton', 'trice', 'norite', 'nea', 'coda', 'noria', 'rot', 'trona', 'rice', 'arite', 'eria', 'orad', 'rate', 'toed', 'enact', 'crinet', 'cento', 'arid', 'coot', 'nat', 'nar', 'cain', 'at', 'antired', 'ear', 'triode', 'doter', 'cedarn', 'orna', 'rand', 'tari', 'crea', 'tiar', 'retan', 'tire', 'cora', 'aroid', 'iron', 'tenio', 'enroot', 'd', 'oaric', 'acetin', 'tain', 'neat', 'noter', 'tien', 'aortic', 'tode', 'dicer', 'irate', 'tie', 'canid', 'ado', 'noticer', 'arn', 'nacre', 'ceration', 'ratine', 'denaro', 'cotoin', 'aint', 'canto', 'cinter', 'decani', 'roon', 'donor', 'acnode', 'aide', 'doer', 'tacnode', 'oread', 'acetoin', 'rine', 'acton', 'conoid', 'a', 'otocrane', 'norate', 'care', 'ticer', 'io', 'detain', 'cedar', 'ta', 'toadier', 'atone', 'cornet', 'dacoit', 'toric', 'orate', 'arni', 'adroit', 'rend', 'tanier', 'rooted', 'doit', 'dier', 'odorate', 'trica', 'rated', 'cotonier', 'dine', 'roid', 'cairned', 'cat', 'i', 'coin', 'octine', 'trod', 'orc', 'cardo', 'eniac', 'arenoid', 'erd', 'creant', 'oda', 'ratio', 'ceria', 'ad', 'acorn', 'dorn', 'deric', 'credit', 'door', 'cinder', 'cantor', 'er', 'doon', 'coner', 'donate', 'roe', 'tora', 'antic', 'racoon', 'ooid', 'noa', 'tae', 'coroa', 'earn', 'retain', 'canted', 'norie', 'rota', 'tao', 'redan', 'rondo', 'entia', 'ctenoid', 'cent', 'daroo', 'inrooted', 'roed', 'adore', 'coat', 'e', 'rat', 'deair', 'arend', 'coir', 'acid', 'coronate', 'rodent', 'acider', 'iota', 'codo', 'redaction', 'cot', 'aeric', 'tonic', 'candier', 'decart', 'dicta', 'dot', 'recoat', 'caroon', 'rone', 'tarie', 'tarin', 'teca', 'oar', 'ocrea', 'ante', 'creation', 'tore', 'conto', 'tairn', 'roc', 'conter', 'coeditor', 'certain', 'roncet', 'decator', 'not', 'coatie', 'toran', 'caid', 'redia', 'root', 'cad', 'cartoon', 'n', 'coed', 'cand', 'neo', 'coronadite', 'dare', 'dartoic', 'acoin', 'detar', 'dite', 'trade', 'train', 'ordinate', 'racon', 'citron', 'dan', 'doat', 'nito', 'tercia', 'rote', 'cooer', 'acone', 'rita', 'caret', 'dern', 'enatic', 'too', 'cried', 'tade', 'dit', 'orient', 'ria', 'torn', 'coati', 'cnida', 'note', 'tried', 'acrid', 'nitro', 'acron', 'tern', 'one', 'it', 'naio', 'dor', 'ea', 'ca', 'ire', 'inert', 'orcanet', 'cine', 'coe', 'nardoo', 'deota', 'den', 'toi', 'adion', 'to', 'rite', 'nectar', 'rane', 'riant', 'cod', 'de', 'adit', 'airt', 'ie', 'retin', 'toon', 'cane', 'aeon', 'are', 'cointer', 'actioner', 'crin', 'detrain', 'art', 'cant', 'ort', 'tored', 'antoeci', 'tier', 'cite', 'onto', 'coater', 'tranced', 'atonic', 'roi', 'in', 'roan', 'decoat', 'rain', 'cronet', 'ronco', 'dont', 'citer', 'redact', 'cider', 'nor', 'octan', 'ration', 'doina', 'rie', 'aero', 'noted', 'crate', 'crain', 'cadet', 'condite', 'ran', 'odeon', 'date', 'eat', 'intoed', 'cation', 'carone', 'ratoon', 'retina', 'tiao', 'nice', 'nodi', 'codon', 'coo', 'torc', 'dent', 'entad', 'ne', 'toe', 'dae', 'decant', 'redcoat', 'coiner', 'irade', 'air', 'oint', 'coronet', 'radon', 'ce', 'octonare', 'oaten', 'citrean', 'dice', 'dancer', 'carotid', 'cretion', 'don', 'cion', 'nei', 'tead', 'nori', 'nacrite', 'ootid', 'rancid', 'dornic', 'orenda', 'cairn', 'aroon', 'coardent', 'aider', 'notice', 'cored', 'adorn', 'tad', 'carid', 'otic', 'dian', 'od', 'dint', 'tercio', 'die', 'conred', 'tice', 'rant', 'candor', 'anti', 'dar', 'antre', 'cornea', 'ordain', 'corona', 'recta', 'redo', 'tare', 'coranto', 'action', 'caird', 'creta', 'naid', 'tri', 'acre', 'crane', 'coated', 'citronade', 'anoetic', 'tenor', 'anode', 'triad', 'ceratoid', 'rod', 'idea', 'carton', 'cortin', 'endaortic', 'dicot', 'tend', 'da', 'tod', 'erotica', 'cord', 'coreid', 'toader', 'dace', 'tan', 'editor', 'rection', 'toner', 'cone', 'ni', 'tide', 'coder', 'din', 'ocote', 'ore', 'daer', 'octane', 'darn', 'do', 'reit', 'na', 'catenoid', 'tron', 'condor', 'crinated', 'cordon', 'crone', 'toad', 'noir', 'into', 'tirade', 'nadir', 'ant', 'ade', 'droit', 'icon', 'drone', 'ared', 'cardin', 'nid', 'dire', 'orcin', 'donator', 'rani', 'tane', 'ace', 'iodo', 'doria', 'ride', 'eon', 'ornate', 'cedrat', 'aire', 'carotin', 'dation', 'tear', 'onca', 'cote', 'taroc', 'con', 'nod', 'dinero', 'ecad', 'recant', 'ae', 'octad', 'cor', 'doctor', 'acridone', 'neti', 'cordite', 'crotin', 'aneroid', 'diota', 'coorie', 'dita', 'aconite', 'nard', 'cadent', 'ectad', 'rance', 'rea', 'tai', 'denat', 'rood', 'acne', 'decan', 'ani', 'rit', 'cit', 'cetin', 'odor', 'acorned', 'iceroot', 'inro', 'crood', 'daric', 'dacite', 'trone', 'acier', 'reina', 'oncia', 'drant', 'acrodont', 'nacred', 'cotrine', 'dinar', 'tean', 'atoner', 'toorie', 'nadorite', 'cardon', 'taen', 'tin', 'conte', 'acoine', 'dater', 'diact', 'aid', 'anodic', 'coronated', 'direct', 're', 'era', 'anticor', 'triace', 'octoid', 'dao', 'corta', 'edict', 'trode', 'ode', 'orant', 'niter', 'centrad', 'cater', 'tronc', 'coronad', 'r', 'toro', 'ar', 'once', 'ora', 'trace', 'creodont', 'erotic', 'ai', 'troca', 'ion', 'tecon', 'tra', 'acor', 'radio', 'acred', 'croon', 'tricae', 'recto', 'riden', 'andorite', 'taro', 'red', 'dear', 'ate', 'tinder', 'trin', 'deacon', 'ardent', 'aer', 'arc', 'crine', 'dart', 'diet', 'riot', 'tanrec', 'tor', 'noetic', 'ret', 'trance', 'ona', 'rind', 'coto', 'daoine', 'teind', 'toa', 'inter', 'code', 'cart', 'aion', 'detin', 'core', 'oont', 'rent', 'cedrin', 'card', 'trained', 'o', 'recoin', 'cro', 'and', 'diner', 'id', 'cordant', 'cedron', 'ditone', 'odic', 'cadi', 'cerin', 'nit', 'ecoid', 'nide', 'ean', 'andric', 'tind', 'raid', 'crena', 'oroide', 'roadite', 'canter', 'idant', 'cade', 'race', 'ten', 'caner', 'tarn', 'cooter', 'etna', 'tornadic', 'irone', 'ice', 'en', 'oord', 'oared', 'draine', 'cordate', 'react', 'reaction', 'tornado', 'troco', 'niota', 'carotenoid', 'an', 'cader', 'naric', 'car', 'centiar', 'ti', 'cearin', 'aroint', 'crined', 'iter', 'di', 'or', 'trio', 'dari', 'oration', 'orcein', 'coned', 'odorant', 'dean', 'coadore', 'cate', 'drate', 'dirten', 'ted', 'done', 'cadre', 'ocean', 'tired', 'adet', 'dirt', 'te', 'nae', 'ceti', 'cern', 'rotan', 'doe', 'roto', 'dote', 'node', 'ait', 'act', 'canoe', 'rode'}
why does this it say in the console Process finished with exit code 0 instead of printing the 'sen' variable? [duplicate]
This question already has answers here: How to check if type of a variable is string? [duplicate] (22 answers) Closed 2 years ago. import random import sys def v1_debug(v1, subject): if v1 != str and subject != str: sys.exit() else: if subject == 'He' or 'She' or 'It': for i in v1: if i == [len(v1)+1]: if i == 's' or 'z' or 'x' or 'o': v1 = v1 + 'es' elif i == 'y': v1 = v1 - 'y' + 'ies' elif v1[len(v1)] == 's' and v1[len(v1)+1] == 'h': v1 = v1 + 'es' elif v1[len(v1)] == 'c' and v1[len(v1)+1] == 'h': v1 = v1 + 'es' if subject == 'I' or 'You' or 'We' or 'They': for i in v1: if i == v1[len(v1)+1]: v1 = v1 + 'ing' return '' def default_positive_form(): try: sbj = ['He', 'She', 'It', 'I', 'You', 'We', 'They'] v1 = ['be', 'beat', 'become', 'begin', 'bend', 'bet', 'bid', 'bite', 'blow', 'break', 'bring', 'build', 'burn', 'buy', 'catch', 'choose', 'come', 'cost', 'cut', 'dig', 'dive', 'do', 'draw', 'dream', 'drive', 'drink', 'eat', 'fall', 'feel', 'fight', 'find', 'fly', 'forget', 'forgive', 'freeze', 'get', 'give', 'go', 'grow', 'hang', 'have', 'hear', 'hide', 'hit', 'hold', 'hurt', 'keep', 'know', 'lay', 'lead', 'leave', 'lend', 'let', 'lie', 'lose', 'make', 'mean', 'meet', 'pay', 'put', 'read', 'ride', 'ring', 'rise', 'run', 'say', 'see', 'sell', 'send', 'show', 'shut', 'sing', 'sit', 'sleep', 'speak', 'spend', 'stand', 'swim', 'take', 'teach', 'tear', 'tell', 'think', 'throw', 'understand', 'wake', 'wear', 'win', 'write'] sbj = random.choice(sbj) v1 = random.choice(v1) verb_debug = v1_debug(v1, sbj) sen = '' if sbj == 'I': sen = sbj + 'am' + verb_debug elif sbj == 'He' or 'She' or 'It': sen = sbj + 'is' + verb_debug elif sbj == 'You' or 'We' or 'They': sen = sbj + 'are' + verb_debug print(f'{sen}') except NameError: print('this is bullshit') return default_positive_form() this is python 3.8
sen will only consist of an empty string if none of the conditions of your if/elif/elif blocks are met. Change the print line to print(f"sen is: {sen}") But that's not the real problem. obj != str does not check if obj is a string, it checks to see if the object is pointing to the type constant str (Thanks Charles Duffy for the comment). Instead, use the builtin function isinstance() like so: if not isinstance(v1, str) and not isinstance(subject, str): print("Variables are the wrong type!") sys.exit()
Move item from list to another one for poker card game Python
I'm trying to make poker game in Python. In the while fuction I want to move the used cards in a separate(used cards) list. The problem is sometimes when I print the hand I can get duplicates. Something is wrong with my sorting strategy and I don't know what. Can you help me? import random deck = ['AS', 'KS', 'QS', 'JS', '10S', '9S', '8S', '7S', '6S', '5S', '4S', '3S', '2S',\ 'AD', 'KD', 'QD', 'JD', '10D', '9D', '8D', '7D', '6D', '5D', '4D', '3D', '2D',\ 'AC', 'KC', 'QC', 'JC', '10C', '9C', '8C', '7C', '6C', '5C', '4C', '3C', '2C',\ 'AH', 'KH', 'QH', 'JH', '10H', '9H', '8H', '7H', '6H', '5H', '4H', '3H', '2H'] used = [] p1 = [] p2 = [] a = 0 while (a < 2): drawn_card = random.choice(deck) deck.append(drawn_card) deck = [f for f in deck if f not in used] p1.append(drawn_card) a+=1
Well the random choice is not guaranteed to be unique, thus when you do: drawn_card = random.choice(deck) .. p1.append(drawn_card) you may end up having duplicates (that explains that you some time see duplicates and some not). Check if drawn_card is in the list first and if not, then append. That way you won't have duplicates. In code you could it like this: if drawn_card not in p1: p1.append(drawn_card) Or, as Rory Daulton said: If you are allowed, you could shuffle the entire deck, then remove consecutive items from that list.
you need to compare the random card with "p1" not with "deck": import random deck = ['AS', 'KS', 'QS', 'JS', '10S', '9S', '8S', '7S', '6S', '5S', '4S', '3S', '2S',\ 'AD', 'KD', 'QD', 'JD', '10D', '9D', '8D', '7D', '6D', '5D', '4D', '3D', '2D',\ 'AC', 'KC', 'QC', 'JC', '10C', '9C', '8C', '7C', '6C', '5C', '4C', '3C', '2C',\ 'AH', 'KH', 'QH', 'JH', '10H', '9H', '8H', '7H', '6H', '5H', '4H', '3H', '2H'] used = [] p1 = [] a = 0 while (a < 2): drawn_card = random.choice(deck) print(drawn_card) if drawn_card not in p1: p1.append(drawn_card) a += 1 continue print (p1)
Find a list of things (e.g. a list of rivers) using NLTK
I would like to get lists of things, for example a list of names of rivers or a list of types of animals. NLTK looks like it might be the thing for this, but I'm not sure how to do what I want. I'd like to have a function like: get_list_of("river") that would return something like ["amazon", "mississippi", "thames", ...]
I would suggest looking at NLTK wordnet API, see http://www.nltk.org/howto/wordnet.html. But after doing some digging seems like Proper Nouns (i.e. names of river are not easy to track down in wordnet) >>> from nltk.corpus import wordnet as wn >>> wn.synsets('river') [Synset('river.n.01')] >>> wn.synset('river.n.01') Synset('river.n.01') >>> wn.synset('river.n.01').lemma_names ['river'] >>> wn.synsets('amazon') [Synset('amazon.n.01'), Synset('amazon.n.02'), Synset('amazon.n.03'), Synset('amazon.n.04')] >>> wn.synset('amazon.n.01').definition 'a large strong and aggressive woman' >>> wn.synset('amazon.n.02').definition '(Greek mythology) one of a nation of women warriors of Scythia (who burned off the right breast in order to use a bow and arrow more effectively)' >>> wn.synset('amazon.n.03').definition "a major South American river; arises in the Andes and flows eastward into the South Atlantic; the world's 2nd longest river (4000 miles)" >>> wn.synset('amazon.n.04').definition 'mainly green tropical American parrots' As a brute force way, look for "river" in a synsets' definitions, as such: from itertools import chain list(chain(*[i.lemma_names for i in wn.all_synsets() if "river" in i.definition])) [out]: ['anaclinal', 'cataclinal', 'Acheronian', 'Acherontic', 'Stygian', 'hit-and-run', 'fluvial', 'riparian', 'Lao', 'debouch', 'rejuvenate', 'drive', 'ford', 'ascend', 'plant', 'drive', 'ford', 'fording', 'drive', 'driving', 'flood_control', 'conservancy', 'road_rage', 'Aegospotami', 'Aegospotamos', 'Yalu_River', 'three-spined_stickleback', 'Gasterosteus_aculeatus', 'ten-spined_stickleback', 'Gasterosteus_pungitius', 'placoderm', 'hellbender', 'mud_puppy', 'Cryptobranchus_alleganiensis', 'plains_spadefoot', 'Scaphiopus_bombifrons', 'mud_turtle', 'cooter', 'river_cooter', 'Pseudemys_concinna', 'spiny_softshell', 'Trionyx_spiniferus', 'smooth_softshell', 'Trionyx_muticus', 'teal', 'pintail', 'pin-tailed_duck', 'Anas_acuta', 'Ancylus', 'genus_Ancylus', 'freshwater_mussel', 'freshwater_clam', 'long-clawed_prawn', 'river_prawn', 'Palaemon_australis', 'Platanistidae', 'family_Platanistidae', 'hippopotamus', 'hippo', 'river_horse', 'Hippopotamus_amphibius', 'waterbuck', 'Australian_lungfish', 'Queensland_lungfish', 'Neoceratodus_forsteri', 'alewife', 'Alosa_pseudoharengus', 'Pomolobus_pseudoharengus', 'sockeye', 'sockeye_salmon', 'red_salmon', 'blueback_salmon', 'Oncorhynchus_nerka', 'brown_trout', 'salmon_trout', 'Salmo_trutta', 'Australian_arowana', 'Dawson_River_salmon', 'saratoga', 'spotted_barramundi', 'spotted_bonytongue', 'Scleropages_leichardti', 'Australian_bonytongue', 'northern_barramundi', 'Scleropages_jardinii', 'crappie', 'striped_bass', 'striper', 'Roccus_saxatilis', 'rockfish', 'bolti', 'Tilapia_nilotica', 'Chinese_paddlefish', 'Psephurus_gladis', 'air_bag', 'Augean_stables', 'barouche', 'bend', 'curve', 'boathouse', 'box', 'box_seat', 'brassie', 'bridge', 'span', 'bridle', 'brougham', 'buggy_whip', 'cab', 'car_mirror', 'coach', 'four-in-hand', 'coach-and-four', 'cockpit', 'death_seat', 'dredge', 'dredging_bucket', 'elbow', 'flat_tip_screwdriver', 'hansom', 'hansom_cab', 'keelboat', 'Lake_Volta', 'levee', 'levee', 'L-plate', 'machine_screw', 'outfall', 'Phillips_screwdriver', 'pull-in', 'pull-up', 'river_boat', 'showboat', 'skidpan', 'spiral_ratchet_screwdriver', 'ratchet_screwdriver', 'towpath', 'towing_path', 'truck_stop', 'willowware', 'willow-pattern', 'woodscrew', 'Copehan', 'Volgaic', 'horn', 'rip', 'riptide', 'tide_rip', 'crosscurrent', 'countercurrent', 'crappie', 'red_salmon', 'sockeye', 'sockeye_salmon', 'logjam', 'Teamsters_Union', 'car_pool', 'conservancy', 'headwater', 'river_basin', 'basin', 'watershed', 'drainage_basin', 'catchment_area', 'catchment_basin', 'drainage_area', 'confluence', 'meeting', 'Mammoth_Cave_National_Park', 'Zion_National_Park', 'watershed', 'water_parting', 'divide', 'Yangon', 'Rangoon', "N'Djamena", 'Ndjamena', 'Fort-Lamy', 'capital_of_Chad', 'Kinshasa', 'Leopoldville', 'Saxony', 'Sachsen', 'Saxe', 'Cologne', 'Koln', 'Mannheim', 'Rhineland', 'Rheinland', 'Ruhr', 'Ruhr_Valley', 'West_Bank', 'Pennines', 'Pennine_Chain', 'Ottawa', 'Canadian_capital', 'capital_of_Canada', 'Antwerpen', 'Antwerp', 'Anvers', 'Orleans', 'Rhone-Alpes', 'Friesland', 'Timbuktu', 'Bydgoszcz', 'Bromberg', 'Novosibirsk', 'Tbilisi', 'Tiflis', 'capital_of_Georgia', 'Toledo', 'Selma', 'Denver', 'Mile-High_City', 'capital_of_Colorado', 'Hartford', 'capital_of_Connecticut', 'Savannah', 'Topeka', 'capital_of_Kansas', 'Louisville', 'New_Orleans', 'Detroit', 'Motor_City', 'Motown', 'Minneapolis', 'Saint_Paul', 'St._Paul', 'capital_of_Minnesota', 'Jefferson_City', 'capital_of_Missouri', 'Saint_Louis', 'St._Louis', 'Gateway_to_the_West', 'Billings', 'Great_Falls', 'Omaha', 'Concord', 'capital_of_New_Hampshire', 'Manchester', 'Trenton', 'capital_of_New_Jersey', 'Albuquerque', 'New_Netherland', 'Albany', 'capital_of_New_York', 'Erie_Canal', 'New_York', 'New_York_City', 'Greater_New_York', 'West_Point', 'Niagara_Falls', 'Schenectady', 'Bismarck', 'capital_of_North_Dakota', 'Fargo', 'Cincinnati', 'Tulsa', 'Chester', 'Philadelphia', 'City_of_Brotherly_Love', 'Pierre', 'capital_of_South_Dakota', 'Mount_Vernon', 'Charleston', 'capital_of_West_Virginia', 'Huntington', 'Morgantown', 'Parkersburg', 'Wheeling', 'Casper', 'Ciudad_Bolivar', 'Aare', 'Aar', 'Aare_River', 'Acheron', 'River_Acheron', 'Adige', 'River_Adige', 'Aire', 'River_Aire', 'Aire_River', 'Alabama', 'Alabama_River', 'Allegheny', 'Allegheny_River', 'Amazon', 'Amazon_River', 'Amur', 'Amur_River', 'Heilong_Jiang', 'Heilong', 'Angara', 'Angara_River', 'Tunguska', 'Upper_Tunguska', 'Apalachicola', 'Apalachicola_River', 'Araguaia', 'Araguaia_River', 'Araguaya', 'Araguaya_River', 'Aras', 'Araxes', 'Arauca', 'Argun', 'Argun_River', 'Ergun_He', 'Arkansas', 'Arkansas_River', 'Arno', 'Arno_River', 'River_Arno', 'Avon', 'River_Avon', 'Upper_Avon', 'Upper_Avon_River', 'Avon', 'River_Avon', 'bar', 'Bighorn', 'Bighorn_River', 'Big_Sioux_River', 'billabong', 'bluff', 'body_of_water', 'water', 'bottomland', 'bottom', 'Brahmaputra', 'Brahmaputra_River', 'branch', 'Brazos', 'Brazos_River', 'brook', 'creek', 'Caloosahatchee', 'Caloosahatchee_River', 'Cam', 'River_Cam', 'Cam_River', 'Canadian', 'Canadian_River', 'canyon', 'canon', 'Cape_Fear_River', 'channel', 'Chao_Phraya', 'Charles', 'Charles_River', 'Chattahoochee', 'Chattahoochee_River', 'Cimarron', 'Cimarron_River', 'Clinch_River', 'Clyde', 'Cocytus', 'River_Cocytus', 'Colorado', 'Colorado_River', 'Colorado', 'Colorado_River', 'Columbia', 'Columbia_River', 'Congo', 'Congo_River', 'Zaire_River', 'Connecticut', 'Connecticut_River', 'Coosa', 'Coosa_River', 'Cumberland', 'Cumberland_River', 'dale', 'Danube', 'Danube_River', 'Danau', 'Darling', 'Darling_River', 'Delaware', 'Delaware_River', 'delta', 'Demerara', 'Detroit_River', 'distributary', 'Dnieper', 'Dnieper_River', 'Don', 'Don_River', 'Ebro', 'Ebro_River', 'Elbe', 'Elbe_River', 'Elizabeth_River', 'estuary', 'Euphrates', 'Euphrates_River', 'Flint', 'Flint_River', 'floodplain', 'flood_plain', 'Forth', 'Forth_River', 'Fox_River', 'Ganges', 'Ganges_River', 'Gan_Jiang', 'Kan_River', 'Garonne', 'Garonne_River', 'Gila', 'Gila_River', 'gorge', 'Grand_River', 'Green', 'Green_River', 'headstream', 'Housatonic', 'Housatonic_River', 'Huang_He', 'Hwang_Ho', 'Yellow_River', 'Hudson', 'Hudson_River', 'IJssel', 'IJssel_river', 'Illinois_River', 'Indigirka', 'Indigirka_River', 'Indus', 'Indus_River', 'Irrawaddy', 'Irrawaddy_River', 'Irtish', 'Irtish_River', 'Irtysh', 'Irtysh_River', 'Isere', 'Isere_River', 'James', 'James_River', 'James', 'James_River', 'Jordan', 'Jordan_River', 'Kansas', 'Kansas_River', 'Kaw_River', 'Kasai', 'Kasai_River', 'River_Kasai', 'Kissimmee', 'Kissimmee_River', 'Klamath', 'Klamath_River', 'Kura', 'Kura_River', 'Lake_Chad', 'Chad', 'Lehigh_River', 'Lena', 'Lena_River', 'Lethe', 'River_Lethe', 'liman', 'Limpopo', 'Crocodile_River', 'Little_Bighorn', 'Little_Bighorn_River', 'Little_Horn', 'Little_Missouri', 'Little_Missouri_River', 'Little_Sioux_River', 'Little_Wabash', 'Little_Wabash_River', 'Loire', 'Loire_River', 'Mackenzie', 'Mackenzie_River', 'Madeira', 'Madeira_River', 'Magdalena', 'Magdalena_River', 'meander', 'Mekong', 'Mekong_River', 'Merrimack', 'Merrimack_River', 'Meuse', 'Meuse_River', 'Milk', 'Milk_River', 'Mississippi', 'Mississippi_River', 'Missouri', 'Missouri_River', 'Mobile', 'Mobile_River', 'Mohawk_River', 'Monongahela', 'Monongahela_River', 'Moreau_River', 'Murray', 'Murray_River', 'Murrumbidgee', 'Murrumbidgee_River', 'Namoi', 'Namoi_River', 'Nan', 'Nan_River', 'Neckar', 'Neckar_River', 'Neosho', 'Neosho_River', 'Neva', 'Neva_River', 'New_River', 'Niagara', 'Niagara_River', 'Niger', 'Niger_River', 'Nile', 'Nile_River', 'North_Platte', 'North_Platte_River', 'Ob', 'Ob_River', 'Oder', 'Oder_River', 'Ohio', 'Ohio_River', 'Orange', 'Orange_River', 'Orinoco', 'Orinoco_River', 'Osage', 'Osage_River', 'Outaouais', 'Ottawa', 'Ottawa_river', 'Ouachita', 'Ouachita_River', 'Ouse', 'Ouse_River', 'oxbow', 'oxbow_lake', 'Parana', 'Parana_River', 'Parnaiba', 'Parnahiba', 'Pearl_River', 'Pee_Dee', 'Pee_Dee_River', 'Penobscot', 'Penobscot_River', 'Ping', 'Ping_River', 'Platte', 'Platte_River', 'Po', 'Po_River', 'Potomac', 'Potomac_River', 'Purus', 'Purus_River', 'rapid', 'Rappahannock', 'Rappahannock_River', 'Rhine', 'Rhine_River', 'Rhein', 'Rhone', 'Rhone_River', 'Rio_Grande', 'Rio_Bravo', 'riparian_forest', 'riverbank', 'riverside', 'riverbed', 'river_bottom', 'river_boulder', 'Russian_River', 'Saale', 'Saale_River', 'Sabine', 'Sabine_River', 'Sacramento_River', 'Saint_John', 'Saint_John_River', 'St._John', 'St._John_River', 'Saint_Johns', 'Saint_Johns_River', 'St._Johns', 'St._Johns_River', 'Saint_Lawrence', 'Saint_Lawrence_River', 'St._Lawrence', 'St._Lawrence_River', 'Sambre', 'Sambre_River', 'sandbank', 'San_Joaquin_River', 'Sao_Francisco', 'Saone', 'Saone_River', 'Savannah', 'Savannah_River', 'Scheldt', 'Scheldt_River', 'Seine', 'Seine_River', 'Severn', 'River_Severn', 'Severn_River', 'Severn', 'Severn_River', 'Seyhan', 'Seyhan_River', 'Shari', 'Shari_River', 'Chari', 'Chari_River', 'Shenandoah_River', 'Styx', 'River_Styx', 'Sun_River', 'Suriname_River', 'Surinam_River', 'Susquehanna', 'Susquehanna_River', 'Tagus', 'Tagus_River', 'Tallapoosa', 'Tallapoosa_River', 'Tennessee', 'Tennessee_River', 'Thames', 'River_Thames', 'Thames_River', 'Tiber', 'Tevere', 'Tigris', 'Tigris_River', 'Tocantins', 'Tocantins_River', 'Tombigbee', 'Tombigbee_River', 'Trent', 'River_Trent', 'Trent_River', 'Trinity_River', 'Tunguska', 'Lower_Tunguska', 'Tunguska', 'Stony_Tunguska', 'Tyne', 'River_Tyne', 'Tyne_River', 'Urubupunga', 'Urubupunga_Falls', 'Uruguay_River', 'valley', 'vale', 'Vetluga', 'Vetluga_River', 'Vistula', 'Vistula_River', 'Volga', 'Volga_River', 'Volkhov', 'Volkhov_River', 'Volta', 'waterfall', 'falls', 'water_system', 'Weser', 'Weser_River', 'Willamette', 'Willamette_River', 'Yalu', 'Yalu_River', 'Chang_Jiang', 'Changjiang', 'Chang', 'Yangtze', 'Yangtze_River', 'Yangtze_Kiang', 'Yazoo', 'Yazoo_River', 'Yenisei', 'Yenisei_River', 'Yenisey', 'Yenisey_River', 'Yukon', 'Yukon_River', 'Zambezi', 'Zambezi_River', 'Zhu_Jiang', 'Canton_River', 'Chu_Kiang', 'Pearl_River', 'Charon', 'naiad', 'Achilles', 'finisher', 'Algonkian', 'Algonkin', 'Arikara', 'Aricara', 'Chinook', 'Conoy', 'Halchidhoma', 'Hidatsa', 'Gros_Ventre', 'Kansa', 'Kansas', 'Karok', 'Maidu', 'Maricopa', 'Missouri', 'Mohave', 'Mojave', 'Ofo', 'Omaha', 'Maha', 'Osage', 'Oto', 'Otoe', 'Pamlico', 'Ponca', 'Ponka', 'Quapaw', 'Shahaptian', 'Sahaptin', 'Sahaptino', 'Shawnee', 'Tsimshian', 'Walapai', 'Hualapai', 'Hualpai', 'Yeniseian', 'Yakut', 'charioteer', 'driver', 'honker', 'lasher', 'mahout', 'nondriver', 'road_hog', 'roadhog', 'speeder', 'speed_demon', 'tailgater', 'teamster', 'test_driver', 'wagoner', 'waggoner', 'Cartier', 'Jacques_Cartier', 'Oldfield', 'Barney_Oldfield', 'Berna_Eli_Oldfield', 'debacle', 'bald_cypress', 'swamp_cypress', 'pond_bald_cypress', 'southern_cypress', 'Taxodium_distichum', 'Montezuma_cypress', 'Mexican_swamp_cypress', 'Taxodium_mucronatum', 'pistia', 'water_lettuce', 'water_cabbage', 'Pistia_stratiotes', 'Pistia_stratoites', 'great_yellowcress', 'Rorippa_amphibia', 'Nasturtium_amphibium', 'giant_reed', 'Arundo_donax', 'Phragmites', 'genus_Phragmites', 'black_birch', 'river_birch', 'red_birch', 'Betula_nigra', 'river_red_gum', 'river_gum', 'Eucalyptus_camaldulensis', 'Eucalyptus_rostrata', 'false_indigo', 'bastard_indigo', 'Amorpha_fruticosa', 'thermal_pollution', 'water_pollution', 'alluvial_soil', 'Senegal_gum', 'silt'] But it seems like there are still some noise from the "brute force" method. Let's try to assume that if it is the name of the river it should start with an uppercase, so let's try: list(chain(*[ [j for j in i.lemma_names if j[0].isupper()] for i in wn.all_synsets() if "river" in i.definition])) [out]: ['Acheronian', 'Acherontic', 'Stygian', 'Lao', 'Aegospotami', 'Aegospotamos', 'Yalu_River', 'Gasterosteus_aculeatus', 'Gasterosteus_pungitius', 'Cryptobranchus_alleganiensis', 'Scaphiopus_bombifrons', 'Pseudemys_concinna', 'Trionyx_spiniferus', 'Trionyx_muticus', 'Anas_acuta', 'Ancylus', 'Palaemon_australis', 'Platanistidae', 'Hippopotamus_amphibius', 'Australian_lungfish', 'Queensland_lungfish', 'Neoceratodus_forsteri', 'Alosa_pseudoharengus', 'Pomolobus_pseudoharengus', 'Oncorhynchus_nerka', 'Salmo_trutta', 'Australian_arowana', 'Dawson_River_salmon', 'Scleropages_leichardti', 'Australian_bonytongue', 'Scleropages_jardinii', 'Roccus_saxatilis', 'Tilapia_nilotica', 'Chinese_paddlefish', 'Psephurus_gladis', 'Augean_stables', 'Lake_Volta', 'L-plate', 'Phillips_screwdriver', 'Copehan', 'Volgaic', 'Teamsters_Union', 'Mammoth_Cave_National_Park', 'Zion_National_Park', 'Yangon', 'Rangoon', "N'Djamena", 'Ndjamena', 'Fort-Lamy', 'Kinshasa', 'Leopoldville', 'Saxony', 'Sachsen', 'Saxe', 'Cologne', 'Koln', 'Mannheim', 'Rhineland', 'Rheinland', 'Ruhr', 'Ruhr_Valley', 'West_Bank', 'Pennines', 'Pennine_Chain', 'Ottawa', 'Canadian_capital', 'Antwerpen', 'Antwerp', 'Anvers', 'Orleans', 'Rhone-Alpes', 'Friesland', 'Timbuktu', 'Bydgoszcz', 'Bromberg', 'Novosibirsk', 'Tbilisi', 'Tiflis', 'Toledo', 'Selma', 'Denver', 'Mile-High_City', 'Hartford', 'Savannah', 'Topeka', 'Louisville', 'New_Orleans', 'Detroit', 'Motor_City', 'Motown', 'Minneapolis', 'Saint_Paul', 'St._Paul', 'Jefferson_City', 'Saint_Louis', 'St._Louis', 'Gateway_to_the_West', 'Billings', 'Great_Falls', 'Omaha', 'Concord', 'Manchester', 'Trenton', 'Albuquerque', 'New_Netherland', 'Albany', 'Erie_Canal', 'New_York', 'New_York_City', 'Greater_New_York', 'West_Point', 'Niagara_Falls', 'Schenectady', 'Bismarck', 'Fargo', 'Cincinnati', 'Tulsa', 'Chester', 'Philadelphia', 'City_of_Brotherly_Love', 'Pierre', 'Mount_Vernon', 'Charleston', 'Huntington', 'Morgantown', 'Parkersburg', 'Wheeling', 'Casper', 'Ciudad_Bolivar', 'Aare', 'Aar', 'Aare_River', 'Acheron', 'River_Acheron', 'Adige', 'River_Adige', 'Aire', 'River_Aire', 'Aire_River', 'Alabama', 'Alabama_River', 'Allegheny', 'Allegheny_River', 'Amazon', 'Amazon_River', 'Amur', 'Amur_River', 'Heilong_Jiang', 'Heilong', 'Angara', 'Angara_River', 'Tunguska', 'Upper_Tunguska', 'Apalachicola', 'Apalachicola_River', 'Araguaia', 'Araguaia_River', 'Araguaya', 'Araguaya_River', 'Aras', 'Araxes', 'Arauca', 'Argun', 'Argun_River', 'Ergun_He', 'Arkansas', 'Arkansas_River', 'Arno', 'Arno_River', 'River_Arno', 'Avon', 'River_Avon', 'Upper_Avon', 'Upper_Avon_River', 'Avon', 'River_Avon', 'Bighorn', 'Bighorn_River', 'Big_Sioux_River', 'Brahmaputra', 'Brahmaputra_River', 'Brazos', 'Brazos_River', 'Caloosahatchee', 'Caloosahatchee_River', 'Cam', 'River_Cam', 'Cam_River', 'Canadian', 'Canadian_River', 'Cape_Fear_River', 'Chao_Phraya', 'Charles', 'Charles_River', 'Chattahoochee', 'Chattahoochee_River', 'Cimarron', 'Cimarron_River', 'Clinch_River', 'Clyde', 'Cocytus', 'River_Cocytus', 'Colorado', 'Colorado_River', 'Colorado', 'Colorado_River', 'Columbia', 'Columbia_River', 'Congo', 'Congo_River', 'Zaire_River', 'Connecticut', 'Connecticut_River', 'Coosa', 'Coosa_River', 'Cumberland', 'Cumberland_River', 'Danube', 'Danube_River', 'Danau', 'Darling', 'Darling_River', 'Delaware', 'Delaware_River', 'Demerara', 'Detroit_River', 'Dnieper', 'Dnieper_River', 'Don', 'Don_River', 'Ebro', 'Ebro_River', 'Elbe', 'Elbe_River', 'Elizabeth_River', 'Euphrates', 'Euphrates_River', 'Flint', 'Flint_River', 'Forth', 'Forth_River', 'Fox_River', 'Ganges', 'Ganges_River', 'Gan_Jiang', 'Kan_River', 'Garonne', 'Garonne_River', 'Gila', 'Gila_River', 'Grand_River', 'Green', 'Green_River', 'Housatonic', 'Housatonic_River', 'Huang_He', 'Hwang_Ho', 'Yellow_River', 'Hudson', 'Hudson_River', 'IJssel', 'IJssel_river', 'Illinois_River', 'Indigirka', 'Indigirka_River', 'Indus', 'Indus_River', 'Irrawaddy', 'Irrawaddy_River', 'Irtish', 'Irtish_River', 'Irtysh', 'Irtysh_River', 'Isere', 'Isere_River', 'James', 'James_River', 'James', 'James_River', 'Jordan', 'Jordan_River', 'Kansas', 'Kansas_River', 'Kaw_River', 'Kasai', 'Kasai_River', 'River_Kasai', 'Kissimmee', 'Kissimmee_River', 'Klamath', 'Klamath_River', 'Kura', 'Kura_River', 'Lake_Chad', 'Chad', 'Lehigh_River', 'Lena', 'Lena_River', 'Lethe', 'River_Lethe', 'Limpopo', 'Crocodile_River', 'Little_Bighorn', 'Little_Bighorn_River', 'Little_Horn', 'Little_Missouri', 'Little_Missouri_River', 'Little_Sioux_River', 'Little_Wabash', 'Little_Wabash_River', 'Loire', 'Loire_River', 'Mackenzie', 'Mackenzie_River', 'Madeira', 'Madeira_River', 'Magdalena', 'Magdalena_River', 'Mekong', 'Mekong_River', 'Merrimack', 'Merrimack_River', 'Meuse', 'Meuse_River', 'Milk', 'Milk_River', 'Mississippi', 'Mississippi_River', 'Missouri', 'Missouri_River', 'Mobile', 'Mobile_River', 'Mohawk_River', 'Monongahela', 'Monongahela_River', 'Moreau_River', 'Murray', 'Murray_River', 'Murrumbidgee', 'Murrumbidgee_River', 'Namoi', 'Namoi_River', 'Nan', 'Nan_River', 'Neckar', 'Neckar_River', 'Neosho', 'Neosho_River', 'Neva', 'Neva_River', 'New_River', 'Niagara', 'Niagara_River', 'Niger', 'Niger_River', 'Nile', 'Nile_River', 'North_Platte', 'North_Platte_River', 'Ob', 'Ob_River', 'Oder', 'Oder_River', 'Ohio', 'Ohio_River', 'Orange', 'Orange_River', 'Orinoco', 'Orinoco_River', 'Osage', 'Osage_River', 'Outaouais', 'Ottawa', 'Ottawa_river', 'Ouachita', 'Ouachita_River', 'Ouse', 'Ouse_River', 'Parana', 'Parana_River', 'Parnaiba', 'Parnahiba', 'Pearl_River', 'Pee_Dee', 'Pee_Dee_River', 'Penobscot', 'Penobscot_River', 'Ping', 'Ping_River', 'Platte', 'Platte_River', 'Po', 'Po_River', 'Potomac', 'Potomac_River', 'Purus', 'Purus_River', 'Rappahannock', 'Rappahannock_River', 'Rhine', 'Rhine_River', 'Rhein', 'Rhone', 'Rhone_River', 'Rio_Grande', 'Rio_Bravo', 'Russian_River', 'Saale', 'Saale_River', 'Sabine', 'Sabine_River', 'Sacramento_River', 'Saint_John', 'Saint_John_River', 'St._John', 'St._John_River', 'Saint_Johns', 'Saint_Johns_River', 'St._Johns', 'St._Johns_River', 'Saint_Lawrence', 'Saint_Lawrence_River', 'St._Lawrence', 'St._Lawrence_River', 'Sambre', 'Sambre_River', 'San_Joaquin_River', 'Sao_Francisco', 'Saone', 'Saone_River', 'Savannah', 'Savannah_River', 'Scheldt', 'Scheldt_River', 'Seine', 'Seine_River', 'Severn', 'River_Severn', 'Severn_River', 'Severn', 'Severn_River', 'Seyhan', 'Seyhan_River', 'Shari', 'Shari_River', 'Chari', 'Chari_River', 'Shenandoah_River', 'Styx', 'River_Styx', 'Sun_River', 'Suriname_River', 'Surinam_River', 'Susquehanna', 'Susquehanna_River', 'Tagus', 'Tagus_River', 'Tallapoosa', 'Tallapoosa_River', 'Tennessee', 'Tennessee_River', 'Thames', 'River_Thames', 'Thames_River', 'Tiber', 'Tevere', 'Tigris', 'Tigris_River', 'Tocantins', 'Tocantins_River', 'Tombigbee', 'Tombigbee_River', 'Trent', 'River_Trent', 'Trent_River', 'Trinity_River', 'Tunguska', 'Lower_Tunguska', 'Tunguska', 'Stony_Tunguska', 'Tyne', 'River_Tyne', 'Tyne_River', 'Urubupunga', 'Urubupunga_Falls', 'Uruguay_River', 'Vetluga', 'Vetluga_River', 'Vistula', 'Vistula_River', 'Volga', 'Volga_River', 'Volkhov', 'Volkhov_River', 'Volta', 'Weser', 'Weser_River', 'Willamette', 'Willamette_River', 'Yalu', 'Yalu_River', 'Chang_Jiang', 'Changjiang', 'Chang', 'Yangtze', 'Yangtze_River', 'Yangtze_Kiang', 'Yazoo', 'Yazoo_River', 'Yenisei', 'Yenisei_River', 'Yenisey', 'Yenisey_River', 'Yukon', 'Yukon_River', 'Zambezi', 'Zambezi_River', 'Zhu_Jiang', 'Canton_River', 'Chu_Kiang', 'Pearl_River', 'Charon', 'Achilles', 'Algonkian', 'Algonkin', 'Arikara', 'Aricara', 'Chinook', 'Conoy', 'Halchidhoma', 'Hidatsa', 'Gros_Ventre', 'Kansa', 'Kansas', 'Karok', 'Maidu', 'Maricopa', 'Missouri', 'Mohave', 'Mojave', 'Ofo', 'Omaha', 'Maha', 'Osage', 'Oto', 'Otoe', 'Pamlico', 'Ponca', 'Ponka', 'Quapaw', 'Shahaptian', 'Sahaptin', 'Sahaptino', 'Shawnee', 'Tsimshian', 'Walapai', 'Hualapai', 'Hualpai', 'Yeniseian', 'Yakut', 'Cartier', 'Jacques_Cartier', 'Oldfield', 'Barney_Oldfield', 'Berna_Eli_Oldfield', 'Taxodium_distichum', 'Montezuma_cypress', 'Mexican_swamp_cypress', 'Taxodium_mucronatum', 'Pistia_stratiotes', 'Pistia_stratoites', 'Rorippa_amphibia', 'Nasturtium_amphibium', 'Arundo_donax', 'Phragmites', 'Betula_nigra', 'Eucalyptus_camaldulensis', 'Eucalyptus_rostrata', 'Amorpha_fruticosa', 'Senegal_gum'] Let's go crazy and say that only if the word "River" appears in the lemma, it is a river: >>> list(chain(*[ [j for j in i.lemma_names if j[0].isupper() and "River" in j] for i in wn.all_synsets() if "river" in i.definition])) [out]: ['Yalu_River', 'Dawson_River_salmon', 'Aare_River', 'River_Acheron', 'River_Adige', 'River_Aire', 'Aire_River', 'Alabama_River', 'Allegheny_River', 'Amazon_River', 'Amur_River', 'Angara_River', 'Apalachicola_River', 'Araguaia_River', 'Araguaya_River', 'Argun_River', 'Arkansas_River', 'Arno_River', 'River_Arno', 'River_Avon', 'Upper_Avon_River', 'River_Avon', 'Bighorn_River', 'Big_Sioux_River', 'Brahmaputra_River', 'Brazos_River', 'Caloosahatchee_River', 'River_Cam', 'Cam_River', 'Canadian_River', 'Cape_Fear_River', 'Charles_River', 'Chattahoochee_River', 'Cimarron_River', 'Clinch_River', 'River_Cocytus', 'Colorado_River', 'Colorado_River', 'Columbia_River', 'Congo_River', 'Zaire_River', 'Connecticut_River', 'Coosa_River', 'Cumberland_River', 'Danube_River', 'Darling_River', 'Delaware_River', 'Detroit_River', 'Dnieper_River', 'Don_River', 'Ebro_River', 'Elbe_River', 'Elizabeth_River', 'Euphrates_River', 'Flint_River', 'Forth_River', 'Fox_River', 'Ganges_River', 'Kan_River', 'Garonne_River', 'Gila_River', 'Grand_River', 'Green_River', 'Housatonic_River', 'Yellow_River', 'Hudson_River', 'Illinois_River', 'Indigirka_River', 'Indus_River', 'Irrawaddy_River', 'Irtish_River', 'Irtysh_River', 'Isere_River', 'James_River', 'James_River', 'Jordan_River', 'Kansas_River', 'Kaw_River', 'Kasai_River', 'River_Kasai', 'Kissimmee_River', 'Klamath_River', 'Kura_River', 'Lehigh_River', 'Lena_River', 'River_Lethe', 'Crocodile_River', 'Little_Bighorn_River', 'Little_Missouri_River', 'Little_Sioux_River', 'Little_Wabash_River', 'Loire_River', 'Mackenzie_River', 'Madeira_River', 'Magdalena_River', 'Mekong_River', 'Merrimack_River', 'Meuse_River', 'Milk_River', 'Mississippi_River', 'Missouri_River', 'Mobile_River', 'Mohawk_River', 'Monongahela_River', 'Moreau_River', 'Murray_River', 'Murrumbidgee_River', 'Namoi_River', 'Nan_River', 'Neckar_River', 'Neosho_River', 'Neva_River', 'New_River', 'Niagara_River', 'Niger_River', 'Nile_River', 'North_Platte_River', 'Ob_River', 'Oder_River', 'Ohio_River', 'Orange_River', 'Orinoco_River', 'Osage_River', 'Ouachita_River', 'Ouse_River', 'Parana_River', 'Pearl_River', 'Pee_Dee_River', 'Penobscot_River', 'Ping_River', 'Platte_River', 'Po_River', 'Potomac_River', 'Purus_River', 'Rappahannock_River', 'Rhine_River', 'Rhone_River', 'Russian_River', 'Saale_River', 'Sabine_River', 'Sacramento_River', 'Saint_John_River', 'St._John_River', 'Saint_Johns_River', 'St._Johns_River', 'Saint_Lawrence_River', 'St._Lawrence_River', 'Sambre_River', 'San_Joaquin_River', 'Saone_River', 'Savannah_River', 'Scheldt_River', 'Seine_River', 'River_Severn', 'Severn_River', 'Severn_River', 'Seyhan_River', 'Shari_River', 'Chari_River', 'Shenandoah_River', 'River_Styx', 'Sun_River', 'Suriname_River', 'Surinam_River', 'Susquehanna_River', 'Tagus_River', 'Tallapoosa_River', 'Tennessee_River', 'River_Thames', 'Thames_River', 'Tigris_River', 'Tocantins_River', 'Tombigbee_River', 'River_Trent', 'Trent_River', 'Trinity_River', 'River_Tyne', 'Tyne_River', 'Uruguay_River', 'Vetluga_River', 'Vistula_River', 'Volga_River', 'Volkhov_River', 'Weser_River', 'Willamette_River', 'Yalu_River', 'Yangtze_River', 'Yazoo_River', 'Yenisei_River', 'Yenisey_River', 'Yukon_River', 'Zambezi_River', 'Canton_River', 'Pearl_River'] Much better but i think you're better off just crawling the names from http://en.wikipedia.org/wiki/Lists_of_rivers . Have fun!
To show that the solution for "river" using NLTK wordnet won't scale other entities, and also answer #tripleee's question. If you're looking for animals, you can simply recursively get all hyponyms of animals, as such: list(set([w for s in vehicle.closure(lambda s:s.hyponyms()) for w in s.lemma_names]))
We can use sentence tagging to get proper nouns. Filter them for required output. Perhaps: from nltk.tag import pos_tag sentence = "Amazon is great river. Mississippi is awesome too." tagged_sent = pos_tag(sentence.split()) tagged_sent will yield similar tagged out where NNP is the proper noun. [('Amazon', 'NNP'), ('is', 'VBZ'), ('great', 'JJ'), ('river.', 'NNP'), ('Mississippi', 'NNP'), ('is', 'VBZ'), ('awesome', 'VBN'), ('too.', '-NONE-')] propernouns = [word for word,pos in tagged_sent if pos == 'NNP'] propernouns would return ['Amazon', 'river.', 'Mississippi'] You can set categories for each and use a function to return them.
If you want to get lists of things of a specific type e.g. rivers http://dbPedia.org, http://freebase.com or http://wikidata.org are the better choice. This dbPedia SPARQL query returns all rivers known to Wikipedia: SELECT ?name ?description WHERE { {?river rdf:type dbpedia-owl:River} . ?river foaf:name ?name . ?river rdfs:comment ?description . } ORDER BY ?name http://bit.ly/1jc8Ip6