How to combine three columns into one in python - python
Excel table — this screenshot of the Excel file shows how the final result should look. Please take a closer look at the "Lifestyle" section.
I can't figure out how to make my python just like the excel picture screenshot. "Lifestyle" section needs to have 2 more sub-columns combined just like in a picture below. Any help would be appreciated.
I'm gonna post picture below PyCharm screenshot:
Here is my code:
# Convert a height in whole inches to a feet-and-inches string.
def inch_to_feet(x):
    """Return *x* inches formatted as feet'inches", e.g. 63 -> 5'3"."""
    feet, inches = divmod(x, 12)
    return '{}\'{}"'.format(feet, inches)
# File opened: prompt for the roster file, optionally append a name, then
# print the roster as an aligned table (heights converted to feet'inches").
print("Hello")
roster = input("Please enter a roster file: ")
if roster != "roster_extended.csv":
    print("Invalid name")
else:
    additional_name = input("There are 13 lines in this file. Would you like to enter an additional names? (Y/N): ")
    if additional_name == "Y":
        # NOTE(review): the count entered here is read but never used —
        # presumably a loop over it was intended; confirm requirements.
        input("How many more names?: ")
        # Append BEFORE reading, so the new entry appears in the table.
        # (The original assigned b = infile.write(...), which returns the
        # character count — iterating that int raised a TypeError.)
        with open("roster_extended.csv", "a+") as outfile:
            outfile.write(input("Enter first name: "))
    with open("roster_extended.csv", "r") as infile:
        infile.readline()  # skip the CSV header row
        rows = infile.readlines()
    header = '{0:>12} {1:>35} {2:>3} {3:>16} {4:>5} {5:>3} {6:>9}'.format(
        "FirstName", "LastName", "Age", "Occupation", "Ht", "Wt", "lifestyle")
    print(header)
    for person in rows:
        newperson = person.replace("\n", "").split(",")
        # int(), not eval(): eval() executes arbitrary code from the file.
        newperson[4] = inch_to_feet(int(newperson[4]))
        formatted = '{0:>12} {1:>35} {2:>3} {3:>16} {4:>5} {5:>3} {6:>9}'.format(
            newperson[0], newperson[1], newperson[2], newperson[3],
            newperson[4], newperson[5], newperson[6])
        print(formatted)
Here is the output I get:
FirstName LastName Age Occupation Ht Wt lifestyle
Anna Barbara 35 nurse 5'3" 129
Catherine Do 45 physicist 5'5" 135
Eric Frederick 28 teacher 5'5" 140
Gabriel Hernandez 55 surgeon 5'7" 150 x
Ivy Joo 31 engineer 5'2" 126 x
Kelly Marks 21 student 5'4" 132
Nancy Owens 60 immunologist 5'8" 170 x
Patricia Qin 36 dental assistant 4'11" 110 x
Roderick Stevenson 51 bus driver 5'6" 160 x
Tracy Umfreville 42 audiologist 5'7" 156 x
Victoria Wolfeschlegelsteinhausenbergerdorff 38 data analyst 5'8" 158
Lucy Xi 49 professor 5'9" 161
Yolanda Zachary 58 secretary 5'10" 164 x
Brief explanation of the solution:
You provided tabulated data as input (there are several ways to tabulate: check here). Since you're starting out with Python, the solution stays within the standard library (rather than resorting to external libraries). Only format() and class variables are used to keep track of the column widths (if you delete elements, you'll want to update those variables). This automates the tabulation programmatically.
Since you are starting out, I recommend putting a breakpoint in __init__() and __new__() to observe their behavior.
I used Enum because conceptually it's the right tool for the job. You only need to understand Enum.name and Enum.value, as for everything else consider it a normal class.
There are 2 output files, one in tabulated form and the other in barebone csv.
(For the most part the solution is "canonical" (or close). The procedural part was rushed, but gives a sufficient idea.)
import codecs
import csv
from collections.abc import Iterator
from enum import Enum
from pathlib import Path
# Input roster CSV (adjust these paths to your machine).
IN_FILE = Path("C:\\your_path\\input.csv")
# Barebone CSV report: one 'x'-flag column per lifestyle.
OUT_FILE = Path("C:\\your_path\\output1.csv")
# Fixed-width (tabulated) report, same rows as printed to the console.
OUT_FILE_TABULATE = Path("C:\\your_path\\output2.csv")
def read_csv(file) -> Iterator[list]:
    """Yield each row of the CSV *file* as a list of column strings.

    This is a generator, so the original ``-> list`` annotation was wrong.
    """
    # newline='' is what the csv module requires so that quoted fields
    # containing newlines are parsed correctly.
    with open(file, newline='') as csv_file:
        reader_csv = csv.reader(csv_file, delimiter=',')
        for row in reader_csv:
            yield row
def write_file(file, result_ordered):
    """Write each string in *result_ordered* to *file*, one per line, UTF-8.

    The built-in open() handles encodings natively; codecs.open() is a
    legacy API, and the "+" (read) mode was never used.
    """
    with open(file, "w", encoding="utf-8") as file_out:
        for s in result_ordered:
            file_out.write(s + '\n')
class LifeStyle(Enum):
    """The three lifestyle categories; each member's name is its column caption."""

    Sedentary = 1
    Active = 2
    Moderate = 3

    def to_list(self):
        """Return one flag per style, in definition order: 'x' for this member, '' otherwise."""
        list_life_style = list()
        for one_style in LifeStyle:
            if one_style is self:
                list_life_style.append('x')
            else:
                list_life_style.append('')
        return list_life_style

    def tabulate(self):
        """Like to_list(), but each cell is centred to its column's caption width."""
        str_list_life_style = list()
        for one_style in LifeStyle:
            if one_style is not self:
                str_list_life_style.append('{: ^{width}}'.format(' ', width=len(one_style.name)))
            else:
                str_list_life_style.append('{: ^{width}}'.format('x', width=len(self.name)))
        return str_list_life_style

    def tabulate_single_column(self):
        """Return this member's name right-aligned to the single-column width.

        Uses len('Sedentary') as the width, which is the longest member name.
        """
        return '{: >{width}}'.format(str(self.name), width=len(LifeStyle.Sedentary.name))

    # NOTE: the decorators were mangled to '#staticmethod' in the original
    # paste; they are restored here.
    @staticmethod
    def header_single_column():
        """Header caption for the single-column rendering."""
        return ' {}'.format(LifeStyle.__name__)

    @staticmethod
    def header():
        """Header captions for the three-column ('x'-flag) rendering."""
        return ' {} {} {}'.format(
            LifeStyle.Sedentary.name,
            LifeStyle.Active.name,
            LifeStyle.Moderate.name,
        )
class Person:
    """One roster row; class-level column widths grow to fit the longest values seen."""

    # Column captions, which double as the minimum column widths.
    _FIRST_NAME = "First Name"
    _LAST_NAME = "Last Name"
    _AGE = "Age"
    _OCCUPATION = "Occupation"
    _HEIGHT = "Height"
    _WEIGHT = "Weight"
    # Running maxima for the stretchable columns; updated in __new__, so
    # deleting persons later requires recomputing these by hand.
    max_len_first_name = len(_FIRST_NAME)
    max_len_last_name = len(_LAST_NAME)
    max_len_occupation = len(_OCCUPATION)

    def __new__(cls, first_name, last_name, age, occupation, height, weight, lifestyle):
        # Widen the shared column widths before the instance is created.
        cls.max_len_first_name = max(cls.max_len_first_name, len(first_name))
        cls.max_len_last_name = max(cls.max_len_last_name, len(last_name))
        cls.max_len_occupation = max(cls.max_len_occupation, len(occupation))
        return super().__new__(cls)

    def __init__(self, first_name, last_name, age, occupation, height, weight, lifestyle):
        self.first_name = first_name
        self.last_name = last_name
        self.age = age
        self.occupation = occupation
        self.height = height
        self.weight = weight
        self.lifestyle = lifestyle  # a LifeStyle member

    # NOTE: the decorators were mangled to '#classmethod' in the original
    # paste; they are restored here. Without @classmethod the explicit
    # Person._tabulate_(...) calls below would be one argument short.
    @classmethod
    def _tabulate_(cls, first_name, last_name, age, occupation, height, weight):
        """Format the six fixed fields into one aligned row (no lifestyle)."""
        first_part = '{: >{m_first}} {: >{m_last}} {: >{m_age}} {: <{m_occup}} {: <{m_height}} {: >{m_weight}}'.format(
            first_name,
            last_name,
            age,
            occupation,
            height,
            weight,
            m_first=Person.max_len_first_name,
            m_last=Person.max_len_last_name,
            m_occup=Person.max_len_occupation,
            m_age=len(Person._AGE),
            m_height=len(Person._HEIGHT),
            m_weight=len(Person._WEIGHT))
        return first_part

    @classmethod
    def header(cls, header_life_style):
        """Return the table header, with *header_life_style* appended."""
        first_part = Person._tabulate_(Person._FIRST_NAME, Person._LAST_NAME, Person._AGE,
                                       Person._OCCUPATION, Person._HEIGHT, Person._WEIGHT)
        return '{}{}'.format(first_part, header_life_style)

    def __str__(self):
        """Row with the lifestyle spread over three 'x'-flag columns."""
        first_part = Person._tabulate_(self.first_name, self.last_name, self.age, self.occupation,
                                       self.height, self.weight)
        return '{}{}'.format(first_part, ' '.join(self.lifestyle.tabulate()))

    def single_column(self):
        """Row with the lifestyle collapsed into a single name column."""
        first_part = Person._tabulate_(self.first_name, self.last_name, self.age, self.occupation,
                                       self.height, self.weight)
        return '{} {}'.format(first_part, self.lifestyle.tabulate_single_column())
def populate(persons_populate):
    """Append one Person per row of IN_FILE to *persons_populate* and return it."""
    for row in read_csv(IN_FILE):
        # Columns 6/7/8 carry an 'x' flag; note the input column order is
        # Sedentary, Moderate, Active. Rows with no flag keep ''.
        if row[6] == 'x':
            life_style = LifeStyle.Sedentary
        elif row[7] == 'x':
            life_style = LifeStyle.Moderate
        elif row[8] == 'x':
            life_style = LifeStyle.Active
        else:
            life_style = ''
        persons_populate.append(
            Person(row[0], row[1], row[2], row[3], row[4], row[5], life_style))
    return persons_populate
# Load the roster, echo the three-column table, and write both report files.
persons = populate([])
print(Person.header(LifeStyle.header()))
for one_person in persons:
    print(one_person)
write_file(OUT_FILE_TABULATE, [str(one_person) for one_person in persons])
# add new persons here
persons.append(Person("teste", "teste", "22", "worker", "5'8\"", "110", LifeStyle.Active))
# Barebone CSV: the six fixed fields plus the stripped lifestyle flags.
final_list = []
for one_person in persons:
    row = [one_person.first_name, one_person.last_name, one_person.age,
           one_person.occupation, one_person.height, one_person.weight]
    row.extend(cell.strip() for cell in one_person.lifestyle.tabulate())
    final_list.append(','.join(row))
write_file(OUT_FILE, final_list)
# Same table again, with lifestyle collapsed into a single name column.
print("\n", Person.header(LifeStyle.header_single_column()))
for one_person in persons:
    print(one_person.single_column())
output1.csv:
Anna,Barbara,35,nurse,5'3",129,,,x
Catherine,Do,45,physicist,5'5",135,,x,
Eric,Frederick,28,teacher,5'5",140,,,x
Gabriel,Hernandez,55,surgeon,5'7",150,x,,
Ivy,Joo,31,engineer,5'2",126,x,,
Kelly,Marks,21,student,5'4",132,,x,
Nancy,Owens,60,immunologist,5'8",170,x,,
Patricia,Qin,36,dental assistant,4'11",110,x,,
Roderick,Stevenson,51,bus driver,5'6",160,x,,
Tracy,Umfreville,42,audiologist,5'7",156,x,,
Victoria,Wolfeschlegelsteinhausenbergerdorff,38,data analyst ,5'8",158,,,x
Lucy,Xi,49,professor,5'9",161,,,x
Yolanda,Zachary,58,secretary,5'10",164,x,,
teste,teste,22,worker,5'8",110,,x,
output2.csv:
Anna Barbara 35 nurse 5'3" 129 x
Catherine Do 45 physicist 5'5" 135 x
Eric Frederick 28 teacher 5'5" 140 x
Gabriel Hernandez 55 surgeon 5'7" 150 x
Ivy Joo 31 engineer 5'2" 126 x
Kelly Marks 21 student 5'4" 132 x
Nancy Owens 60 immunologist 5'8" 170 x
Patricia Qin 36 dental assistant 4'11" 110 x
Roderick Stevenson 51 bus driver 5'6" 160 x
Tracy Umfreville 42 audiologist 5'7" 156 x
Victoria Wolfeschlegelsteinhausenbergerdorff 38 data analyst 5'8" 158 x
Lucy Xi 49 professor 5'9" 161 x
Yolanda Zachary 58 secretary 5'10" 164 x
single_column:
Anna Barbara 35 nurse 5'3" 129 Moderate
Catherine Do 45 physicist 5'5" 135 Active
Eric Frederick 28 teacher 5'5" 140 Moderate
Gabriel Hernandez 55 surgeon 5'7" 150 Sedentary
Ivy Joo 31 engineer 5'2" 126 Sedentary
Kelly Marks 21 student 5'4" 132 Active
Nancy Owens 60 immunologist 5'8" 170 Sedentary
Patricia Qin 36 dental assistant 4'11" 110 Sedentary
Roderick Stevenson 51 bus driver 5'6" 160 Sedentary
Tracy Umfreville 42 audiologist 5'7" 156 Sedentary
Victoria Wolfeschlegelsteinhausenbergerdorff 38 data analyst 5'8" 158 Moderate
Lucy Xi 49 professor 5'9" 161 Moderate
Yolanda Zachary 58 secretary 5'10" 164 Sedentary
teste teste 22 worker 5'8" 110 Active
Related
TypeError: slice indices must be integers or None or have an __index__ method error in scraping a help page
I created python script to scrape facebook help page. I wanted to scrape cms_object_id, cmsID, name. so these values are in a script tag then firstly tried to find all <script> tags then tried to iterate over this and then there is __bbox inside the tags which contains the values wanted to scrape. so this is my script: import json import requests import bs4 from Essentials import Static class CmsIDs: def GetIDs(): # cont = requests.get(""https://www.facebook.com:443/help"", headers=Static.headers) # syntax error cont = requests.get("https://www.facebook.com:443/help", headers=Static.headers) soup = bs4.BeautifulSoup(cont.content, "html5lib") text = soup.find_all("script") start = "" txtstr = "" for i in range(len(text)): mystr = text[i] # mystr = text[i] print("this is: ", mystr.find('__bbox":')) if text[i].get_text().find('__bbox":') != -1: # print(i, text[i].get_text()) txtstr += text[i].get_text() start = text[i].get_text().find('__bbox":') + len('__bbox":') print('start:', start) count = 0 for end, char in enumerate(txtstr[start:], start): if char == '{': count += 1 if char == '}': count -= 1 if count == 0: break print('end:', end) # --- convert JSON string to Python structure (dict/list) --- data = json.loads(txtstr[start:end+1]) # pp.pprint(data) print('--- search ---') CmsIDs.search(data) # --- use recursion to find all 'cms_object_id', 'cmsID', 'name' --- def search(data): if isinstance(data, dict): found = False if 'cms_object_id' in data: print('cms_object_id', data['cms_object_id']) found = True if 'cmsID' in data: print('cmsID', data['cmsID']) found = True if 'name' in data: print('name', data['name']) found = True if found: print('---') for val in data.values(): CmsIDs.search(val) if isinstance(data, list): for val in data: CmsIDs.search(val) if __name__ == '__main__': CmsIDs.GetIDs() the page contains cms_object_id, cmsID, name. 
so wanted to scrape all these 3 values but I am getting an error: for end, char in enumerate(txtstr[start:], start): TypeError: slice indices must be integers or None or have an __index__ method so how can I solve this error and reach ultimate goal?
Note: Since I'm unfamiliar with and failed to install Essentials, and also because ""https://www.facebook.com:443/help"" raises a syntax error (there should only be one quote on each side of the string), I changed the requests line in my code. cont = requests.get('https://www.facebook.com:443/help', headers={'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9'}) TypeError: slice indices must be integers or None or have an __index__ method You've initiated start as a string [start = ""] and it needs to be an integer. Unless the if text[i].get_text().find('__bbox":') != -1.... block is entered, start remains a string. if you just want to not get this error, you could just exit the program if start hasn't been updated [indicating that __bbox": wasn't found in any of the script tags]. print('start:', start) if start == "": print('{__bbox":} not found') return count = 0 But that still leaves the problem of __bbox": not being found; I'm not sure why, but the issue is resolved for me if I don't use the html5lib parser - just changing to BeautifulSoup(cont.content) resolved this issue. # soup = bs4.BeautifulSoup(cont.content, "html5lib") soup = bs4.BeautifulSoup(cont.content) # don't define the parser # soup = bs4.BeautifulSoup(cont.content, "html.parser") # you could also try other parsers Other suggestions Your code will probably work without these, but you might want to consider these suggested improvements for error-handling: Filter the script Tags If the text ResultSet only has script tags that contain __bbox":, you avoid looping unnecessarily through the 100+ other scripts, and you won't have to check with if....find('__bbox":') anymore. 
text = soup.find_all(lambda t: t.name == 'script' and '"__bbox"' in t.get_text()) for mystr in [s.get_text() for s in text]: print("this is: ", mystr.find('__bbox":')) txtstr += mystr start = mystr.find('__bbox":') + len('__bbox":') Initiate end You should initiate the end variable [like end = 0] before the for end, char ... loop because you're using it after the loop as well. print('end:', end) data = json.loads(txtstr[start:end+1]) If txtstr[start:] is empty somehow, these lines will raise error/s since end would not be defined yet. Use a JavaScript Parser This will make the previous suggestions unnecessary, but as it is json.loads will raise an error if txtstr[start:end+1] is empty somehow, or if it contains any unpaired [and likely escaped] }or{. So, it might be more reliable to use a parser rather than just trying to walk through the string. I have this function that uses slimit to find values from strings containing JavaScript code. (It's far from perfect, but it seems to for this script, at least.) GetIDs() could be re-written as below. # import json, requests, bs4, slimit # from slimit.visitors import nodevisitor # def findObj_inJS... 
## PASTE FROM https://pastebin.com/UVcLniSG # class CmsIDs: def GetIDs(): # cont=requests.get('https://www.facebook.com:443/help',headers=Static.headers) cont = requests.get('https://www.facebook.com:443/help', headers={ 'accept': ';'.join( [ 'text/html,application/xhtml+xml,application/xml', 'q=0.9,image/avif,image/webp,image/apng,*/*', 'q=0.8,application/signed-exchange', 'v=b3', 'q=0.9' ])}) ## in case of request errors ## try: cont.raise_for_status() except Exception as e: print('failed to fetch page HTML -', type(e), e) return print('fetched', cont.url, 'with', cont.status_code, cont.reason) soup = bs4.BeautifulSoup(cont.content) scrCond = lambda t: t.name == 'script' and '"__bbox"' in t.get_text() jScripts = [s.get_text() for s in soup.find_all(scrCond)] print(f'Found {len(jScripts)} script tags containing {{"__bbox"}}') data = [findObj_inJS(s,'"__bbox"') for s in jScripts] print('--- search ---') CmsIDs.search(data) # def search(data).... Return the Data This isn't for error-handling, but if you return the data printed by CmsIDs.search you could save it for further use. def search(data): rList, dKeys = [], ['cms_object_id', 'cmsID', 'name'] if isinstance(data, dict): dObj = {k: data[k] for k in dKeys if k in data} rList += [dObj] if dObj else [] for k, v in dObj.items(): print(k, v) if dObj: print('---') for val in data.values(): rList += CmsIDs.search(val) if isinstance(data, list): for val in data: rList += CmsIDs.search(val) return rList The printed result will be the same as before, but if you change the last line of GetIDs to return CmsIDs.search(data) and then define a variable cmsList = CmsIDs.GetIDs() then cmsList will be a list of dctionaries, which you could then [for example] save to csv with pandas and view as a table on a spreadsheet. 
# import pandas pandas.DataFrame(cmsList).to_csv('CmsIDs_GetIDs.csv', index=False) or print the markdown for the table [of the results I got] below print(pandas.DataFrame(cmsList, dtype=str).fillna('').to_markdown()) [index] cms_object_id name cmsID 0 Використання Facebook 1 570785306433644 Створення облікового запису 2 396528481579093 Your Profile 3 Додати й редагувати інформацію у своєму профілі 1017657581651994 4 Ваші основна світлина й обкладинка 1217373834962306 5 Поширення дописів у своєму профілі та керування ними 1640261589632787 6 Усунення проблем 191128814621591 7 1540345696275090 Додавання в друзі 8 Додавання друзів 246750422356731 9 Люди, яких ви можете знати 336320879782850 10 Control Who Can Friend and Follow You 273948399619967 11 Upload Your Contacts to Facebook 1041444532591371 12 Видалення з друзів чи блокування користувача 1000976436606344 13 312959615934334 Facebook Dating 14 753701661398957 Ваша головна сторінка 15 How Feed Works 1155510281178725 16 Control What You See in Feed 964154640320617 17 Like and React to Posts 1624177224568554 18 Пошук 821153694683665 19 Translate Feed 1195058957201487 20 Memories 1056848067697293 21 1071984682876123 Повідомлення 22 Надсилання повідомлень 487151698161671 23 Переглянути повідомлення й керувати ними 1117039378334299 24 Поскаржитися на повідомлення 968185709965912 25 Відеовиклики 287631408243374 26 Fix a Problem 1024559617598844 27 753046815962474 Reels 28 Watching Reels 475378724739085 29 Creating Reels 867690387846185 30 Managing Your Reels 590925116168623 31 862926927385914 Розповіді 32 Як створити розповідь і поширити її 126560554619115 33 View and Reply to Stories 349797465699432 34 Page Stories 425367811379971 35 1069521513115444 Світлини й відео 36 Світлини 1703757313215897 37 Відео 1738143323068602 38 Going Live 931327837299966 39 Albums 490693151131920 40 Додавання позначок 267689476916031 41 Усунення проблеми 507253956146325 42 1041553655923544 Відео у Watch 43 Перегляд шоу та відео 
401287967326510 44 Fix a Problem 270093216665260 45 2402655169966967 Gaming 46 Gaming on Facebook 385894640264219 47 Платежі в іграх 248471068848455 48 282489752085908 Сторінки 49 Interact with Pages 1771297453117418 50 Створити сторінку й керувати нею 135275340210354 51 Імена й імена користувачів 1644118259243888 52 Керування налаштуваннями сторінки 1206330326045914 53 Customize a Page 1602483780062090 54 Publishing 1533298140275888 55 Messaging 994476827272050 56 Insights 794890670645072 57 Banning and Moderation 248844142141117 58 Усунути проблему 1020132651404616 59 1629740080681586 Групи 60 Join and Choose Your Settings 1210322209008185 61 Post, Participate and Privacy 530628541788770 62 Create, Engage and Manage Settings 408334464841405 63 Керування групою для адміністраторів 1686671141596230 64 Community Chats 3397387057158160 65 Pages in Groups 1769476376397128 66 Fix a Problem 1075368719167893 67 1076296042409786 Події 68 Create and Manage an Event 572885262883136 69 View and Respond to Events 1571121606521970 70 Facebook Classes 804063877226739 71 833144153745643 Fundraisers and Donations 72 Creating a Fundraiser 356680401435429 73 Пожертва в рамках збору коштів 1409509059114623 74 Особисті збори коштів 332739730519432 75 For Nonprofits 1640008462980459 76 Fix a Problem 2725517974129416 77 1434403039959381 Meta Pay 78 Платежі в іграх 248471068848455 79 Payments in Messages 863171203733904 80 Пожертва в рамках збору коштів 1409509059114623 81 Квитки на заходи 1769557403280350 82 Monetization and Payouts 1737820969853848 83 1713241952104830 Marketplace 84 Як працює Marketplace 1889067784738765 85 Buying on Marketplace 272975853291364 86 Продаж на Marketplace 153832041692242 87 Sell with Shipping on Marketplace 773379109714742 88 Using Checkout on Facebook 1411280809160810 89 Групи з купівлі й продажу 319768015124786 90 Get Help with Marketplace 1127970530677256 91 1642635852727373 Додатки 92 Manage Your Apps 942196655898243 93 Видимість і конфіденційність 
додатка 1727608884153160 94 866249956813928 Додатки Facebook для мобільних пристроїв 95 Додаток для Android 1639918076332350 96 iPhone and iPad Apps 1158027224227668 97 Facebook Lite App 795302980569545 98 273947702950567 Спеціальні можливості 99 Керування обліковим записом 100 1573156092981768 Вхід і пароль 101 Вхід в обліковий запис 1058033620955509 102 Змінення пароля 248976822124608 103 Виправлення проблеми із входом 283100488694834 104 Завантаження посвідчення особи 582999911881572 105 239070709801747 Налаштування облікового запису 106 Як змінити налаштування облікового запису 1221288724572426 107 Ваше ім’я користувача 1740158369563165 108 Спадкоємці 991335594313139 109 1090831264320592 Імена у Facebook 110 1036755649750898 Сповіщення 111 Push, Email and Text Notifications 530847210446227 112 Виберіть, про що отримувати сповіщення 269880466696699 113 Усунення проблем 1719980288275077 114 109378269482053 Налаштування реклами 115 Як працює реклама у Facebook 516147308587266 116 Контроль реклами, яку ви бачите 1075880512458213 117 Ваша інформація та реклама у Facebook 610457675797481 118 1701730696756992 Доступ до вашої інформації та її завантаження 119 250563911970368 Деактивація або видалення облікового запису 120 Конфіденційність і безпека 121 238318146535333 Ваша конфіденційність 122 Керуйте тим, хто може переглядати контент, який ви поширюєте у Facebook 1297502253597210 123 Керування своїми дописами 504765303045427 124 Control Who Can Find You 1718866941707011 125 592679377575472 Безпека 126 Джерела щодо боротьби з жорстоким поводженням 726709730764837 127 Ресурси з допомоги для протидії самогубству та самоушкодженню 1553737468262661 128 Crisis Response 141874516227713 129 Ресурси з правил безпеки для допомоги батькам 1079477105456277 130 Інформація для правоохоронних органів 764592980307837 131 235353253505947 Захист облікового запису 132 Функції безпеки та поради з її забезпечення 285695718429403 133 Сповіщення про вхід і двоетапна перевірка 
909243165853369 134 Уникайте спаму та шахрайства 1584206335211143 135 236079651241697 Безпека під час здійснення покупок 136 Розпізнавання шахрайства 1086141928978559 137 Уникнення шахрайства 2374002556073992 138 Купівля на Marketplace 721562085854101 139 Поради щодо безпечної купівлі 123884166448529 140 Купуйте впевнено 1599248863596914 141 Політики та скарги 142 1753719584844061 Скарга на порушення 143 Як поскаржитися на щось? 1380418588640631 144 Don't Have an Account? 1723400564614772 145 1126628984024935 Як повідомити про проблему у Facebook 146 186614050293763 Being Your Authentic Self on Facebook 147 1561472897490627 Повідомлення про порушення конфіденційності 148 1216349518398524 Зламані та фальшиві облікові записи 149 275013292838654 Керування обліковим записом померлої людини 150 About Memorialized Accounts 1017717331640041 151 Request to Memorialize or Remove an Account 1111566045566400 152 399224883474207 Інтелектуальна власність 153 Авторське право 1020633957973118 154 Торговельна марка 507663689427413 155 1735443093393986 Про наші політики
dataframe put must be a unicode string, not 0, how give the string not the dataframe
i try to manipulate some dataframe and i did a function to calculate the distance between 2 cities. def find_distance(A,B): key = '0377f0e6b42a47fe9d30a4e9a2b3bb63' # get api key from: https://opencagedata.com geocoder = OpenCageGeocode(key) result_A = geocoder.geocode(A) lat_A = result_A[0]['geometry']['lat'] lng_A = result_A[0]['geometry']['lng'] result_B = geocoder.geocode(B) lat_B = result_B[0]['geometry']['lat'] lng_B = result_B[0]['geometry']['lng'] return int(geodesic((lat_A,lng_A), (lat_B,lng_B)).kilometers) this is my dataframe 2 32 Mulhouse 1874.0 2 797 16.8 16,3 € 10.012786 13 13 Saint-Étienne 1994.0 3 005 14.3 13,5 € 8.009882 39 39 Roubaix 2845.0 2 591 17.4 15,0 € 6.830968 27 27 Perpignan 2507.0 3 119 15.1 13,3 € 6.727255 40 40 Tourcoing 3089.0 2 901 17.5 15,3 € 6.327547 25 25 Limoges 2630.0 2 807 14.2 12,5 € 6.030424 20 20 Le Mans 2778.0 3 202 14.4 12,3 € 5.789559 there is my code: def clean_text(row): # return the list of decoded cell in the Series instead return [r.decode('unicode_escape').encode('ascii', 'ignore') for r in row] def main(): inFile = "prix_m2_france.xlsx" #On ouvre l'excel inSheetName = "Sheet1" #le nom de l excel cols = ['Ville', 'Prix_moyen', 'Loyer_moyen'] #Les colomnes df =(pd.read_excel(inFile, sheet_name = inSheetName)) df[cols] = df[cols].replace({'€': '', ",": ".", " ": "", "\u202f":""}, regex=True) # df['Prix_moyen'] = df.apply(clean_text) # df['Loyer_moyen'] = df.apply(clean_text) df['Prix_moyen'] = df['Prix_moyen'].astype(float) df['Loyer_moyen'] = df['Loyer_moyen'].astype(float) # df["Prix_moyen"] += 1 df["revenu"] = (df['Loyer_moyen'] * 12) / (df["Prix_moyen"] * 1.0744) * 100 # df['Ville'].replace({'Le-Havre': 'Le Havre', 'Le-Mans': 'Le Mans'}) df["Ville"] = df['Ville'].replace(['Le-Havre', 'Le-Mans'], ['Le Havre', 'Le Mans']) df["distance"] = find_distance("Paris", df["Ville"]) df2 = df.sort_values(by = 'revenu', ascending = False) print(df2.head(90)) main() df["distance"] = find_distance("Paris", df["Ville"]) fails and 
give me this error: opencage.geocoder.InvalidInputError: Input must be a unicode string, not 0 Paris 1 Marseille 2 Lyon 3 T I imagine it as a loop where i will put the distance between paris and the city but i guess it take all the dataframe on my first value. Thanks for your help (Edit, i just pasted a part of my dataframe)
You can try something like : df["distance"] = [find_distance("Paris", city) for city in df["Ville"]]
Unable to format output in Python correctly
I am unable to format in Python correctly. Below is what my list looks like. I am not sure why the spacing is off on some of the fields. Below is my code as well as a snip of how it reads. def main(): golf_file = open('golf.txt', 'r') #open file first_name = golf_file.readline() #read first line print('First Name\tLast Name\tHandicap\tGolf Score\tOver, Under or Par') #print headings while first_name != '': #while statement for loop last_name = golf_file.readline() handicap = golf_file.readline() golf_score = golf_file.readline() #stripping newline from each string first_name = first_name.rstrip('\n') last_name = last_name.rstrip('\n') handicap = handicap.rstrip('\n') golf_score = golf_score.rstrip('\n') handicap_num = float(handicap) golfscore_num = int(golf_score) #if statement to determine if golf score is over, under or par if golfscore_num == 80: OverUnderPar = ('Par') elif golfscore_num < 80: OverUnderPar = ('Under Par') else: OverUnderPar = ('Over Par') #print info with two tabs for positioning. print( first_name, '\t''\t', last_name, '\t''\t', handicap_num, '\t', '\t', golfscore_num, '\t', '\t', OverUnderPar) first_name = golf_file.readline() golf_file.close() #close file main() First Name Last Name Handicap Golf Score Over, Under or Par Andrew Marks 11.2 72 Under Par Betty Franks 12.8 89 Over Par Connie William 14.6 92 Over Par Donny Ventura 9.9 78 Under Par Ernie Turner 10.1 81 Over Par Fred Smythe 8.1 75 Under Par Greg Tucker 7.2 72 Under Par Henry Zebulon 8.3 83 Over Par Ian Fleming 4.2 72 Under Par Jan Holden 7.7 84 Over Par Kit Possum 7.9 79 Under Par Landy Bern 10.3 93 Over Par Mona Docker 11.3 98 Over Par Kevin Niles 7.1 80 Par Pam Stiles 10.9 87 Over Par Russ Hunt 5.6 73 Under Par
If the last name is too long (over 6 characters), the tab stop is pushed one further and the row is misaligned. You can either check whether the name is too long and emit one fewer tab accordingly, using something like numTabs = '\t' * (2 - len(last_name)//6). Or, a better approach would be to use str.format, as mentioned by @Michael Butscher in the comments.
Need help writing a regex
I need a regex that reads a file with blast information. The file looks like: ****ALIGNMENT**** Sequence: gi|516137619|ref|WP_017568199.1| hypothetical protein [Nocardiopsis synnemataformans] length: 136 E_value: 8.9548e-11 score: 153.0 bit_score: 63.5438 identities: 35 positives: 42 gaps: 6 align_length: 70 query: MIRIHPASRDPQTLLDPENWRSAAWNGAPIRDCRGCIDCCDDDWNRSEPEWRRCYGEHLAEDVRHGVAVC... match: MIRI A+RD LLDP NW S W+ A R CRGC DC + +CYGE + +DVRHGV+VC... sbjct: MIRIDRANRDHAELLDPANWLSFHWSNAT-RACRGCDDC-----GGTTETLVQCYGEGVVDDVRHGVSVC... I already have a code, but in this file there is some extra data. The variable names with the corresponding name in this example, are: hitsid = 516137619 protein = hypothetical protein organism = Nocardiopsis synnemataformans length = 136 evalue = 8.9548e-11 score = 153.0 bitscore = 63.5438 identities = 35 positives = 42 gaps = 6 query = MIRIHPASRDPQTLLDPENWRSAAWNGAPIRDCRGCIDCCDDDWNRSEPEWRRCYGEHLAEDVRHGVAVC... match = MIRI A+RD LLDP NW S W+ A R CRGC DC + +CYGE + +DVRHGV+VC... subject = MIRIDRANRDHAELLDPANWLSFHWSNAT-RACRGCDDC-----GGTTETLVQCYGEGVVDDVRHGVSVC... I'm looking for something like this, this is a regex I already got, but now there are some extra things added: p = re.compile(r'^Sequence:[^|]*\|(?P<hitsid>[^|]*)\|\S*\s*(?P<protein>[^][]*?)\s*\[(?P<organism>[^][]*)][\s\S]*?\nE-value:\s*(?P<evalue>.*)', re.MULTILINE) File looks like: ****ALIGNMENT**** Sequence: gi|516137619|ref|WP_017568199.1| hypothetical protein [Nocardiopsis synnemataformans] length: 136 E_value: 8.9548e-11 score: 153.0 bit_score: 63.5438 identities: 35 positives: 42 gaps: 6 align_length: 70 query: MIRIHPASRDPQTLLDPENWRSAAWNGAPIRDCRGCIDCCDDDWNRSEPEWRRCYGEHLAEDVRHGVAVC... match: MIRI A+RD LLDP NW S W+ A R CRGC DC + +CYGE + +DVRHGV+VC... sbjct: MIRIDRANRDHAELLDPANWLSFHWSNAT-RACRGCDDC-----GGTTETLVQCYGEGVVDDVRHGVSVC... 
****ALIGNMENT**** Sequence: gi|962700925|ref|BC_420072443.1| Protein crossbronx-like [Nocardiopsis synnemataformans] length: 136 E_value: 8.9548e-11 score: 153.0 bit_score: 63.5438 identities: 35 positives: 42 gaps: 6 align_length: 70 query: MIRIHPASRDPQTLLDPENWRSAAWNGAPIRDCRGCIDCCDDDWNRSEPEWRRCYGEHLAEDVRHGVAVC... match: MIRI A+RD LLDP NW S W+ A R CRGC DC + +CYGE + +DVRHGV+VC... sbjct: MIRIDRANRDHAELLDPANWLSFHWSNAT-RACRGCDDC-----GGTTETLVQCYGEGVVDDVRHGVSVC... ****ALIGNMENT**** Sequence: gi|516137619|ref|WP_017568199.1| hypothetical protein [Nocardiopsis synnemataformans] length: 136 E_value: 8.9548e-11 score: 153.0 bit_score: 63.5438 identities: 35 positives: 42 gaps: 6 align_length: 70 query: MIRIHPASRDPQTLLDPENWRSAAWNGAPIRDCRGCIDCCDDDWNRSEPEWRRCYGEHLAEDVRHGVAVC... match: MIRI A+RD LLDP NW S W+ A R CRGC DC + +CYGE + +DVRHGV+VC... sbjct: MIRIDRANRDHAELLDPANWLSFHWSNAT-RACRGCDDC-----GGTTETLVQCYGEGVVDDVRHGVSVC...
You no need regexp: parsed = [] raw_parts = open('tmp9.txt','r').read().split('****ALIGNMENT****') for raw_part in raw_parts: parsed_dict = {} for line in raw_part.split('\n'): try: key,value = line.split(':') parsed_dict[key] = value.strip() except: pass parsed.append(parsed_dict) print(parsed)
Appending data to text file dependant on if statment [closed]
Closed. This question does not meet Stack Overflow guidelines. It is not currently accepting answers. Questions asking for code must demonstrate a minimal understanding of the problem being solved. Include attempted solutions, why they didn't work, and the expected results. See also: Stack Overflow question checklist Closed 9 years ago. Improve this question I am trying to get a text file automatically updating new class details. # Example variables completed = Yes class = 13A time = 11:00 if completed: # Check data class and time variables against text file and if they don't exist then add them, if they do exist do nothing. My text files look like: 13A 11:00 Top Students: Joe Smith, Tom Clarke, Jenna Sole Top 3 Attendance: 98.5% Class Score: 54 Yes 13B 11:10 Top Students: Anni Moy, Jessica Longate, Phillip Tome T3 Attendance: 98.5% Class Score: 54 Yes 14A 11:10 Top Students: John Doe, John Smith, Sam Ben T2 Attendance: 98.5% Class Score: 54 Yes Does any one know how this can be done, I would greatly appreciate an example if anyone could be so helpful.
Here's the code that parses the text file and dumps them into variables. Code below illustrates how to parse your text file using regex. import re fp = open('class_data.txt') lines = fp.read(-1) fp.close() records = re.split('\n\s*\n', lines) #Split all the records #print len(records) for record in records: data = record.split('\n') classid, classtime, top_students = data[0], data[1], re.split('^[A-Za-z ]*:', data[2])[1].split(',') attendance, score, completed = re.split('^[A-Za-z ]*:', data[4])[1], re.split('^[A-Za-z ]*:', data[5])[1], data[6] print classid, classtime, top_students, len(top_students), attendance, score, completed Print statement outputs 13A 11:00 [' Joe Smith', ' Tom Clarke', ' Jenna Sole'] 3 98.5% 54 Yes 13B 11:10 [' Anni Moy', ' Jessica Longate', ' Phillip Tome'] 3 98.5% 54 Yes 14A 11:10 [' John Doe', ' John Smith', ' Sam Ben'] 3 98.5% 54 Yes Now that you have your text file converted into variables, We can now add the code to check whether a class is finished and if the record is already contained in the file else add it import re fp = open('class_data.txt') lines = fp.read(-1) fp.close() completed = Yes class = 13A time = 11:00 isClassRecordFound = False records = re.split('\n\s*\n', lines) #Split all the records #print len(records) for record in records: data = record.split('\n') classid, classtime, top_students = data[0], data[1], re.split('^[A-Za-z ]*:', data[2])[1].split(',') attendance, score, completed = re.split('^[A-Za-z ]*:', data[4])[1], re.split('^[A-Za-z ]*:', data[5])[1], data[6] print classid, classtime, top_students, len(top_students), attendance, score, completed if (completed): if (classid == class) and (time == classtime): isClassRecordFound = True break; if not isClassRecordFound: with open("class_data.txt", "a") as myfile: myfile.write(class + '\n' + time)