Sorting and sequencing data from a file - python

I've developed a program that stores a list of ids, like so:
But for the desired purposes, the data should take a sequential form, so that the first pair of ids becomes something like: "889926212541448192" becomes 1 and "889919950248448000" becomes 2. That is, the resulting file should be something like:
Where the first id connects with 2, 3 and 6, and id 4 only with 5, forming a network.
I have no experience in this area, and I cannot find a way to do this when reading the file.
I tried writing some programs, but they only read row by row, not id-to-id by column. The data is saved by the following program:
import json

arq = open('ids.csv', 'w')
arq.write('Source' + ',' + 'Target')
arq.write("\n")
lista_rede = []  # list to store all ids
with open('dados_twitter.json', 'r') as f:
    for line in f:
        tweet = json.loads(line)  # parse the line as a Python dictionary
        lista = list(tweet.keys())  # list of keys
        try:
            if 'retweeted_status' in lista:
                id_rt = json.dumps(tweet['retweeted_status']['id_str'])
                id_status = json.dumps(tweet['id_str'])
                lista_rede.append(tweet['id_str'])
                lista_rede.append(tweet['retweeted_status']['id_str'])
                arq.write(id_status + ',' + id_rt)
                arq.write("\n")
            if 'quoted_status' in lista:
                id_rt = json.dumps(tweet['quoted_status']['id_str'])
                id_status = json.dumps(tweet['id_str'])
                lista_rede.append(tweet['id_str'])
                lista_rede.append(tweet['quoted_status']['id_str'])
                arq.write(id_status + ',' + id_rt)
                arq.write("\n")
        except KeyError:
            continue
arq.close()
As a result I have a file with the id data in pairs of interactions.
How can I then rearrange this data when reading it, or even when writing it? In Python or another language?

The following snippet should do the job:
import re

header = ''
id_dict = {}
# read the ids
with open('ids.csv') as fr:
    header = fr.readline()
    for line in fr:
        ids = [int(s) for s in re.findall(r'\d+', line)]
        id_dict.setdefault(ids[0], []).append(ids[1])
# sort the ids
for key in id_dict:
    id_dict[key].sort()
# save the sorted ids in a new file
with open('ids_sorted.txt', 'w') as fw:
    # fw.write(header)
    for key in sorted(id_dict):
        for value in id_dict[key]:
            fw.write("{0} {1}\n".format(key, value))
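The snippet above keeps the original long ids. If each distinct id should also be replaced by a small sequential number, as the question describes, a minimal sketch along the same lines (the output file name ids_renumbered.csv is just an example):
import re

mapping = {}  # original id -> small sequential number, in order of first appearance

def number_for(id_str):
    # assign the next free integer the first time an id is seen
    if id_str not in mapping:
        mapping[id_str] = len(mapping) + 1
    return mapping[id_str]

with open('ids.csv') as fr, open('ids_renumbered.csv', 'w') as fw:
    fr.readline()  # skip the Source,Target header
    for line in fr:
        source, target = re.findall(r'\d+', line)
        fw.write("{0},{1}\n".format(number_for(source), number_for(target)))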

Related

How to make a list that's callable by an instruction displayed in another CSV

I have two CSVs. In one, the profiles are stored with the profile name naming each line, and I want that name to be a key in a dict, like so: profiles = {"profile1": ["item"]}, so I can pull the list for each profile / row.
Example of the CSVs
profiles.csv
PROFILE_NAME,FIRST_NAME,LAST_NAME
Profile1,John,Doe
Profile2,Jane,Deere
instructions.csv
PROFILE_NAME,JOB,HOURS
Profile1,Mechanic,50
Profile2,Cook,65
Code I have tried for storing rows as lists:
import csv

def info():
    file = "profiles.csv"
    with open(file) as f:
        reader = csv.DictReader(f)
        profiles = {}
        for row in reader:
            name = row['PROFILE_NAME']
            profiles[name] = [
                row['FIRST_NAME'],
                row['LAST_NAME']
            ]
    return profiles
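A minimal sketch of one way to do this, assuming the two CSVs above: build a dict from each file keyed by PROFILE_NAME, so each profile name pulls its row as a list from either file.
import csv

def rows_by_profile(filename):
    # map PROFILE_NAME -> the remaining columns of that row, as a list
    with open(filename) as f:
        reader = csv.DictReader(f)
        return {row['PROFILE_NAME']: [v for k, v in row.items()
                                      if k != 'PROFILE_NAME']
                for row in reader}

profiles = rows_by_profile('profiles.csv')          # {'Profile1': ['John', 'Doe'], ...}
instructions = rows_by_profile('instructions.csv')  # {'Profile1': ['Mechanic', '50'], ...}
print(instructions['Profile1'])                     # ['Mechanic', '50']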

Getting multiple twitter screen names from list of IDs using python

How do I get the screen names from a list of Twitter IDs? I have the IDs saved in a pandas dataframe, 38194 IDs that I wish to match to their screen names so I can do a network analysis. I am using Python, but I am quite new to coding, so I do not know if this is even possible. I have tried the following:
myIds = friend_list
if myIds:
    myIds = myIds.replace(' ', '')
    myIds = myIds.split(',')
    # Set a new list object
    myHandleList = []
    i = 0
    # Loop through the list of user ids
    for idnumber in myIds:
        u = api.get_user(myIds[i])
        uid = u.screen_name
        myHandleList.append(uid)
        i = i + 1
    # Print the lists
    print('Twitter-Ids', myIds)
    print('Usernames', myHandleList)
    # set a filename based on current time
    csvfilename = "csvoutput-" + time.strftime("%Y%m%d-%H%M%S") + ".csv"
    print('We also outputted a CSV-file named ' + csvfilename + ' to your file parent directory')
    with open(csvfilename, 'w') as myfile:
        wr = csv.writer(myfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        wr.writerow(['username', 'twitter-id'])
        j = 0
        for handle in myHandleList:
            writeline = myHandleList[j], myIds[j]
            wr.writerow(writeline)
            j = j + 1
else:
    print('The input was empty')
Updating your loop, as I believe you are pretty close:
myHandleList = []
myIds = ['1031291359', '960442381']
for idnumber in myIds:
    u = api.get_user(idnumber)
    myHandleList.append(u.screen_name)
print(myHandleList)
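With 38194 ids, calling get_user once per id will hit Twitter's rate limits quickly. A sketch of a batched alternative, assuming the same Tweepy api object and that your Tweepy version provides lookup_users taking a user_ids list (up to 100 ids per call; suspended or deleted accounts are silently skipped from the results):
myHandleList = []
# look the ids up in chunks of 100, the maximum lookup_users accepts per call
for start in range(0, len(myIds), 100):
    chunk = myIds[start:start + 100]
    for u in api.lookup_users(user_ids=chunk):
        myHandleList.append(u.screen_name)
print(myHandleList)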

Saving a Dictionary to File While keeping it as a Dictionary

import pickle

itemsInExistence = []
item = {}
item['name'] = input("What do you want the new item to be called? ")
item['stats'] = int(input("What is its stat? "))
item['rank'] = int(input("What is its base rank? "))
item['amount'] = int(input("How many of it are there? "))
for i in range(item['amount']):
    itemsInExistence.append(item)

def save_list2():
    with open('itemsleft.txt', 'wb') as f:
        i = 0
        for item in itemsInExistence:
            pickle.dump(itemsInExistence, f)
            i += 1
I tried to save it both normally and with pickle, but neither keeps the dictionary's values. I need to save the dictionary to the file and retrieve it from the file with 'stats', 'rank', 'amount' still being integers and still separate from the rest of the line. (Keep in mind that there will be more than one saved item in itemsInExistence, both to be saved and loaded.)
def save_list2():
    ii = 0
    for i in itemsInExistence:
        d = itemsInExistence[ii]
        json.dump(d, open(files2, 'w'))
        ii += 1

def load_list2():
    with open(files2, 'r') as a:
        for line in a:
            line = line.strip()
            itemsInExistence.append(line)
You may use the JSON format to store a dict in a file; it's quite easy:
import json

file = "foofile"
d = dict()
# fill d
# save data: serialize the dict to a string and write it into the file
with open(file, 'w') as fp:
    json.dump(d, fp)
# read data: read the file's content and parse it into a dict
with open(file) as fp:
    a = json.load(fp)
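Since the question says there will be more than one saved item, note that the save_list2 attempt above reopens the file in 'w' mode on every pass and keeps only the last item. A minimal sketch that stores and restores the whole list in one go (assuming the itemsInExistence list from the question):
import json

def save_list2():
    # one dump of the entire list; each element stays a dict
    with open('itemsleft.txt', 'w') as f:
        json.dump(itemsInExistence, f)

def load_list2():
    # restores a list of dicts, with 'stats', 'rank', 'amount' still ints
    with open('itemsleft.txt') as f:
        return json.load(f)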

how to make the offset drop line while the lines aren't the same length?

def load_from_file():
    d = {}  # create empty dict
    file = open("players.txt", "r")  # open file for reading
    line = file.readline()
    file.close()  # we're done with the file
    list = line.split(",")
    prop = {"position": "", "number": "", "name": "", "birth": "", "id": ""}
    keys = sorted(prop)
    num = 0
    for key in keys:
        d[key] = list[num]
        num += 1
    return d
The problem is that whenever the loop returns to this function it reads the same line! I want the offset to advance to the next line.
The problem is that you're telling the program to read only the first line of the file every time you call the function, at the file.readline() statement. You should read the whole file in at once into a list, then loop through the lines that have been read into the list.
Example:
def load_from_file():
    with open("players.txt", "r") as myfile:  # open file for reading
        return myfile.readlines()

def create_dictionary(line):
    d = {}
    list = line.split(",")
    prop = {"position": "", "number": "", "name": "", "birth": "", "id": ""}
    keys = sorted(prop)
    num = 0
    for key in keys:
        d[key] = list[num]
        num += 1
    return d

data = []
filedata = load_from_file()
for line in filedata:
    data.append(create_dictionary(line))
P.S. Not sure what all you're trying to do with data, but this should help you get the gist.
Using DictReader from the csv module:
import csv

def load_from_file():
    with open("players.txt") as players:
        fields = ["birth", "id", "name", "number", "position"]
        reader = csv.DictReader(players, fields)
        return list(reader)
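Each row returned by the DictReader version is a dict keyed by those field names. A quick usage sketch, with a hypothetical players.txt line:
# suppose players.txt contains the line:
# 1990-04-02,17,Smith,9,Goalkeeper
rows = load_from_file()
print(rows[0]["name"])      # Smith
print(rows[0]["position"])  # Goalkeeper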

reading files python

I want to read files in a more advanced way.
First:
In this file, I have certain steps the code has to follow. How do I read the steps until the string [data] appears?
[Steps]
step1 = WebAddress
step2 = Tab
step3 = SecurityType
step4 = Criteria
step5 = Date
step6 = Click1
step7 = Results
step8 = Download
[data]
......
Second:
How can I read everything after [data]?
[data]
WebAddress___________________________ Destination___________ Tab_____________ SecurityType___________________________________________________ Criteria___ Date_______ Click1_ Results_ Download
https://mbsdisclosure.fanniemae.com/ q:\\%s\\raw\\fnmapool Advanced Search Interim MBS: Single-Family Issue Date 09/01/2012 Search 100 CSV XML
https://mbsdisclosure.fanniemae.com/ q:\\%s\\raw\\fnmapool Advanced Search Preliminary Mega: Fannie Mae/Ginnie Mae backed Adjustable Rate Issue Date 09/01/2012 Search 100 CSV XML
https://mbsdisclosure.fanniemae.com/ q:\\%s\\raw\\fnmapool Advanced Search Preliminary Mega: Fannie Mae/Ginnie Mae backed Fixed Rate Issue Date 09/01/2012 Search 100 CSV XML
I want to pull everything under each step____________________ header, where step can be any of the steps (e.g. WebAddress).
So, for example, if step1 = WebAddress, how do I read everything under WebAddress__________________________, and so on? Thanks!
First:
with open(file_name) as f:
    print (f.read()).split("[data]")
Second:
with open(file_name) as f:
    pre_data, post_data = [s.strip() for s in (f.read()).split("[data]")]
post_data_lines = post_data.splitlines()
headers = post_data_lines[0].split()
print headers
for line in post_data_lines[1:]:
    print line.split()
    print dict(zip(headers, line.split()))
I'm also not sure how your [data] is delimited; you may want line.split('\t') if it's tabbed.
This is untested, but it should work. It doesn't quite get you all the way to where you want, but it gets most of it (the "hard" parts).
To split by header width, use:
file_name = "testdata.txt"
with open(file_name) as f:
    pre_data, post_data = [s.strip() for s in (f.read()).split("[data]")]
post_data_lines = post_data.splitlines()
headers = post_data_lines[0].split()
for line in post_data_lines[1:]:
    tmpline = []
    pos = 0
    for itm in headers:
        tmpline.append(line[pos:pos + len(itm)])
        pos += len(itm) + 1
    print dict(zip(headers, tmpline))
And if you want the actual headers without the __'s, then use:
file_name = "testdata.txt"
with open(file_name) as f:
    pre_data, post_data = [s.strip() for s in (f.read()).split("[data]")]
post_data_lines = post_data.splitlines()
headers = post_data_lines[0].split()
headers2 = [s.replace("_", " ").strip() for s in headers]
for line in post_data_lines[1:]:
    tmpline = []
    pos = 0
    for itm in headers:
        tmpline.append(line[pos:pos + len(itm)])
        pos += len(itm) + 1
    print dict(zip(headers2, tmpline))
First step:
>>> import ConfigParser
>>> cfg = ConfigParser.RawConfigParser()
>>> with open('sample.cfg') as f:
...     cfg.readfp(f)
...
>>> cfg.get('Steps','step1')
'WebAddress'
Second step:
>>> data_section = ''
>>> with open('sample.cfg') as f:
...     data_section = f.read()
...
>>> data = data_section[data_section.index('[data]') + len('[data]') + 1:]
>>> import csv, io
>>> reader = csv.reader(io.BytesIO(data), delimiter='\t')
>>> reader.next()  # skips header
>>> results = [row for row in reader]
Now results is a list of lists, with each inner list having items from the data section.
[['https://mbsdisclosure.fanniemae.com/','q:\\\\%s\\\\raw\\\\fnmapool','Advanced Search', 'Interim MBS: Single-Family', 'Issue Date','09/01/2012','Search','100', 'CSV XML']...]
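The answers above are written for Python 2. A minimal Python 3 sketch of the same two steps (assuming the sample.cfg layout from the question, with the padded headers separated by single spaces as in the split-by-header-width answer above):
import configparser

# First: read the steps section, i.e. everything before [data]
cfg = configparser.RawConfigParser()
with open('sample.cfg') as f:
    pre_data, post_data = f.read().split('[data]')
cfg.read_string(pre_data)
steps = dict(cfg.items('Steps'))  # {'step1': 'WebAddress', ...}

# Second: split each data row by the widths of the underscore-padded headers
lines = post_data.strip().splitlines()
headers = lines[0].split()
names = [h.replace('_', ' ').strip() for h in headers]
rows = []
for line in lines[1:]:
    pos, row = 0, {}
    for header, name in zip(headers, names):
        row[name] = line[pos:pos + len(header)].strip()
        pos += len(header) + 1
    rows.append(row)
print(rows[0]['WebAddress'])  # https://mbsdisclosure.fanniemae.com/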
