How to implement this Jupyter notebook on Google Colab? - python

I just started using Google Colab a few hours ago and I'm trying to figure out how to read, write and save files, etc.
I have this code in a Jupyter notebook, and I'm having trouble with the last part, where I save the file. I want to save it either on my local computer or on Google Drive.
import pandas as pd
pd.set_option('display.max_columns', 999)
#load data
df = pd.read_csv('D:\\Project\\database\\Isolation Forest\\IF 15 PERCENT.csv')
df.shape
#data info
info = df.info()
print(info)
#data description
describe = df.describe() #print(describe)
f = open('D:\\Project\\database\\Isolation Forest\\Final Description IF TEST11.txt', "w+")
print(describe, file=f)
f.close()
and
Google Colab Code:
import pandas as pd
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
# Authenticate and create the PyDrive client.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
link = '......'
fluff, id = link.split('=')
print (id) # Verify that you have everything after '='
downloaded = drive.CreateFile({'id':id})
downloaded.GetContentFile('IF 15 PERCENT.csv')
df = pd.read_csv('IF 15 PERCENT.csv',index_col=None)
info = df.info()
print(info)
describe = df.describe()
I don't really know how to save it as a txt file (opened with mode "w+") from here.
Thank you.

This will save your dataframe in text format:
# Write the summary produced by df.describe() to a plain-text file.
# The `with` block closes the file even if the write raises.
with open('test.txt', 'w+') as tfile:
    # to_string() renders the DataFrame as a single text table.
    tfile.write(describe.to_string())

Related

Google colab can't read csv file though I entered correct path

I tried to get the genres of the songs in regional-us-daily-latest and to output the genres and other data as a CSV file. But Colab said:
FileNotFoundError: [Errno 2] No such file or directory: 'regional-us-daily-latest.csv'
I mounted My Drive, but it still didn't work.
Could you shed some light on this?
!pip3 install spotipy
import pandas as pd
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import json
from google.colab import drive
# Spotify API credentials (placeholder values); plain ASCII quotes are
# required, typographic quotes are a SyntaxError.
client_id = 'ID'
client_secret = 'SECRET'
client_credentials_manager = spotipy.oauth2.SpotifyClientCredentials(client_id, client_secret)
spotify = spotipy.Spotify(client_credentials_manager=client_credentials_manager)
import csv
csvfile = open('/content/drive/MyDrive/regional-us-daily-latest.csv', encoding='utf-8')
csvreader = csv.DictReader(csvfile)
# Each region is an (input CSV, output CSV) pair.
us = ("regional-us-daily-latest.csv", "us.csv")
# Iterate over a tuple of regions: `(us)` is just `us`, which would loop over
# the two strings and make region[0] a single character.
for region in (us,):
    inputfile = region[0]
    outputfile = region[1]
    # NOTE: bare file name, so it is resolved against the current working
    # directory rather than the Drive folder used in open() earlier.
    songs = pd.read_csv(inputfile, index_col=0, header=1)
    songs = songs.assign(Genre=0)
    # Look up each row's artist on Spotify and store the genres of the
    # first search hit.
    for index, row in songs.iterrows():
        artist = row["Artist"]
        result = spotify.search(artist, limit=1, type="artist")
        genre = result["artists"]["items"][0]["genres"]
        songs['Genre'][index] = genre
    songs.head(10)
    songs.to_csv(outputfile)
    # NOTE(review): `files` is not imported in this snippet — presumably
    # `from google.colab import files`; confirm.
    files.download(outputfile)
Save the CSV file in Google Drive, then in your notebook open the Drive file browser and locate the file. Copy the path of the CSV file into a variable and pass that variable to the read_csv() method.
Please mount the drive first:
from google.colab import drive
drive.mount('/content/drive')
Change the working directory to My Drive and check the current directory:
import os
os.chdir("drive/My Drive/")
print(os.getcwd())
!ls
Set the path of the file, and use the source_file variable wherever the file name is required:
source_file = os.path.join(os.getcwd(), "regional-us-daily-latest.csv")

How to save an np.array on google drive using colab?

I wrote a program in Colab whose results are NumPy arrays. Please tell me how to save an array to a file, and then how to read it back from the file.
I read this instruction: https://colab.research.google.com/notebooks/io.ipynb#scrollTo=S7c8WYyQdh5i
As a result, I figured out how to connect to a google drive and how to create and upload a text file there in the directory I need.
from google.colab import drive
drive.mount('/content/drive')
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)
uploaded = drive.CreateFile({'title': 'Sample upload.txt'})
uploaded.SetContentString('Sample upload file content')
uploaded.Upload()
print('Uploaded file with ID {}'.format(uploaded.get('id')))
I also know that you can save the array as a text file like this:
import numpy as np
a = np.array([1, 2, 3, 4, 5])
np.savetxt ("array.txt", a, fmt = "% s")
But I can't figure out how to save this text file to google drive. And how to read an array from it?
This will put the file in the top level of your Drive (https://drive.google.com/drive/my-drive):
import numpy as np
from google.colab import drive

# Mount Google Drive so it is reachable through the local filesystem.
drive.mount('/content/drive')

a = np.array([1, 2, 3, 4, 5])

# np.savetxt accepts an open file handle; the `with` block closes it afterwards.
with open('/content/drive/My Drive/array.txt', 'w') as f:
    np.savetxt(f, a)
You can then use this to read the array back into numpy:
# Read the text file back into a NumPy array.
# np.loadtxt parses values as float by default, so the array comes back
# as floats (1.0, 2.0, ...) rather than the original integers.
with open('/content/drive/My Drive/array.txt', 'r') as f:
    a = np.loadtxt(f)

Read excel file from google drive without downloading file

I want to read the sheets of an Excel file stored on Google Drive without downloading it to my local machine. I searched the Google Drive API but couldn't find a solution. I tried the following code; any suggestions are welcome:
'''
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import pandas as pd
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)
file_id = 'abc'
file_name = 'abc.xlsx'
downloaded = drive.CreateFile({'id': file_id})
downloaded.GetContentFile(file_name)
class TestCase:
    """Load one sheet of an Excel workbook and print it."""

    def __init__(self, file_name, sheet):
        """Read `sheet` from `file_name` with pandas and print the result.

        Args:
            file_name: Path of the Excel workbook to read.
            sheet: Sheet name passed to ``pd.read_excel`` as ``sheet_name``.
        """
        self.file_name = file_name
        self.sheet = sheet
        # The loaded frame is only printed; it is not stored on the instance.
        testcase = pd.read_excel(file_name, usecols=None, sheet_name=sheet)
        print(testcase)
class TestCaseSteps:
    """Load one sheet of an Excel workbook and print it."""

    def __init__(self, file_name, sheet):
        """Read `sheet` from `file_name` with pandas and print the result.

        Args:
            file_name: Path of the Excel workbook to read.
            sheet: Sheet name passed to ``pd.read_excel`` as ``sheet_name``.
        """
        self.file_name = file_name
        self.sheet = sheet
        # The loaded frame is only printed; it is not stored on the instance.
        testcase = pd.read_excel(file_name, usecols=None, sheet_name=sheet)
        print(testcase)
testcases = TestCase(file_name, 'A')
steps = TestCaseSteps(file_name, 'B')
'''
I believe your goal and situation are as follows.
You want to read the XLSX downloaded from Google Drive using pd.read_excel.
You want to achieve this without saving the downloaded XLSX data as a file.
Your gauth = GoogleAuth() can be used for downloading the Google Spreadsheet as the XLSX format.
In this case, I would like to propose the following flow.
Download the Google Spreadsheet as XLSX format.
Here, the script sends a request directly to the endpoint that exports a Spreadsheet in XLSX format, using the requests library.
The access token is retrieved from gauth = GoogleAuth().
The downloaded XLSX data is read with pd.read_excel.
In this case, BytesIO is used for reading the data.
With this flow, the Spreadsheet is downloaded as XLSX data and read without being saved as a file. When the above flow is turned into a script, it looks as follows.
Sample script:
Before you run the script, please set the Spreadsheet ID.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import pandas as pd
import requests
from io import BytesIO

spreadsheetId = "###"  # <--- Please set the Spreadsheet ID.

# 1. Download the Google Spreadsheet as XLSX format.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
url = "https://www.googleapis.com/drive/v3/files/" + spreadsheetId + "/export?mimeType=application%2Fvnd.openxmlformats-officedocument.spreadsheetml.sheet"
res = requests.get(url, headers={"Authorization": "Bearer " + gauth.attr['credentials'].access_token})
# Stop on an HTTP error response instead of handing an error payload to
# pd.read_excel, which would fail with a misleading parse error.
res.raise_for_status()

# 2. The downloaded XLSX data is read with `pd.read_excel`.
sheet = "Sheet1"
# BytesIO wraps the response bytes so pandas can read them without a file on disk.
values = pd.read_excel(BytesIO(res.content), usecols=None, sheet_name=sheet)
print(values)
References:
Download a Google Workspace Document
pandas.read_excel
Added:
The following sample script assumes that the XLSX file itself is stored on Google Drive, and downloads that XLSX file.
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
import pandas as pd
import requests
from io import BytesIO

file_id = "###"  # <--- Please set the file ID of XLSX file.

# 1. Download the XLSX data.
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
url = "https://www.googleapis.com/drive/v3/files/" + file_id + "?alt=media"
res = requests.get(url, headers={"Authorization": "Bearer " + gauth.attr['credentials'].access_token})
# Stop on an HTTP error response instead of handing an error payload to
# pd.read_excel, which would fail with a misleading parse error.
res.raise_for_status()

# 2. The downloaded XLSX data is read with `pd.read_excel`.
sheet = "Sheet1"
# BytesIO wraps the response bytes so pandas can read them without a file on disk.
values = pd.read_excel(BytesIO(res.content), usecols=None, sheet_name=sheet)
print(values)

Automatically authenticate gspread in Colab

I currently have a script which reads a Google Sheet URL, and converts to a dataframe. To access this file, I need to authenticate each time I run the script. Is there an alternative which means I only have to do this once?
from google.colab import auth
auth.authenticate_user()
import gspread
from oauth2client.client import GoogleCredentials
gc = gspread.authorize(GoogleCredentials.get_application_default())
import pandas as pd
import matplotlib.pyplot as plt
wb = gc.open_by_url('...')
sheet = wb.worksheet('Sheet1')
data = sheet.get_all_values()
df = pd.DataFrame(data)
Thanks!

Google Spreadsheet to CSV in Google Drive

When I upload a CSV file to Google Drive, it is automatically converted to Google Sheets. How can I keep it as a CSV file in Drive? Or can I read the Google Sheet into a pandas DataFrame?
Develop environment: Google Colab
Code Snippet:
Input
data = pd.read_csv("ner_dataset.desktop (3dec943a)",
encoding="latin1").fillna(method="ffill")
data.tail(10)
Output
[Desktop Entry]
0 Type=Link
1 Name=ner_dataset
2 URL=https://docs.google.com/spreadsheets/d/1w0...
WORKING CODE
from google.colab import auth
auth.authenticate_user()
import gspread
from oauth2client.client import GoogleCredentials
gc = gspread.authorize(GoogleCredentials.get_application_default())
worksheet = gc.open('Your spreadsheet name').sheet1
# get_all_values gives a list of rows.
rows = worksheet.get_all_values()
print(rows)
# Convert to a DataFrame and render.
import pandas as pd
pd.DataFrame.from_records(rows)
#Mount the Drive
from google.colab import drive
drive.mount('drive')
#Authenticate you need to do with your credentials, fill yourself
gauth = GoogleAuth()
#Create CSV and Copy
df.to_csv('data.csv')
!cp data.csv drive/'your drive'

Categories