Useful API endpoints and how to access them with Python. An application programming interface (API) is code that allows two software programs to communicate with each other.
Covered in this article: Punk API | DummyJSON | Spotify Web API | YouTube
See also: A Guide to Obtaining Time Series Datasets in Python
The COVID-19 API has a free tier and returns responses in JSON. Start with its documentation on Postman.
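A minimal request sketch, assuming the /summary route listed in the Postman documentation (the route and response keys are assumptions taken from that documentation, not verified here):

import requests

# Assumed route from the COVID-19 API Postman documentation.
response = requests.get("https://api.covid19api.com/summary")
response.raise_for_status()
summary = response.json()
print(summary["Global"])                        # assumed key: worldwide totals
print(len(summary["Countries"]), "countries")   # assumed key: per-country records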
The Zillow API offers close to 20 APIs with data and functionality related to various aspects of real estate, including mortgages, the Multiple Listing Service (MLS), and public data.
The Spotify Web API provides access to a dataset of about 169,000 records. The base URI for all Web API requests is https://api.spotify.com/v1. In order to make successful Web API requests, your app will need a valid OAuth 2.0 access token.
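A minimal sketch of the app-only Client Credentials flow with the requests library; the client ID and secret come from your Spotify developer dashboard, and the search query is only an illustration:

import requests

CLIENT_ID = "your-client-id"          # from the Spotify developer dashboard
CLIENT_SECRET = "your-client-secret"

# Exchange the app credentials for an OAuth 2.0 access token.
token_resp = requests.post("https://accounts.spotify.com/api/token",
                           data={"grant_type": "client_credentials"},
                           auth=(CLIENT_ID, CLIENT_SECRET))
token_resp.raise_for_status()
access_token = token_resp.json()["access_token"]

# Every Web API request carries the token in the Authorization header.
resp = requests.get("https://api.spotify.com/v1/search",
                    headers={"Authorization": "Bearer " + access_token},
                    params={"q": "radiohead", "type": "artist", "limit": 1})
resp.raise_for_status()
print(resp.json()["artists"]["items"][0]["name"])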
Punk API is a simple endpoint that requires no authentication and delivers detailed information about 325 beers as JSON. The base endpoint is https://api.punkapi.com/v2/. You can page through one beer at a time using the URL https://api.punkapi.com/v2/beers/?per_page=1&page=1, where each page=# value returns details about a different beer. Try it in your browser.
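The same paging pattern in Python with the requests library (a short sketch; the printed fields follow the beer schema shown in the Punk API documentation):

import requests

# Fetch the first three beers, one per page.
for page in range(1, 4):
    resp = requests.get("https://api.punkapi.com/v2/beers",
                        params={"per_page": 1, "page": page})
    resp.raise_for_status()
    beer = resp.json()[0]   # each page returns a JSON list holding one beer
    print(beer["id"], beer["name"], beer["first_brewed"])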
A really good tutorial on accessing this endpoint using Python and PostgreSQL was written by Haki Benita and published on Medium in the article Fastest Way to Load Data Into PostgreSQL Using Python. Below is my more direct approach, derived from his work.
#
# Written by: Mark W Kiehl
# http://mechatronicsolutionsllc.com/
# http://www.savvysolutions.info/savvycodesolutions/
#
# Demonstrate using psycopg2 to create a table and then populate it with
# data using the function psycopg2.extras.execute_batch.
#
# Portions inspired and derived from the work of Haki Benita https://hakibenita.medium.com/fastest-way-to-load-data-into-postgresql-using-python-d2e6de8b2aaa
#
# https://www.postgresqltutorial.com/postgresql-python/
# https://www.psycopg.org/docs/index.html
# pip install psycopg2  (or psycopg2-binary for a prebuilt package)
import psycopg2
import psycopg2.extras  # for execute_batch
from datetime import datetime

conn = psycopg2.connect(
    host="localhost",
    database="pytestdb",
    user="postgres",
    password="[your db password]",
)
conn.autocommit = True  # autocommit=True so every command we execute takes effect immediately
def create_db_table(cur):
    """ Create the table 'test_a' using the passed db cursor """
    try:
        # execute a SQL statement to get the PostgreSQL version
        cur.execute('SELECT version()')
        # display the PostgreSQL database server version
        db_version = cur.fetchone()
        print(db_version)
        # Create a new table 'test_a'. UNLOGGED skips the write-ahead
        # log, which speeds up bulk loads at the cost of crash safety.
        cur.execute("""
            DROP TABLE IF EXISTS test_a;
            CREATE UNLOGGED TABLE test_a (
                id INTEGER,
                description TEXT,
                first_brewed DATE,
                num_float DECIMAL,
                num_int INTEGER
            );
        """)
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        print('Created db table test_a')
def add_data_to_table(cur, list_data):
    # psycopg2 provides a way to insert many rows at once using
    # psycopg2.extras.execute_batch (FYI much faster than .executemany()).
    print('Inserting {} records into the db table test_a'.format(len(list_data)))
    try:
        # The %(name)s placeholders are filled from the keys of each dictionary in list_data.
        psycopg2.extras.execute_batch(cur, """
            INSERT INTO test_a VALUES (
                %(id)s,
                %(description)s,
                %(first_brewed)s,
                %(num_float)s,
                %(num_int)s
            );
        """, list_data)
        # execute a SQL statement to get the number of records in table test_a
        cur.execute('SELECT count(*) FROM test_a;')
        rows = cur.fetchone()
        print('{} rows in table test_a'.format(rows[0]))
    except (Exception, psycopg2.DatabaseError) as error:
        print('SQL ERROR: {}'.format(error))
    finally:
        print('add_data_to_table() done')
if __name__ == '__main__':
    with conn.cursor() as cur:
        # Create table test_a
        create_db_table(cur)
        # Create some data as a list of dictionaries.
        # NOTE: The keys of each dictionary must match the table field (column) names.
        li = []
        li.append({'id': 1,
                   'description': 'my first description',
                   'first_brewed': datetime.now(),
                   'num_float': 1.2345,
                   'num_int': 32767})
        li.append({'id': 2,
                   'description': 'my second description',
                   'first_brewed': datetime.now(),
                   'num_float': 5432.1,
                   'num_int': -32767})
        print(li)
        add_data_to_table(cur, li)
    if conn is not None:
        conn.close()
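If you need even more throughput, psycopg2 also offers execute_values(), which folds all of the rows into a single INSERT ... VALUES statement; in Haki Benita's benchmarks it generally outperforms execute_batch. A minimal sketch against the same test_a table, with the rows given as tuples in column order:

import psycopg2.extras

rows = [(1, 'my first description', '2024-01-01', 1.2345, 32767),
        (2, 'my second description', '2024-01-02', 5432.1, -32767)]

with conn.cursor() as cur:
    # execute_values() expands the single %s placeholder into a VALUES list of all rows.
    psycopg2.extras.execute_values(
        cur,
        "INSERT INTO test_a (id, description, first_brewed, num_float, num_int) VALUES %s;",
        rows)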
DummyJSON is a service that provides multiple types of REST endpoints filled with JSON data for testing. All HTTP methods are supported.
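Before the full paging script below, a quick sketch of a single GET and a test POST (DummyJSON simulates the add and echoes the object back with a new id; nothing is stored server-side):

import requests

# GET a single user by id.
user = requests.get("https://dummyjson.com/users/1").json()
print(user["firstName"], user["lastName"])

# POST is accepted too; the response echoes the object with a simulated id.
new_product = requests.post("https://dummyjson.com/products/add",
                            json={"title": "Test Product"}).json()
print(new_product)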
#
# Written by: Mark W Kiehl
# http://mechatronicsolutionsllc.com/
# http://www.savvysolutions.info/savvycodesolutions/
# How to access the API endpoint DummyJSON
# https://dummyjson.com/
#
# Related links:
# https://towardsdatascience.com/how-to-build-an-elt-with-python-8f5d9d75a12e
# venv: api
# Endpoint users https://dummyjson.com/users
"""
{
"users": [
{
"id": 1,
"firstName": "Terry",
"lastName": "Medhurst",
"maidenName": "Smitham",
"age": 50,
"gender": "male",
"email": "atuny0@sohu.com",
"phone": "+63 791 675 8914",
"username": "atuny0",
"password": "9uQFF1Lh",
"birthDate": "2000-12-25",
"image": "https://robohash.org/hicveldicta.png?size=50x50&set=set1",
"bloodGroup": "A−",
"height": 189,
"weight": 75.4,
"eyeColor": "Green",
"hair": {
"color": "Black",
"type": "Strands"
},
"domain": "slashdot.org",
"ip": "117.29.86.254",
"address": {
"address": "1745 T Street Southeast",
"city": "Washington",
"coordinates": {
"lat": 38.867033,
"lng": -76.979235
},
"postalCode": "20020",
"state": "DC"
},
"macAddress": "13:69:BA:56:A3:74",
"university": "Capitol University",
"bank": {
"cardExpire": "06/22",
"cardNumber": "50380955204220685",
"cardType": "maestro",
"currency": "Peso",
"iban": "NO17 0695 2754 967"
},
"company": {
"address": {
"address": "629 Debbie Drive",
"city": "Nashville",
"coordinates": {
"lat": 36.208114,
"lng": -86.58621199999999
},
"postalCode": "37076",
"state": "TN"
},
"department": "Marketing",
"name": "Blanda-O'Keefe",
"title": "Help Desk Operator"
},
"ein": "20-9487066",
"ssn": "661-64-2976",
"userAgent": "Mozilla/5.0 ..."
},
{...},
{...}
// 30 items
],
"total": 100,
"skip": 0,
"limit": 30
}
"""
# Python performance timer
import time
t_start_sec = time.perf_counter()
# pip install memory-profiler
# Use the package memory-profiler to measure memory consumption
# The peak memory is the difference between the starting value of the “Mem usage” column, and the highest value (also known as the “high watermark”).
# IMPORTANT: See how / where @profile is inserted before a function later in the script.
from memory_profiler import profile
import requests
ENDPOINT = "https://dummyjson.com/"
def make_api_call(resource):
    results_picked = 0
    total_results = 100   # unknown until the first response; just needs to be > 0
    all_data = []
    while results_picked < total_results:
        response = requests.get(f"{ENDPOINT}{resource}",
                                params={"skip": results_picked})
        if response.status_code == 200:
            data = response.json()
            rows = data.get(resource)
            all_data += rows                     # concatenating the two lists
            total_results = data.get("total")    # actual total reported by the API
            results_picked += len(rows)          # to skip them in the next call
        else:
            raise Exception(response.text)
    return all_data
import json
@profile  # instantiating the decorator for the function to be monitored for memory usage by memory-profiler
def main():
    data_json = make_api_call("users")
    print('Detailed data for {} items acquired from {}'.format(len(data_json), ENDPOINT))
    #print(json.dumps(data_json, sort_keys=False, indent=2))  # prints out the data formatted nicely using json

    # Report the script execution time
    t_stop_sec = time.perf_counter()
    print('\nElapsed time {:.6f} sec'.format(t_stop_sec - t_start_sec))

if __name__ == '__main__':
    main()
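Run the script normally (python script.py); because @profile is imported from memory_profiler, the line-by-line memory report prints to stdout when main() returns, and the peak memory is the high watermark in the "Mem usage" column.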