Data Pipelining With Coinbase¶
Python Imports¶
In [1]:
# Standard Library
import datetime
import io
import os
import random
import sys
import warnings
# NOTE: the next line rebinds the name `datetime` from the module (imported above)
# to the datetime class; later cells call datetime(...) and rely on the class binding.
from datetime import datetime, timedelta
from pathlib import Path
# Data Handling
import numpy as np
import pandas as pd
# Data Visualization
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import seaborn as sns
from matplotlib.ticker import FormatStrFormatter, FuncFormatter, MultipleLocator
# Data Sources
import yfinance as yf
# Statistical Analysis
import statsmodels.api as sm
# Machine Learning
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
# Suppress warnings
# NOTE(review): "ignore" with no category or module filter silences every warning
# raised after this point, including deprecation notices from the libraries above.
warnings.filterwarnings("ignore")
Add Directories To Path¶
In [2]:
# Add the source subdirectory to the system path to allow import config from settings.py
# NOTE: the website root is taken to be three levels above the current working
# directory, so this cell depends on where the notebook is launched from.
current_directory = Path.cwd()
website_base_directory = current_directory.parent.parent.parent
src_directory = website_base_directory / "src"
if str(src_directory) not in sys.path:
    sys.path.append(str(src_directory))

# Import settings.py (only importable after the sys.path update above)
from settings import config

# Add configured directories from config to path
SOURCE_DIR = config("SOURCE_DIR")
if str(Path(SOURCE_DIR)) not in sys.path:
    sys.path.append(str(Path(SOURCE_DIR)))

# Add other configured directories (SOURCE_DIR is already set above)
BASE_DIR = config("BASE_DIR")
CONTENT_DIR = config("CONTENT_DIR")
POSTS_DIR = config("POSTS_DIR")
PAGES_DIR = config("PAGES_DIR")
PUBLIC_DIR = config("PUBLIC_DIR")
DATA_DIR = config("DATA_DIR")
DATA_MANUAL_DIR = config("DATA_MANUAL_DIR")

# Print system path
for i, path in enumerate(sys.path):
    print(f"{i}: {path}")
0: /usr/lib/python313.zip 1: /usr/lib/python3.13 2: /usr/lib/python3.13/lib-dynload 3: 4: /home/jared/python-virtual-envs/general_313/lib/python3.13/site-packages 5: /home/jared/Cloud_Storage/Dropbox/Websites/jaredszajkowski.github.io/src
Track Index Dependencies¶
In [3]:
# Create file to track markdown dependencies.
# The relative path resolves against the current working directory. write_text("")
# creates the file or truncates an existing one, so each run starts from an empty
# file; it returns the number of characters written, which is why the cell shows 0.
# dep_file is passed to export_track_md_deps in the cells below.
dep_file = Path("index_dep.txt")
dep_file.write_text("")
Out[3]:
0
Python Functions¶
In [4]:
from coinbase_fetch_available_products import coinbase_fetch_available_products
from coinbase_fetch_full_history import coinbase_fetch_full_history
from coinbase_fetch_historical_candles import coinbase_fetch_historical_candles
from coinbase_pull_data import coinbase_pull_data
from export_track_md_deps import export_track_md_deps
Function Usage¶
Coinbase Fetch Available Products¶
In [5]:
# Example call: request the product listing with quote_currency="USD" and
# status="online". base_currency=None presumably means "no base-currency filter"
# (confirm against coinbase_fetch_available_products).
df = coinbase_fetch_available_products(
    base_currency=None,
    quote_currency="USD",
    status="online",
)
In [6]:
# Copy this <!-- INSERT_coinbase_fetch_available_products_HERE --> to index_temp.md
# The table is rendered as markdown with floats formatted to five decimal places.
export_track_md_deps(
    dep_file=dep_file,
    md_filename="coinbase_fetch_available_products.md",
    content=df.to_markdown(floatfmt=".5f"),
)
✅ Exported and tracked: coinbase_fetch_available_products.md
Coinbase Fetch Historical Candles¶
In [7]:
# Example call: BTC-USD candles with granularity=86_400 (the number of seconds in
# one day).
# NOTE(review): start and end are the same timestamp (2025-01-01 00:00) — confirm
# how coinbase_fetch_historical_candles treats a zero-length window.
df = coinbase_fetch_historical_candles(
    product_id="BTC-USD",
    start=datetime(2025, 1, 1),
    end=datetime(2025, 1, 1),
    granularity=86_400,
)
In [8]:
# Copy this <!-- INSERT_coinbase_fetch_historical_candles_HERE --> to index_temp.md
# The table is rendered as markdown with floats formatted to five decimal places.
export_track_md_deps(
    dep_file=dep_file,
    md_filename="coinbase_fetch_historical_candles.md",
    content=df.to_markdown(floatfmt=".5f"),
)
✅ Exported and tracked: coinbase_fetch_historical_candles.md
Coinbase Fetch Full History¶
In [9]:
# Example call: BTC-USD history from 2025-01-01 to 2025-01-31 with
# granularity=86_400 (the number of seconds in one day).
df = coinbase_fetch_full_history(
    product_id="BTC-USD",
    start=datetime(2025, 1, 1),
    end=datetime(2025, 1, 31),
    granularity=86_400,
)
In [10]:
# Copy this <!-- INSERT_coinbase_fetch_full_history_HERE --> to index_temp.md
# The table is rendered as markdown with floats formatted to five decimal places.
export_track_md_deps(
    dep_file=dep_file,
    md_filename="coinbase_fetch_full_history.md",
    content=df.to_markdown(floatfmt=".5f"),
)
✅ Exported and tracked: coinbase_fetch_full_history.md
Coinbase Pull Data¶
In [11]:
# Example call to coinbase_pull_data, currently disabled (every line is commented out).
# NOTE(review): before re-enabling, define current_year and current_month — no cell
# shown above defines them. Also, current_month - 1 evaluates to 0 when
# current_month is 1, and datetime() raises ValueError for month 0.
# df = coinbase_pull_data(
# base_directory=DATA_DIR,
# source="Coinbase",
# asset_class="Cryptocurrencies",
# excel_export=False,
# pickle_export=True,
# output_confirmation=True,
# base_currency="BTC",
# quote_currency="USD",
# granularity=60, # 60=minute, 3600=hourly, 86400=daily
# status='online', # default status is 'online'
# start_date=datetime(current_year, current_month - 1, 1), # default start date
# end_date=datetime.now() - timedelta(days=1), # updates data through 1 day ago due to lag in data availability
# )