Hello,
I have created a way to make multiple plots and a duckdb database using python:
import aiohttp
import asyncio
import json
import pandas as pd
import plotly.graph_objects as go
import sys
from datetime import datetime
import dash
from dash import dcc, html
import nest_asyncio
import duckdb
nest_asyncio.apply()
async def fetch_data(session, url, params):
async with session.get(url, params=params) as response:
return await response.json()
async def parse_country_data(country_codes):
country_data = []
url = 'https://api.datacommons.org/stat/series'
async with aiohttp.ClientSession() as session:
tasks = []
for code in country_codes:
params = {'place': f'country/{code}', 'stat_var': 'sdg/EN_ATM_GHGT_AIP'}
tasks.append(fetch_data(session, url, params))
responses = await asyncio.gather(*tasks)
for code, response in zip(country_codes, responses):
if 'series' in response:
country_data.append({
"country_code": code,
"data": [{"year": year, "emission": value} for year, value in response['series'].items()]
})
else:
print(f'No data available for {code}.')
return country_data
async def fetch_eu_data():
url = 'https://api.datacommons.org/stat/series'
params = {'place': 'undata-geo/G00500360', 'stat_var': 'sdg/EN_ATM_GHGT_AIP'}
async with aiohttp.ClientSession() as session:
async with session.get(url, params=params) as response:
eu_data = await response.json()
if 'series' in eu_data:
EU_data = [{"country_code": "EU", "data": [{"year": year, "emission": value} for year, value in eu_data['series'].items()]}]
print(f'Data fetched for European Union:', EU_data)
return EU_data
else:
print("No EU data available.")
return []
async def combine_data(country_data, EU_data):
combined_data = country_data + EU_data if EU_data else country_data
for country in combined_data:
country["data"] = sorted(country["data"], key=lambda x: x["year"])
return combined_data
async def save_data_to_json(data, file_path):
with open(file_path, 'w') as json_file:
json.dump(data, json_file, indent=4)
print(f'Data saved to {file_path}')
async def load_data_from_json(file_path):
with open(file_path, 'r') as json_file:
data = json.load(json_file)
return data
async def main():
# Define country codes and file paths
country_codes = ['AUS', 'AUT', 'BEL', 'BGR', 'BLR', 'CAN', 'CHE', 'CYP', 'CZE', 'DEU',
'DNK', 'ESP', 'EST', 'FIN', 'FRA', 'GBR', 'GRC', 'HRV', 'HUN', 'IRL',
'ISL', 'ITA', 'JPN', 'LIE', 'LTU', 'LUX', 'LVA', 'MCO', 'MLT', 'NLD',
'NOR', 'NZL', 'POL', 'PRT', 'ROU', 'RUS', 'SVK', 'SVN', 'SWE', 'TUR',
'UKR', 'USA']
file_name = "EN_ATM_GHGT_AIP_sorted.json"
file_path = f'/content/{file_name}'
# Fetch and combine data
country_data = await parse_country_data(country_codes)
EU_data = await fetch_eu_data()
combined_data = await combine_data(country_data, EU_data)
# Save and load data
await save_data_to_json(combined_data, file_path)
data_example = await load_data_from_json(file_path)
json.dump({"EN_ATM_GHGT_AIP_Data": combined_data}, sys.stdout)
# Convert data to DataFrame
df = pd.DataFrame([{'country_code': d['country_code'], 'year': item['year'], 'emission': item['emission']} for d in combined_data for item in d['data']])
# Save data to CSV
csv_file_path = '/content/EN_ATM_GHGT_AIP_Data.csv'
df.to_csv(csv_file_path, index=False)
print(f'Data saved to {csv_file_path}')
# Save data to DuckDB
con = duckdb.connect('/content/EN_ATM_GHGT_AIP_Data.db')
con.register('data', df)
con.execute("CREATE TABLE EN_ATM_GHGT_AIP_Data AS SELECT * FROM data")
print('Data saved to DuckDB')
# Initialize the Dash app
app = dash.Dash(__name__)
# Define the layout of the app
app.layout = html.Div([
dcc.Dropdown(
id='country-select',
options=[{'label': country['country_code'], 'value': country['country_code']} for country in combined_data],
value=['USA'], # Default value
multi=True
),
dcc.Dropdown(
id='graph-type-select',
options=[
{'label': 'Line Graph', 'value': 'line'},
{'label': 'Heatmap', 'value': 'heatmap'},
{'label': 'Stacked Area Plot', 'value': 'area'},
{'label': 'Pie Chart', 'value': 'pie'},
{'label': 'Scatter Plot', 'value': 'scatter'},
{'label': 'Bar Chart', 'value': 'bar'}
],
value='line' # Default value
),
dcc.Graph(id='EN_ATM_GHGT_AIP_Data-graph')
])
@app.callback(
dash.dependencies.Output('EN_ATM_GHGT_AIP_Data-graph', 'figure'),
[dash.dependencies.Input('country-select', 'value'),
dash.dependencies.Input('graph-type-select', 'value')]
)
def update_graph(selected_countries, graph_type):
traces = []
layout = {
'title': 'Emissions Data Visualization',
'xaxis': {'title': 'Year'},
'yaxis': {'title': 'Emissions (Metric Tons)'}
}
if graph_type == 'heatmap':
z_data = []
for country in selected_countries:
country_data = next(item for item in combined_data if item["country_code"] == country)
years = [data['year'] for data in country_data['data']]
values = [data['emission'] for data in country_data['data']]
z_data.append(values)
traces = [go.Heatmap(z=z_data, x=years, y=selected_countries)]
layout['title'] = 'Emissions Heatmap'
elif graph_type == 'pie':
total_emissions = []
for country in selected_countries:
country_data = next(item for item in combined_data if item["country_code"] == country)
total_emissions.append(sum(data['emission'] for data in country_data['data']))
traces = [go.Pie(labels=selected_countries, values=total_emissions)]
layout['title'] = 'Total Emissions Distribution'
elif graph_type == 'scatter':
for country in selected_countries:
country_data = next(item for item in combined_data if item["country_code"] == country)
years = [data['year'] for data in country_data['data']]
values = [data['emission'] for data in country_data['data']]
traces.append(go.Scatter(x=years, y=values, mode='markers', name=country))
elif graph_type == 'bar':
for country in selected_countries:
country_data = next(item for item in combined_data if item["country_code"] == country)
years = [data['year'] for data in country_data['data']]
values = [data['emission'] for data in country_data['data']]
traces.append(go.Bar(x=years, y=values, name=country))
else: # Default to line or stacked area plot
mode = 'lines+markers' if graph_type == 'line' else 'lines'
stackgroup = 'one' if graph_type == 'area' else None
for country in selected_countries:
country_data = next(item for item in combined_data if item["country_code"] == country)
years = [data['year'] for data in country_data['data']]
values = [data['emission'] for data in country_data['data']]
traces.append(go.Scatter(x=years, y=values, mode=mode, stackgroup=stackgroup, name=country_data["country_code"]))
layout['title'] = 'Emissions Trend'
return {
'data': traces,
'layout': layout
}
# Run the app
app.run_server(debug=True)
# Run the main function
asyncio.run(main())
However I’m struggling to refactor this logic into an index.md with just javascript.
I do not care about performing the API invocations, but just reading from the duckdb .db file, .csv or .json produced here to be graphed interactively with the user being able to choose which plot to use. So pick as many countries as I want and choose between plot styles like heat, bar, stacked, line all in one interface: (like so in my python graph of the code above):
The .json file is structured like this:
[
{
"country_code": "AUS",
"data": [
{
"year": "1990",
"emission": 423.67222
},
{
"year": "1991",
"emission": 423.73306
},
{
"year": "1992",
"emission": 427.42608
},
{
"year": "1993",
"emission": 428.33707
},
{
"year": "1994",
"emission": 428.81822
},
{
"year": "1995",
"emission": 437.27655
},
{
"year": "1996",
"emission": 443.92827
},
{
"year": "1997",
"emission": 456.01598
},
{
"year": "1998",
"emission": 469.75152
},
{
"year": "1999",
"emission": 476.39199
},
{
"year": "2000",
"emission": 487.77776
},
{
"year": "2001",
"emission": 495.60431
},
{
"year": "2002",
"emission": 499.19049
},
{
"year": "2003",
"emission": 500.59996
},
{
"year": "2004",
"emission": 517.72651
},
{
"year": "2005",
"emission": 524.21149
},
{
"year": "2006",
"emission": 528.03842
},
{
"year": "2007",
"emission": 534.41784
},
{
"year": "2008",
"emission": 537.26231
},
{
"year": "2009",
"emission": 540.38162
},
{
"year": "2010",
"emission": 535.54922
},
{
"year": "2011",
"emission": 537.2055
},
{
"year": "2012",
"emission": 539.51105
},
{
"year": "2013",
"emission": 530.31191
},
{
"year": "2014",
"emission": 524.7094
},
{
"year": "2015",
"emission": 533.06383
},
{
"year": "2016",
"emission": 542.28787
},
{
"year": "2017",
"emission": 549.33002
},
{
"year": "2018",
"emission": 549.73194
},
{
"year": "2019",
"emission": 545.15252
}
]
},
{
"country_code": "AUT",
"data": [
{
"year": "1990",
"emission": 78.42049
},
{
"year": "1991",
"emission": 82.08233
},
{
"year": "1992",
"emission": 75.46927
},
{
"year": "1993",
"emission": 75.70919
},
{
"year": "1994",
"emission": 75.9583
},
{
"year": "1995",
"emission": 79.23798
},
{
"year": "1996",
"emission": 82.45259
},
{
"year": "1997",
"emission": 82.11623
},
{
"year": "1998",
"emission": 81.43285
},
{
"year": "1999",
"emission": 79.89837
},
{
"year": "2000",
"emission": 80.12943
...
and i put it in an appropriately named schema:
- [EN_ATM_GHGT_AIP_series.json]
I tried many ways but to no avail:
Also I don’t understand why I can’t use ‘const’ or ‘let’ invocations and have to split each of them in individual cells for imports or functions?
This, is what I started with: https://www.youtube.com/watch?v=Urf_bPFyhIk
Would appreciate some guidance on how to:
- Add as many countries as I want in and out of any graph.
- Correctly build it with the fewest files possible with all or some of these files:
a. /data/*.csv or *.json *.db
b. index.html
c. index.md (and how to style it cell for cell locally on my linux machine)
Thanks!