# querying_dataframes.py
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "duckdb==1.1.1",
# "marimo",
# "polars==1.18.0",
# "pyarrow==18.1.0",
# "vega-datasets==0.9.0",
# ]
# ///
import marimo
# Version string recorded in the notebook file (presumably the marimo
# release that generated it — confirm).
__generated_with = "0.9.1"
# Application object: the `@app.cell` functions below are registered on it,
# and the `__main__` guard at the end of the file calls `app.run()`.
app = marimo.App(width="medium")
@app.cell
def __():
    # Import marimo under the alias `mo` and return it so that the cells
    # declaring a `mo` parameter can use it.
    import marimo as mo

    return (mo,)
@app.cell(hide_code=True)
def __(mo):
    # Introductory Markdown for the notebook. The string body is kept
    # flush-left so the text passed to `mo.md` is unchanged.
    mo.md(
        """
# Querying dataframes
This notebook shows how to use SQL to query Python dataframes.
First, we create a dataframe called `df`.
"""
    )
    return
@app.cell
def __():
    # Load the iris dataset from vega_datasets into `df`; both `data` and
    # `df` are returned for use by other cells.
    from vega_datasets import data

    df = data.iris()
    # Bare expression: presumably shown as this cell's output by marimo.
    df.head()
    return data, df
@app.cell(hide_code=True)
def __(mo):
    # Markdown explaining the two ways to create a SQL cell. This is an
    # f-string because it interpolates `mo.icon(...)` values; the string
    # body is kept flush-left so the text passed to `mo.md` is unchanged.
    mo.md(
        f"""
Next, we **create a SQL cell**.
Create a SQL cell in one of two ways:
1. Click the {mo.icon("lucide:database")} `SQL` button at the **bottom of your notebook**
2. **Right-click** the {mo.icon("lucide:circle-plus")} button to the **left of a cell**, and choose `SQL`.
In the SQL cell, you can query dataframes in your notebook as if they were tables — just reference them by name.
"""
    )
    return
@app.cell
def __(df, mo):
    # Run a SQL query through `mo.sql` and bind its return value to
    # `result`. `df` is a parameter of this cell but is referenced only by
    # name inside the SQL text. The query computes the mean `petalLength`
    # per `species`, ordered by that mean. The SQL string is kept flush-left
    # so the text passed to `mo.sql` is unchanged.
    result = mo.sql(
        f"""
SELECT species, mean(petalLength) as meanPetalLength FROM df GROUP BY species ORDER BY meanPetalLength
""", output=False
    )
    return (result,)
@app.cell(hide_code=True)
def __(mo):
    # Markdown describing how the query output is returned to Python and
    # how the output variable is named. The string body is kept flush-left
    # so the text passed to `mo.md` is unchanged.
    mo.md(
        r"""
The query output is returned to Python as a dataframe (Polars if you have it installed, Pandas otherwise).
Choose the dataframe name via the **output variable** input in the bottom-left of the cell. If the name starts with an underscore, it won't be made available to other cells.
In this case, we've named the output `result`.
"""
    )
    return
@app.cell
def __(result):
    # Bare expression on the SQL query's `result`: presumably shown as this
    # cell's output by marimo. Nothing is returned to other cells.
    result
    return
# Run the notebook's app when this file is executed as a script.
if __name__ == "__main__":
    app.run()