-
Notifications
You must be signed in to change notification settings - Fork 348
/
Copy pathread_json.py
157 lines (121 loc) · 3.3 KB
/
read_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "duckdb==1.1.1",
# "marimo",
# "polars==1.18.0",
# "pyarrow==18.1.0",
# "vega-datasets==0.9.0",
# ]
# ///
import marimo
# Version string recorded in the notebook file; presumably the marimo release
# that last wrote this file -- confirm before editing by hand.
__generated_with = "0.10.7"
# The notebook application object. Each cell below registers itself on it via
# the `@app.cell` decorator, and the `__main__` guard at the end of the file
# calls `app.run()`.
app = marimo.App(width="medium")
# Title cell: passes the notebook heading and a one-sentence summary to
# `mo.md` as Markdown text. `mo` is the marimo module, received as a
# parameter (it is imported and returned by the setup cell further down).
@app.cell(hide_code=True)
def _(mo):
    mo.md(
        """
# Read JSON
This notebook shows how to read a JSON file from a local file or a URL into an in-memory table.
"""
    )
    return
# Setup cell: imports marimo as `mo` and polars as `pl`, then writes a small
# three-row sample DataFrame (columns "A" and "B") to the relative path
# `data.json` so the SQL cells in this notebook have a file to query.
# Returns `mo` and `pl`, the names this cell provides to the other cells.
@app.cell(hide_code=True)
def _():
    import marimo as mo
    import polars as pl
    # Side effect: creates/overwrites `data.json` relative to the working directory.
    pl.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]}).write_json("data.json")
    return mo, pl
# Explanatory Markdown cell: shows the minimal SQL statement for reading a
# JSON file and notes that the quoted name may be a path or a URL.
@app.cell(hide_code=True)
def _(mo):
    mo.md(
        """
Reading from a JSON file is as easy as
```sql
SELECT * from 'data.json'
```
where `data.json` is the path or URL to your json file.
"""
    )
    return
# Tip cell: builds an accordion with a single section describing the two UI
# routes for creating a SQL cell. `mo.accordion` receives a one-entry dict
# mapping the section title to its Markdown body; the body is an f-string so
# the two `mo.icon(...)` calls are interpolated into the text.
@app.cell(hide_code=True)
def _(mo):
    mo.accordion(
        {
            "Tip: Creating SQL Cells": mo.md(
                f"""
Create a SQL cell in one of two ways:
1. Click the {mo.icon("lucide:database")} `SQL` button at the **bottom of your notebook**
2. **Right-click** the {mo.icon("lucide:circle-plus")} button to the **left of a cell**, and choose `SQL`.
In the SQL cell, you can query dataframes in your notebook as if
they were tables — just reference them by name.
"""
            )
        }
    )
    return
# SQL cell: runs `SELECT * FROM 'data.json'` through `mo.sql` and binds the
# query result to `result`, which is returned so other cells can use it.
# NOTE(review): `output=False` presumably suppresses this cell's own display
# of the result (a separate cell shows `result`) -- confirm against the
# `mo.sql` documentation.
@app.cell
def _(mo):
    result = mo.sql(
        f"""
-- Tip: you can also specify the data files using a glob, such as '/path/to/*.json'
-- or '/path/**/to/*.json'
SELECT * FROM 'data.json'
""",
        output=False,
    )
    return (result,)
# Tip cell: builds an accordion with a single section explaining how a SQL
# cell's result is handed back to Python and how its output variable is named.
# The Markdown body is a raw string, so backslashes would be kept literally.
@app.cell(hide_code=True)
def _(mo):
    mo.accordion(
        {
            "Tip: Query output": mo.md(
                r"""
The query output is returned to Python as a dataframe (Polars if you have it installed, Pandas otherwise).
Choose the dataframe name via the **output variable** input in the bottom-left
of the cell. If the name starts with an underscore, it won't be made available
to other cells. In this case, we've named the output `result`.
"""
            )
        }
    )
    return
# Display cell: takes `result` (produced by the SQL query cell) and evaluates
# it as a bare expression.
# NOTE(review): marimo is understood to render a cell's last expression as
# its output, which is why the bare name is here -- confirm against marimo docs.
@app.cell
def _(result):
    result
    return
# Section-heading Markdown cell: introduces creating an in-memory table from
# a JSON file so it can be queried by name in subsequent cells.
@app.cell(hide_code=True)
def _(mo):
    mo.md(
        r"""
## Create an in-memory table from a JSON file
You can also create a table from a JSON file, so you can easily query it in subsequent cells. This table will appear in marimo's data sources panel.
"""
    )
    return
# SQL cell: issues `CREATE OR REPLACE TABLE myTable AS SELECT * FROM 'data.json'`
# through `mo.sql`. The Python-side result is bound to the underscore-prefixed
# name `_df` and is not returned.
# NOTE(review): `myTable` in the return tuple is not assigned by any Python
# statement in this body; it is the table named in the SQL text. This looks
# like marimo's generated bookkeeping for names the cell defines -- confirm
# before editing the return by hand.
@app.cell
def _(mo):
    _df = mo.sql(
        f"""
CREATE OR REPLACE TABLE myTable AS SELECT * FROM 'data.json'
"""
    )
    return (myTable,)
# SQL cell: runs `SELECT * FROM myTable` through `mo.sql`, binding the result
# to the underscore-prefixed name `_df`; nothing is returned.
# NOTE(review): the `myTable` parameter is only referenced inside the SQL
# text, not as a Python name; it presumably records this cell's dependency on
# the cell that creates the table -- confirm before removing it.
@app.cell
def _(mo, myTable):
    _df = mo.sql(
        f"""
SELECT * FROM myTable
"""
    )
    return
# Section-heading Markdown cell for the "Advanced usage" part of the notebook.
@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""## Advanced usage""")
    return
# Markdown cell pointing readers to DuckDB's `read_json` documentation for
# customizing how a JSON file is read.
@app.cell(hide_code=True)
def _(mo):
    mo.md(r"""To customize how your json file is read, use [duckdb's `read_json` function](https://duckdb.org/docs/data/json/overview.html).""")
    return
# Script entry point: when this file is executed directly (rather than
# imported), run the notebook application object `app`.
if __name__ == "__main__":
    app.run()