# read_csv.py
# /// script
# requires-python = ">=3.10"
# dependencies = [
# "duckdb==1.1.1",
# "marimo",
# "polars==1.18.0",
# "pyarrow==18.1.0",
# "vega-datasets==0.9.0",
# ]
# ///
import marimo
# NOTE(review): presumably the marimo version that generated this notebook
# file — confirm before editing by hand.
__generated_with = "0.10.7"
# The notebook application object: the cells below attach to it through the
# @app.cell decorator, and the script entry point calls app.run().
app = marimo.App(width="medium")
# Title cell: renders the notebook heading and a one-sentence summary as
# Markdown.
@app.cell(hide_code=True)
def _(mo):
    # Leading underscore keeps the helper name private to this cell.
    _intro = (
        "\n"
        "# Read CSV\n"
        "This notebook shows how to read a CSV file from a local file or a URL into an in-memory table.\n"
    )
    mo.md(_intro)
    return
# Setup cell: imports the libraries the notebook uses (returned as `mo` and
# `pl`) and writes the small sample file `data.csv` that the SQL cells read.
@app.cell(hide_code=True)
def _():
    import marimo as mo
    import polars as pl

    # Two columns, three rows; the underscore keeps the name cell-local.
    _sample = pl.DataFrame({"A": [1, 2, 3], "B": ["a", "b", "c"]})
    _sample.write_csv("data.csv")
    return mo, pl
# Prose cell: explains that a CSV path (or URL) can be queried directly in SQL.
@app.cell(hide_code=True)
def _(mo):
    _note = """Reading from a local CSV is as easy as `SELECT * from "data.csv"`, where `data.csv` is the path to your local file (or a URL to a CSV file)."""
    mo.md(_note)
    return
# Tip cell: a one-entry accordion describing the two ways to add a SQL cell.
@app.cell(hide_code=True)
def _(mo):
    # Build the two inline icons first so the text below stays readable;
    # underscore-prefixed names stay private to this cell.
    _database_icon = mo.icon("lucide:database")
    _plus_icon = mo.icon("lucide:circle-plus")
    _how_to = (
        "\n"
        "Create a SQL cell in one of two ways:\n"
        f"1. Click the {_database_icon} `SQL` button at the **bottom of your notebook**\n"
        f"2. **Right-click** the {_plus_icon} button to the **left of a cell**, and choose `SQL`.\n"
        "In the SQL cell, you can query dataframes in your notebook as if\n"
        "they were tables — just reference them by name.\n"
    )
    mo.accordion({"Tip: Creating SQL Cells": mo.md(_how_to)})
    return
# Query data.csv directly by path through mo.sql and bind what it returns to
# `result`; `result` is returned so that other cells can use it.
# NOTE(review): output=False presumably keeps the query result from being
# displayed under this cell — confirm against the mo.sql documentation.
@app.cell
def _(mo):
    result = mo.sql(
        f"""
-- Tip: you can also specify the data files using a glob, such as '/path/to/*.csv'
-- or '/path/**/to/*.csv'
SELECT * FROM "data.csv"
""",
        output=False,
    )
    return (result,)
# Tip cell: a one-entry accordion explaining how a SQL cell's output is
# returned to Python and how its variable name is chosen.
@app.cell(hide_code=True)
def _(mo):
    # Underscore-prefixed helpers stay private to this cell.
    _explanation = (
        "\n"
        "The query output is returned to Python as a dataframe (Polars if you have it installed, Pandas otherwise).\n"
        "Choose the dataframe name via the **output variable** input in the bottom-left\n"
        "of the cell. If the name starts with an underscore, it won't be made available\n"
        "to other cells. In this case, we've named the output `result`.\n"
    )
    _sections = {"Tip: Query output": mo.md(_explanation)}
    mo.accordion(_sections)
    return
# Evaluates `result`, the value bound by the SQL cell that queries data.csv.
# NOTE(review): presumably marimo renders a cell's final bare expression as
# that cell's output, which is why the name stands alone here — confirm.
@app.cell
def _(result):
    result
    return
# Prose cell: introduces the section on creating a table from a CSV file.
@app.cell(hide_code=True)
def _(mo):
    # Leading underscore keeps the helper name private to this cell.
    _section = (
        "\n"
        "## Create an in-memory table from a CSV file\n"
        "You can also create a table from a CSV file, so you can easily query it in subsequent cells. This table will appear in marimo's data sources panel.\n"
    )
    mo.md(_section)
    return
# Materialize data.csv as the SQL table `myTable` so later SQL cells can query
# it by name (mo.sql runs DuckDB here, per this notebook's dependencies).
#
# CREATE OR REPLACE is used instead of a plain CREATE TABLE so the statement is
# idempotent: executing it again against a connection where `myTable` already
# exists rebuilds the table from the CSV instead of failing with an
# "already exists" catalog error.
#
# NOTE(review): `myTable` in the return is a SQL table name, not a Python
# variable assigned in this function; the return line is kept exactly as
# generated — confirm marimo never executes it literally.
@app.cell
def _(mo):
    _df = mo.sql(
        f"""
CREATE OR REPLACE TABLE myTable AS SELECT * FROM "data.csv"
"""
    )
    return (myTable,)
# Select every row from `myTable`, the table created from data.csv by the
# CREATE TABLE cell; the result is bound to the cell-local `_df`.
# NOTE(review): `myTable` is not used in the Python body; it is presumably
# listed as a parameter so this cell is ordered after the cell that creates
# the table — confirm.
@app.cell
def _(mo, myTable):
    _df = mo.sql(
        f"""
SELECT * FROM myTable
"""
    )
    return
# Heading cell for the "Advanced usage" section.
@app.cell(hide_code=True)
def _(mo):
    _heading = "## Advanced usage"
    mo.md(_heading)
    return
# Prose cell: points readers to DuckDB's `read_csv` documentation for
# customizing how the CSV is parsed.
@app.cell(hide_code=True)
def _(mo):
    # Split across two literals only for line length; the pieces concatenate
    # into a single sentence.
    _pointer = (
        "To customize how your CSV is read, including specifying the delimiter type, "
        "use [duckdb's `read_csv` function](https://duckdb.org/docs/data/csv/overview.html)."
    )
    mo.md(_pointer)
    return
# When this file is executed as a script (rather than imported), run the
# notebook application defined above.
if __name__ == "__main__":
    app.run()