Skip to content

Commit afa7ae2

Browse files
authored
polars to_dicts() instead of to_json for nan handling (#7297)
## 📝 Summary <!-- Provide a concise summary of what this pull request is addressing. If this PR fixes any issues, list them here by number (e.g., Fixes #123). --> Handles NaN and ±inf values in polars tables by serializing rows with `to_dicts()` instead of `write_json()`. <img width="656" height="478" alt="image" src="https://github.com/user-attachments/assets/12651df8-7b46-4342-8465-1d4d72138d6c" /> <img width="982" height="622" alt="image" src="https://github.com/user-attachments/assets/34d208dd-bdf5-4f81-b5ba-21146cb74351" /> <img width="772" height="544" alt="image" src="https://github.com/user-attachments/assets/a023c6df-0548-4de1-ad3e-aaf4278c9b0e" /> ## 🔍 Description of Changes <!-- Detail the specific changes made in this pull request. Explain the problem addressed and how it was resolved. If applicable, provide before and after comparisons, screenshots, or any relevant details to help reviewers understand the changes easily. --> ## 📋 Checklist - [x] I have read the [contributor guidelines](https://github.com/marimo-team/marimo/blob/main/CONTRIBUTING.md). - [ ] For large changes, or changes that affect the public API: this change was discussed or approved through an issue, on [Discord](https://marimo.io/discord?ref=pr), or the community [discussions](https://github.com/marimo-team/marimo/discussions) (Please provide a link if applicable). - [x] I have added tests for the changes made. - [x] I have run the code and verified that it works as expected.
1 parent 55cdfa0 commit afa7ae2

File tree

11 files changed

+505
-175
lines changed

11 files changed

+505
-175
lines changed

‎marimo/_messaging/msgspec_encoder.py‎

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,11 @@
1414
import msgspec
1515
import msgspec.json
1616

17+
from marimo import _loggers
1718
from marimo._dependencies.dependencies import DependencyManager
19+
from marimo._plugins.core.media import io_to_data_url
20+
21+
LOGGER = _loggers.marimo_logger()
1822

1923

2024
def enc_hook(obj: Any) -> Any:
@@ -126,6 +130,39 @@ def enc_hook(obj: Any) -> Any:
126130
):
127131
return str(obj)
128132

133+
# Handle Pillow images
134+
if DependencyManager.pillow.imported():
135+
try:
136+
from PIL import Image
137+
138+
if isinstance(obj, Image.Image):
139+
return io_to_data_url(obj, "image/png")
140+
except Exception:
141+
LOGGER.debug("Unable to convert image to data URL", exc_info=True)
142+
143+
# Handle Matplotlib figures
144+
if DependencyManager.matplotlib.imported():
145+
try:
146+
import matplotlib.figure
147+
from matplotlib.axes import Axes
148+
149+
from marimo._output.formatting import as_html
150+
from marimo._plugins.stateless.flex import vstack
151+
152+
if isinstance(obj, matplotlib.figure.Figure):
153+
html = as_html(vstack([str(obj), obj]))
154+
mimetype, data = html._mime_()
155+
156+
if isinstance(obj, Axes):
157+
html = as_html(vstack([str(obj), obj]))
158+
mimetype, data = html._mime_()
159+
return {"mimetype": mimetype, "data": data}
160+
except Exception:
161+
LOGGER.debug(
162+
"Error converting matplotlib figures to HTML",
163+
exc_info=True,
164+
)
165+
129166
# Handle objects with __slots__
130167
slots = getattr(obj, "__slots__", None)
131168
if slots is not None:

‎marimo/_plugins/ui/_impl/tables/narwhals_table.py‎

Lines changed: 0 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,7 @@
1313

1414
from marimo import _loggers
1515
from marimo._data.models import BinValue, ColumnStats, ExternalDataType
16-
from marimo._dependencies.dependencies import DependencyManager
1716
from marimo._output.data.data import sanitize_json_bigint
18-
from marimo._plugins.core.media import io_to_data_url
1917
from marimo._plugins.ui._impl.tables.format import (
2018
FormatMapping,
2119
format_value,
@@ -702,50 +700,3 @@ def __repr__(self) -> str:
702700
if rows is None:
703701
return f"{df_type}: {columns:,} columns"
704702
return f"{df_type}: {rows:,} rows x {columns:,} columns"
705-
706-
def _sanitize_table_value(self, value: Any) -> Any:
707-
"""
708-
Sanitize a value for display in a table cell.
709-
710-
Most values are unchanged, but some values are for better
711-
display such as Images.
712-
"""
713-
if value is None:
714-
return None
715-
716-
# Handle Pillow images
717-
if DependencyManager.pillow.imported():
718-
try:
719-
from PIL import Image
720-
721-
if isinstance(value, Image.Image):
722-
return io_to_data_url(value, "image/png")
723-
except Exception:
724-
LOGGER.debug(
725-
"Unable to convert image to data URL", exc_info=True
726-
)
727-
728-
# Handle Matplotlib figures
729-
if DependencyManager.matplotlib.imported():
730-
try:
731-
import matplotlib.figure
732-
from matplotlib.axes import Axes
733-
734-
from marimo._output.formatting import as_html
735-
from marimo._plugins.stateless.flex import vstack
736-
737-
if isinstance(value, matplotlib.figure.Figure):
738-
html = as_html(vstack([str(value), value]))
739-
mimetype, data = html._mime_()
740-
741-
if isinstance(value, Axes):
742-
html = as_html(vstack([str(value), value]))
743-
mimetype, data = html._mime_()
744-
return {"mimetype": mimetype, "data": data}
745-
except Exception:
746-
LOGGER.debug(
747-
"Error converting matplotlib figures to HTML",
748-
exc_info=True,
749-
)
750-
751-
return value

‎marimo/_plugins/ui/_impl/tables/pandas_table.py‎

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,9 +116,6 @@ def to_json(result: pd.DataFrame) -> list[dict[str, Any]]:
116116
if inferred_dtype == "date":
117117
result[col] = result[col].apply(str)
118118

119-
result[col] = result[col].apply(
120-
self._sanitize_table_value
121-
)
122119
# Cast bytes to string to avoid overflow error
123120
if self._infer_dtype(col) == "bytes":
124121
result[col] = result[col].apply(str)

‎marimo/_plugins/ui/_impl/tables/polars_table.py‎

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,11 @@ def to_csv_str(
110110
def to_json_str(
111111
self, format_mapping: Optional[FormatMapping] = None
112112
) -> str:
113+
def to_json(result: pl.DataFrame) -> list[dict[str, Any]]:
114+
# Use to_dicts instead of write_json
115+
# Preserves certain types like nans, infs, -infs, etc.
116+
return result.to_dicts()
117+
113118
result = self.apply_formatting(format_mapping).collect()
114119
try:
115120
for column in result.get_columns():
@@ -127,7 +132,7 @@ def to_json_str(
127132
result = result.with_columns(
128133
pl.col(column.name).cast(pl.List(pl.String))
129134
)
130-
return sanitize_json_bigint(result.write_json())
135+
return sanitize_json_bigint(to_json(result))
131136
except (
132137
BaseException
133138
): # Sometimes, polars throws a generic exception
@@ -169,7 +174,7 @@ def to_json_str(
169174
", ".join(f"'{col}'" for col in converted_columns),
170175
)
171176

172-
return sanitize_json_bigint(result.write_json())
177+
return sanitize_json_bigint(to_json(result))
173178

174179
def _convert_time_to_string(
175180
self, result: pl.DataFrame, column: pl.Series
@@ -189,7 +194,7 @@ def _cast_object_to_string(
189194
# As of writing this, cast(pl.String) doesn't work
190195
# for pl.Object types, so we use map_elements
191196
column.map_elements(
192-
lambda v: str(self._sanitize_table_value(v)),
197+
lambda v: str(v),
193198
return_dtype=pl.String,
194199
)
195200
)

‎marimo/this.py‎

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# Copyright 2025 Marimo. All rights reserved.
12
print("""The marimo contract
23
34
To use marimo means entering into a social contract.

‎tests/_messaging/test_enc_hook.py‎

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Copyright 2025 Marimo. All rights reserved.
2+
3+
import json
4+
5+
import pytest
6+
7+
from marimo._dependencies.dependencies import DependencyManager
8+
from marimo._messaging.msgspec_encoder import enc_hook
9+
10+
11+
def test_enc_hook() -> None:
    """Check enc_hook on plain Python values and, if loaded, numpy arrays."""
    # Each primitive is expected to come back equal to what was passed in.
    assert enc_hook(1) == 1
    assert enc_hook("hello") == "hello"
    assert enc_hook(3.14) == 3.14
    assert enc_hook(True) is True
    assert enc_hook(None) is None

    # The numpy check only runs when numpy has already been imported.
    if DependencyManager.numpy.imported():
        import numpy as np

        # A numpy array is expected to compare equal to a plain list.
        assert enc_hook(np.array([1, 2, 3])) == [1, 2, 3]
24+
25+
26+
@pytest.mark.skipif(
    not DependencyManager.pillow.imported(),
    reason="Pillow not installed",
)
def test_serialize_pillow_image() -> None:
    """A Pillow image should encode to a base64 PNG data URL string."""
    from PIL import Image

    # A small solid-colour image is enough to exercise the encoder.
    encoded = enc_hook(Image.new("RGB", (10, 10), color="red"))

    assert encoded is not None
    assert encoded.startswith("data:image/png;base64,")
39+
40+
41+
@pytest.mark.skipif(
    not DependencyManager.matplotlib.imported(),
    reason="Matplotlib not installed",
)
def test_serialize_matplotlib_figure() -> None:
    """A matplotlib Figure and its Axes should encode to a mime dict."""
    import matplotlib.pyplot as plt

    fig, ax = plt.subplots()
    ax.plot([1, 2, 3], [1, 2, 3])

    try:
        # Every assertion runs for both the Figure and the Axes.
        for obj in (fig, ax):
            result = enc_hook(obj)
            assert isinstance(result, dict)

            assert "mimetype" in result
            assert "data" in result

            assert "application/vnd.marimo+mimebundle" in result["mimetype"]
            assert "image/png" in result["data"]

            # The data payload is itself a JSON document keyed by mimetype.
            image_data = json.loads(result["data"])
            assert "image/png" in image_data
            assert image_data["image/png"].startswith(
                "data:image/png;base64,"
            )
    finally:
        # pyplot keeps figures alive globally; close it so it does not
        # leak into later tests.
        plt.close(fig)
Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
strings,bool,int,large_int,float,datetime,date,struct,list,array,nulls,category,set,imaginary,time,duration,mixed_list,enum_list
2-
a,true,1,18446744073709551616,1.0,2021-01-01T00:00:00.000000,2021-01-01,"{""a"":1,""b"":2}","1,2",1,,cat,"{1, 2}",(1+2j),12:30:00.000000000,1d,"1,two","A,B,C"
3-
b,false,2,36893488147419103233,2.0,2021-01-02T00:00:00.000000,2021-01-02,"{""a"":3,""b"":4}","3,4",2,data,dog,"{3, 4}",(3+4j),13:45:00.000000000,315µs,"3.0,0.0","A,B,C"
4-
c,true,3,73786976294838206466,3.0,2021-01-03T00:00:00.000000,2021-01-03,"{""a"":5,""b"":6}","5,6",3,,mouse,"{5, 6}",(5+6j),14:15:00.000000000,2h 30m,2021-01-01 00:00:00.000000,"A,B,C"
1+
strings,bool,int,large_int,float,datetime,date,struct,list,array,nulls,category,set,imaginary,time,duration,nans,infs,mixed_list,enum_list
2+
a,true,1,18446744073709551616,1.0,2021-01-01T00:00:00.000000,2021-01-01,"{""a"":1,""b"":2}","1,2",1,,cat,"{1, 2}",(1+2j),12:30:00.000000000,1d,NaN,inf,"1,two","A,B,C"
3+
b,false,2,36893488147419103233,2.0,2021-01-02T00:00:00.000000,2021-01-02,"{""a"":3,""b"":4}","3,4",2,data,dog,"{3, 4}",(3+4j),13:45:00.000000000,315µs,NaN,-inf,"3.0,0.0","A,B,C"
4+
c,true,3,73786976294838206466,3.0,2021-01-03T00:00:00.000000,2021-01-03,"{""a"":5,""b"":6}","5,6",3,,mouse,"{5, 6}",(5+6j),14:15:00.000000000,2h 30m,NaN,inf,2021-01-01 00:00:00.000000,"A,B,C"

0 commit comments

Comments
 (0)