Prevent Dataframe post-processing from changing dtypes (#1979)

* Fix datetime logic python * Fix docstring * Add demo * Update test
2025-02-17 11:29:58 +08:00 · 2022-08-08 21:28:07 -04:00 · 2022-08-08 21:28:07 -04:00 · d7c1a9eec4
commit d7c1a9eec4
parent 5fe02164f9
3 changed files with 85 additions and 4 deletions
--- a/demo/dataframe_datatype/run.py
+++ b/demo/dataframe_datatype/run.py
@@ -0,0 +1,21 @@
+import gradio as gr
+import pandas as pd
+import numpy as np
+
+
+def make_dataframe(n_periods):  # build an n_periods-row frame with one column per datatype the demo displays
+    return pd.DataFrame({"date_1": pd.date_range("2021-01-01", periods=n_periods),  # consecutive datetime values
+                         "date_2": pd.date_range("2022-02-15", periods=n_periods).strftime('%B %d, %Y, %r'),  # dates already formatted as strings
+                         "number": np.random.random(n_periods).astype(np.float64),  # random floats, explicitly float64
+                         "number_2": np.random.randint(0, 100, n_periods).astype(np.int32),  # random ints from [0, 100), explicitly int32
+                         "bool": [True] * n_periods,  # constant boolean column
+                         "markdown": ["# Hello"] * n_periods})  # markdown source text; shown through the "markdown" datatype
+
+
+demo = gr.Interface(make_dataframe,  # NOTE(review): presumably fn / inputs / outputs in positional order — confirm against gr.Interface
+             gr.Number(precision=0),  # NOTE(review): precision=0 presumably yields a whole number usable as n_periods — confirm
+             gr.Dataframe(datatype=["date", "date", "number", "number", "bool", "markdown"]))  # one datatype per column, in make_dataframe's column order
+
+
+if __name__ == "__main__":  # only start the demo when this file is run as a script
+    demo.launch()
--- a/gradio/components.py
+++ b/gradio/components.py
@@ -2535,7 +2535,7 @@ class Dataframe(Changeable, IOComponent):
            headers: List of str header names. If None, no headers are shown.
            row_count: Limit number of rows for input and decide whether user can create new rows. The first element of the tuple is an `int`, the row count; the second should be 'fixed' or 'dynamic', the new row behaviour. If an `int` is passed the rows default to 'dynamic'
            col_count: Limit number of columns for input and decide whether user can create new columns. The first element of the tuple is an `int`, the number of columns; the second should be 'fixed' or 'dynamic', the new column behaviour. If an `int` is passed the columns default to 'dynamic'
-            datatype: Datatype of values in sheet. Can be provided per column as a list of strings, or for the entire sheet as a single string. Valid datatypes are "str", "number", "bool", and "date".
+            datatype: Datatype of values in sheet. Can be provided per column as a list of strings, or for the entire sheet as a single string. Valid datatypes are "str", "number", "bool", "date", and "markdown".
            type: Type of value to be returned by component. "pandas" for pandas dataframe, "numpy" for numpy array, or "array" for a Python array.
            label: component name in interface.
            max_rows: Maximum number of rows to display at once. Set to None for infinite.
@@ -2691,12 +2691,16 @@ class Dataframe(Changeable, IOComponent):
            y = pd.read_csv(y)
            return {
                "headers": list(y.columns),
-                "data": Dataframe.__process_markdown(y.values.tolist(), self.datatype),
+                "data": Dataframe.__process_markdown(
+                    y.to_dict(orient="split")["data"], self.datatype
+                ),
            }
        if isinstance(y, pd.DataFrame):
            return {
                "headers": list(y.columns),
-                "data": Dataframe.__process_markdown(y.values.tolist(), self.datatype),
+                "data": Dataframe.__process_markdown(
+                    y.to_dict(orient="split")["data"], self.datatype
+                ),
            }
        if isinstance(y, (np.ndarray, list)):
            if isinstance(y, np.ndarray):
@@ -2710,7 +2714,7 @@ class Dataframe(Changeable, IOComponent):
                    *list(range(len(self.headers) + 1, len(y[0]) + 1)),
                ]
            elif len(self.headers) > len(y[0]):
-                _headers = self.headers[0 : len(y[0])]
+                _headers = self.headers[: len(y[0])]

            return {
                "headers": _headers,
--- a/test/test_components.py
+++ b/test/test_components.py
@@ -1846,5 +1846,61 @@ def test_slider_rounds_when_using_default_randomizer(mock_randint):
    mock_randint.assert_called()


+def test_dataframe_postprocess_all_types():  # postprocess must keep each column's values/types; only markdown is rendered
+    df = pd.DataFrame(
+        {
+            "date_1": pd.date_range("2021-01-01", periods=2),  # datetime column
+            "date_2": pd.date_range("2022-02-15", periods=2).strftime("%B %d, %Y, %r"),  # dates pre-formatted as strings
+            "number": np.array([0.2233, 0.57281]),
+            "number_2": np.array([84, 23]).astype(int),  # builtin int: the np.int alias was removed in NumPy 1.24
+            "bool": [True, False],
+            "markdown": ["# Hello", "# Goodbye"],
+        }
+    )
+    component = gr.Dataframe(
+        datatype=["date", "date", "number", "number", "bool", "markdown"]
+    )
+    output = component.postprocess(df)
+    assert output == {  # expected rows mirror the input frame cell for cell
+        "headers": list(df.columns),
+        "data": [
+            [
+                pd.Timestamp("2021-01-01 00:00:00"),  # datetime stays a Timestamp
+                "February 15, 2022, 12:00:00 AM",  # string date passes through unchanged
+                0.2233,
+                84,
+                True,
+                "<h1>Hello</h1>\n",  # markdown cell is converted to HTML
+            ],
+            [
+                pd.Timestamp("2021-01-02 00:00:00"),
+                "February 16, 2022, 12:00:00 AM",
+                0.57281,
+                23,
+                False,
+                "<h1>Goodbye</h1>\n",
+            ],
+        ],
+    }
+
+
+def test_dataframe_postprocess_only_dates():  # a frame made only of datetime columns must come back as Timestamps
+    df = pd.DataFrame(
+        {
+            "date_1": pd.date_range("2021-01-01", periods=2),
+            "date_2": pd.date_range("2022-02-15", periods=2),
+        }
+    )
+    component = gr.Dataframe(datatype=["date", "date"])
+    output = component.postprocess(df)
+    assert output == {  # every cell is expected to compare equal to a pd.Timestamp
+        "headers": list(df.columns),
+        "data": [
+            [pd.Timestamp("2021-01-01 00:00:00"), pd.Timestamp("2022-02-15 00:00:00")],
+            [pd.Timestamp("2021-01-02 00:00:00"), pd.Timestamp("2022-02-16 00:00:00")],
+        ],
+    }
+
+
 if __name__ == "__main__":
    unittest.main()