Prevent Dataframe post-processing from changing dtypes (#1979)

* Fix datetime logic python

* Fix docstring

* Add demo

* Update test
This commit is contained in:
Freddy Boulton 2022-08-08 21:28:07 -04:00 committed by GitHub
parent 5fe02164f9
commit d7c1a9eec4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 85 additions and 4 deletions

View File

@ -0,0 +1,21 @@
import gradio as gr
import pandas as pd
import numpy as np
def make_dataframe(n_periods):
    """Build a sample frame with one column for each showcased datatype.

    Args:
        n_periods: Number of rows to generate. Any value accepted by
            ``int()`` works; a float such as ``3.0`` is converted (and a
            fractional value is truncated toward zero) so that list
            repetition and the NumPy size arguments below do not raise
            ``TypeError``.

    Returns:
        A ``pandas.DataFrame`` with ``n_periods`` rows and the columns
        ``date_1`` (datetimes), ``date_2`` (dates pre-formatted as strings),
        ``number`` (float64), ``number_2`` (int32), ``bool`` and
        ``markdown``.
    """
    # Normalise once: ``[True] * 2.0`` and ``np.random.random(2.0)`` both fail.
    n_periods = int(n_periods)
    return pd.DataFrame(
        {
            "date_1": pd.date_range("2021-01-01", periods=n_periods),
            "date_2": pd.date_range("2022-02-15", periods=n_periods).strftime(
                "%B %d, %Y, %r"
            ),
            "number": np.random.random(n_periods).astype(np.float64),
            "number_2": np.random.randint(0, 100, n_periods).astype(np.int32),
            "bool": [True] * n_periods,
            "markdown": ["# Hello"] * n_periods,
        }
    )
# One numeric input (rounded to an integer by ``precision=0``) feeds
# ``make_dataframe``; the output table declares a datatype for each of the
# six columns, in column order.
row_count_input = gr.Number(precision=0)
table_output = gr.Dataframe(
    datatype=["date", "date", "number", "number", "bool", "markdown"]
)
demo = gr.Interface(
    fn=make_dataframe,
    inputs=row_count_input,
    outputs=table_output,
)

if __name__ == "__main__":
    # Only start the app when this file is executed directly.
    demo.launch()

View File

@ -2535,7 +2535,7 @@ class Dataframe(Changeable, IOComponent):
headers: List of str header names. If None, no headers are shown.
row_count: Limit number of rows for input and decide whether user can create new rows. The first element of the tuple is an `int`, the row count; the second should be 'fixed' or 'dynamic', the new row behaviour. If an `int` is passed the rows default to 'dynamic'
col_count: Limit number of columns for input and decide whether user can create new columns. The first element of the tuple is an `int`, the number of columns; the second should be 'fixed' or 'dynamic', the new column behaviour. If an `int` is passed the columns default to 'dynamic'
datatype: Datatype of values in sheet. Can be provided per column as a list of strings, or for the entire sheet as a single string. Valid datatypes are "str", "number", "bool", and "date".
datatype: Datatype of values in sheet. Can be provided per column as a list of strings, or for the entire sheet as a single string. Valid datatypes are "str", "number", "bool", "date", and "markdown".
type: Type of value to be returned by component. "pandas" for pandas dataframe, "numpy" for numpy array, or "array" for a Python array.
label: component name in interface.
max_rows: Maximum number of rows to display at once. Set to None for infinite.
@ -2691,12 +2691,16 @@ class Dataframe(Changeable, IOComponent):
y = pd.read_csv(y)
return {
"headers": list(y.columns),
"data": Dataframe.__process_markdown(y.values.tolist(), self.datatype),
"data": Dataframe.__process_markdown(
y.to_dict(orient="split")["data"], self.datatype
),
}
if isinstance(y, pd.DataFrame):
return {
"headers": list(y.columns),
"data": Dataframe.__process_markdown(y.values.tolist(), self.datatype),
"data": Dataframe.__process_markdown(
y.to_dict(orient="split")["data"], self.datatype
),
}
if isinstance(y, (np.ndarray, list)):
if isinstance(y, np.ndarray):
@ -2710,7 +2714,7 @@ class Dataframe(Changeable, IOComponent):
*list(range(len(self.headers) + 1, len(y[0]) + 1)),
]
elif len(self.headers) > len(y[0]):
_headers = self.headers[0 : len(y[0])]
_headers = self.headers[: len(y[0])]
return {
"headers": _headers,

View File

@ -1846,5 +1846,61 @@ def test_slider_rounds_when_using_default_randomizer(mock_randint):
mock_randint.assert_called()
def test_dataframe_postprocess_all_types():
    """Check ``Dataframe.postprocess`` on a frame mixing every datatype.

    The frame holds datetimes, pre-formatted date strings, floats, integers,
    booleans and markdown. The expected payload keeps each cell's Python
    type as-is, except markdown cells, which are expected as rendered HTML.
    """
    df = pd.DataFrame(
        {
            "date_1": pd.date_range("2021-01-01", periods=2),
            "date_2": pd.date_range("2022-02-15", periods=2).strftime("%B %d, %Y, %r"),
            "number": np.array([0.2233, 0.57281]),
            # ``np.int`` was merely an alias of the builtin ``int``; it was
            # deprecated in NumPy 1.20 and removed in 1.24, so use ``int``.
            "number_2": np.array([84, 23]).astype(int),
            "bool": [True, False],
            "markdown": ["# Hello", "# Goodbye"],
        }
    )
    component = gr.Dataframe(
        datatype=["date", "date", "number", "number", "bool", "markdown"]
    )
    output = component.postprocess(df)
    assert output == {
        "headers": list(df.columns),
        "data": [
            [
                pd.Timestamp("2021-01-01 00:00:00"),
                "February 15, 2022, 12:00:00 AM",
                0.2233,
                84,
                True,
                "<h1>Hello</h1>\n",
            ],
            [
                pd.Timestamp("2021-01-02 00:00:00"),
                "February 16, 2022, 12:00:00 AM",
                0.57281,
                23,
                False,
                "<h1>Goodbye</h1>\n",
            ],
        ],
    }
def test_dataframe_postprocess_only_dates():
    """Check ``Dataframe.postprocess`` on a frame made only of datetimes.

    Both columns are datetime ranges, and every cell of the resulting
    payload is expected to be a ``pd.Timestamp`` equal to the source value.
    """
    first_dates = pd.date_range("2021-01-01", periods=2)
    second_dates = pd.date_range("2022-02-15", periods=2)
    frame = pd.DataFrame({"date_1": first_dates, "date_2": second_dates})

    table = gr.Dataframe(datatype=["date", "date"])
    result = table.postprocess(frame)

    expected_rows = [
        [pd.Timestamp("2021-01-01 00:00:00"), pd.Timestamp("2022-02-15 00:00:00")],
        [pd.Timestamp("2021-01-02 00:00:00"), pd.Timestamp("2022-02-16 00:00:00")],
    ]
    assert result == {"headers": list(frame.columns), "data": expected_rows}
# Allow running this test module directly as a script.
if __name__ == "__main__":
    unittest.main()