mirror of
https://github.com/gradio-app/gradio.git
synced 2025-02-17 11:29:58 +08:00
Prevent Dataframe post-processing from changing dtypes (#1979)
* Fix datetime logic python * Fix docstring * Add demo * Update test
This commit is contained in:
parent
5fe02164f9
commit
d7c1a9eec4
21
demo/dataframe_datatype/run.py
Normal file
21
demo/dataframe_datatype/run.py
Normal file
@ -0,0 +1,21 @@
|
||||
import gradio as gr
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
def make_dataframe(n_periods):
    """Build a sample DataFrame with one column per datatype shown in the demo.

    Args:
        n_periods: Number of rows to generate.

    Returns:
        A ``pandas.DataFrame`` with ``n_periods`` rows and the columns
        ``date_1`` (datetime values), ``date_2`` (dates already formatted as
        strings), ``number`` (random float64), ``number_2`` (random int32 in
        ``[0, 100)``), ``bool`` (all ``True``) and ``markdown`` (a markdown
        heading string).
    """
    return pd.DataFrame(
        {
            "date_1": pd.date_range("2021-01-01", periods=n_periods),
            # Pre-formatted strings: exercises a "date" column that is not
            # backed by a datetime dtype.
            "date_2": pd.date_range("2022-02-15", periods=n_periods).strftime(
                "%B %d, %Y, %r"
            ),
            "number": np.random.random(n_periods).astype(np.float64),
            "number_2": np.random.randint(0, 100, n_periods).astype(np.int32),
            "bool": [True] * n_periods,
            "markdown": ["# Hello"] * n_periods,
        }
    )
|
||||
|
||||
|
||||
# Interface wiring: a numeric input feeds make_dataframe, and the result is
# rendered in a Dataframe component. The datatype list is positional and
# mirrors the column order produced by make_dataframe
# (date_1, date_2, number, number_2, bool, markdown).
# NOTE(review): precision=0 presumably makes the Number component yield an
# integer row count -- confirm against the gr.Number documentation.
demo = gr.Interface(
    fn=make_dataframe,
    inputs=gr.Number(precision=0),
    outputs=gr.Dataframe(
        datatype=["date", "date", "number", "number", "bool", "markdown"]
    ),
)


if __name__ == "__main__":
    demo.launch()
|
@ -2535,7 +2535,7 @@ class Dataframe(Changeable, IOComponent):
|
||||
headers: List of str header names. If None, no headers are shown.
|
||||
row_count: Limit number of rows for input and decide whether user can create new rows. The first element of the tuple is an `int`, the row count; the second should be 'fixed' or 'dynamic', the new row behaviour. If an `int` is passed the rows default to 'dynamic'
|
||||
col_count: Limit number of columns for input and decide whether user can create new columns. The first element of the tuple is an `int`, the number of columns; the second should be 'fixed' or 'dynamic', the new column behaviour. If an `int` is passed the columns default to 'dynamic'
|
||||
datatype: Datatype of values in sheet. Can be provided per column as a list of strings, or for the entire sheet as a single string. Valid datatypes are "str", "number", "bool", and "date".
|
||||
datatype: Datatype of values in sheet. Can be provided per column as a list of strings, or for the entire sheet as a single string. Valid datatypes are "str", "number", "bool", "date", and "markdown".
|
||||
type: Type of value to be returned by component. "pandas" for pandas dataframe, "numpy" for numpy array, or "array" for a Python array.
|
||||
label: component name in interface.
|
||||
max_rows: Maximum number of rows to display at once. Set to None for infinite.
|
||||
@ -2691,12 +2691,16 @@ class Dataframe(Changeable, IOComponent):
|
||||
y = pd.read_csv(y)
|
||||
return {
|
||||
"headers": list(y.columns),
|
||||
"data": Dataframe.__process_markdown(y.values.tolist(), self.datatype),
|
||||
"data": Dataframe.__process_markdown(
|
||||
y.to_dict(orient="split")["data"], self.datatype
|
||||
),
|
||||
}
|
||||
if isinstance(y, pd.DataFrame):
|
||||
return {
|
||||
"headers": list(y.columns),
|
||||
"data": Dataframe.__process_markdown(y.values.tolist(), self.datatype),
|
||||
"data": Dataframe.__process_markdown(
|
||||
y.to_dict(orient="split")["data"], self.datatype
|
||||
),
|
||||
}
|
||||
if isinstance(y, (np.ndarray, list)):
|
||||
if isinstance(y, np.ndarray):
|
||||
@ -2710,7 +2714,7 @@ class Dataframe(Changeable, IOComponent):
|
||||
*list(range(len(self.headers) + 1, len(y[0]) + 1)),
|
||||
]
|
||||
elif len(self.headers) > len(y[0]):
|
||||
_headers = self.headers[0 : len(y[0])]
|
||||
_headers = self.headers[: len(y[0])]
|
||||
|
||||
return {
|
||||
"headers": _headers,
|
||||
|
@ -1846,5 +1846,61 @@ def test_slider_rounds_when_using_default_randomizer(mock_randint):
|
||||
mock_randint.assert_called()
|
||||
|
||||
|
||||
def test_dataframe_postprocess_all_types():
    """Postprocessing a mixed-dtype DataFrame keeps each cell's value intact.

    Builds a frame with datetime, pre-formatted date string, float, integer,
    bool and markdown columns, runs it through ``Dataframe.postprocess`` and
    checks the headers and the row-wise data. The expected output keeps the
    datetimes as ``pd.Timestamp`` objects, leaves the integer column as
    integers, and renders the markdown column to HTML.
    """
    df = pd.DataFrame(
        {
            "date_1": pd.date_range("2021-01-01", periods=2),
            "date_2": pd.date_range("2022-02-15", periods=2).strftime(
                "%B %d, %Y, %r"
            ),
            "number": np.array([0.2233, 0.57281]),
            # ``np.int`` was only an alias for the builtin ``int``; it was
            # deprecated in NumPy 1.20 and removed in 1.24, so use ``int``.
            "number_2": np.array([84, 23]).astype(int),
            "bool": [True, False],
            "markdown": ["# Hello", "# Goodbye"],
        }
    )
    component = gr.Dataframe(
        datatype=["date", "date", "number", "number", "bool", "markdown"]
    )
    output = component.postprocess(df)
    assert output == {
        "headers": list(df.columns),
        "data": [
            [
                pd.Timestamp("2021-01-01 00:00:00"),
                "February 15, 2022, 12:00:00 AM",
                0.2233,
                84,
                True,
                "<h1>Hello</h1>\n",
            ],
            [
                pd.Timestamp("2021-01-02 00:00:00"),
                "February 16, 2022, 12:00:00 AM",
                0.57281,
                23,
                False,
                "<h1>Goodbye</h1>\n",
            ],
        ],
    }
|
||||
|
||||
|
||||
def test_dataframe_postprocess_only_dates():
    """Postprocessing a datetime-only DataFrame yields ``pd.Timestamp`` cells.

    Both columns are datetime ranges; the expected output lists the column
    names as headers and the rows as lists of ``pd.Timestamp`` objects.
    """
    df = pd.DataFrame(
        {
            "date_1": pd.date_range("2021-01-01", periods=2),
            "date_2": pd.date_range("2022-02-15", periods=2),
        }
    )
    component = gr.Dataframe(datatype=["date", "date"])
    output = component.postprocess(df)
    assert output == {
        "headers": list(df.columns),
        "data": [
            [
                pd.Timestamp("2021-01-01 00:00:00"),
                pd.Timestamp("2022-02-15 00:00:00"),
            ],
            [
                pd.Timestamp("2021-01-02 00:00:00"),
                pd.Timestamp("2022-02-16 00:00:00"),
            ],
        ],
    }
|
||||
|
||||
|
||||
# Run the unittest runner when this module is executed directly.
if __name__ == "__main__":
    unittest.main()
|
||||
|
Loading…
Reference in New Issue
Block a user