mirror of
https://github.com/gradio-app/gradio.git
synced 2025-04-12 12:40:29 +08:00
Clean up gr.DataFrame.postprocess()
and fix issue with getting headers of empty dataframes (#10476)
* changes * changes * add changeset * changes * format * changes * add changeset * notebook * fix --------- Co-authored-by: gradio-pr-bot <gradio-pr-bot@users.noreply.github.com>
This commit is contained in:
parent
2cf449abb3
commit
017ed46272
5
.changeset/lucky-towns-allow.md
Normal file
5
.changeset/lucky-towns-allow.md
Normal file
@ -0,0 +1,5 @@
|
||||
---
|
||||
"gradio": patch
|
||||
---
|
||||
|
||||
fix:Clean up `gr.DataFrame.postprocess()` and fix issue with getting headers of empty dataframes
|
File diff suppressed because one or more lines are too long
@ -1,3 +1,4 @@
|
||||
# type: ignore
|
||||
import gradio as gr
|
||||
import pandas as pd
|
||||
from pathlib import Path
|
||||
|
@ -74,7 +74,10 @@ class Dataframe(Component):
|
||||
headers: list[str] | None = None,
|
||||
row_count: int | tuple[int, str] = (1, "dynamic"),
|
||||
col_count: int | tuple[int, str] | None = None,
|
||||
datatype: str | list[str] = "str",
|
||||
datatype: Literal["str", "number", "bool", "date", "markdown", "html"]
|
||||
| Sequence[
|
||||
Literal["str", "number", "bool", "date", "markdown", "html"]
|
||||
] = "str",
|
||||
type: Literal["pandas", "numpy", "array", "polars"] = "pandas",
|
||||
latex_delimiters: list[dict[str, str | bool]] | None = None,
|
||||
label: str | None = None,
|
||||
@ -99,8 +102,8 @@ class Dataframe(Component):
|
||||
):
|
||||
"""
|
||||
Parameters:
|
||||
value: Default value to display in the DataFrame. If a Styler is provided, it will be used to set the displayed value in the DataFrame (e.g. to set precision of numbers) if the `interactive` is False. If a Callable function is provided, the function will be called whenever the app loads to set the initial value of the component.
|
||||
headers: List of str header names. If None, no headers are shown.
|
||||
value: Default value to display in the DataFrame. Supports pandas, numpy, polars, and list of lists. If a Styler is provided, it will be used to set the displayed value in the DataFrame (e.g. to set precision of numbers) if the `interactive` is False. If a Callable function is provided, the function will be called whenever the app loads to set the initial value of the component.
|
||||
headers: List of str header names. These are used to set the column headers of the dataframe if the value does not have headers. If None, no headers are shown.
|
||||
row_count: Limit number of rows for input and decide whether user can create new rows or delete existing rows. The first element of the tuple is an `int`, the row count; the second should be 'fixed' or 'dynamic', the new row behaviour. If an `int` is passed the rows default to 'dynamic'
|
||||
col_count: Limit number of columns for input and decide whether user can create new columns or delete existing columns. The first element of the tuple is an `int`, the number of columns; the second should be 'fixed' or 'dynamic', the new column behaviour. If an `int` is passed the columns default to 'dynamic'
|
||||
datatype: Datatype of values in sheet. Can be provided per column as a list of strings, or for the entire sheet as a single string. Valid datatypes are "str", "number", "bool", "date", "markdown", and "html".
|
||||
@ -150,24 +153,6 @@ class Dataframe(Component):
|
||||
"Polars is not installed. Please install using `pip install polars`."
|
||||
)
|
||||
self.type = type
|
||||
values = {
|
||||
"str": "",
|
||||
"number": 0,
|
||||
"bool": False,
|
||||
"date": "01/01/1970",
|
||||
"markdown": "",
|
||||
"html": "",
|
||||
}
|
||||
column_dtypes = (
|
||||
[datatype] * self.col_count[0] if isinstance(datatype, str) else datatype
|
||||
)
|
||||
self.empty_input = {
|
||||
"headers": self.headers,
|
||||
"data": [
|
||||
[values[c] for c in column_dtypes] for _ in range(self.row_count[0])
|
||||
],
|
||||
"metadata": None,
|
||||
}
|
||||
|
||||
if latex_delimiters is None:
|
||||
latex_delimiters = [{"left": "$$", "right": "$$", "display": True}]
|
||||
@ -235,7 +220,7 @@ class Dataframe(Component):
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _is_empty(
|
||||
def is_empty(
|
||||
value: pd.DataFrame
|
||||
| Styler
|
||||
| np.ndarray
|
||||
@ -246,9 +231,14 @@ class Dataframe(Component):
|
||||
| str
|
||||
| None,
|
||||
) -> bool:
|
||||
"""
|
||||
Checks if the value of the dataframe provided is empty.
|
||||
"""
|
||||
import pandas as pd
|
||||
from pandas.io.formats.style import Styler
|
||||
|
||||
if value is None:
|
||||
return True
|
||||
if isinstance(value, pd.DataFrame):
|
||||
return value.empty
|
||||
elif isinstance(value, Styler):
|
||||
@ -257,12 +247,123 @@ class Dataframe(Component):
|
||||
return value.size == 0
|
||||
elif _is_polars_available() and isinstance(value, _import_polars().DataFrame):
|
||||
return value.is_empty()
|
||||
elif isinstance(value, list) and len(value) and isinstance(value[0], list):
|
||||
return len(value[0]) == 0
|
||||
elif isinstance(value, (list, dict)):
|
||||
elif isinstance(value, list):
|
||||
if len(value) > 0 and isinstance(value[0], list):
|
||||
return len(value[0]) == 0
|
||||
return len(value) == 0
|
||||
elif isinstance(value, dict):
|
||||
if "data" in value:
|
||||
return len(value["data"]) == 0
|
||||
return len(value) == 0
|
||||
return False
|
||||
|
||||
def get_headers(
|
||||
self,
|
||||
value: pd.DataFrame
|
||||
| Styler
|
||||
| np.ndarray
|
||||
| pl.DataFrame
|
||||
| list
|
||||
| list[list]
|
||||
| dict
|
||||
| str
|
||||
| None,
|
||||
) -> list[str]:
|
||||
"""
|
||||
Returns the headers of the dataframes based on the value provided. For values
|
||||
that do not have headers, an empty list is returned.
|
||||
"""
|
||||
import pandas as pd
|
||||
from pandas.io.formats.style import Styler
|
||||
|
||||
if value is None:
|
||||
return []
|
||||
if isinstance(value, pd.DataFrame):
|
||||
return list(value.columns)
|
||||
elif isinstance(value, Styler):
|
||||
return list(value.data.columns) # type: ignore
|
||||
elif isinstance(value, str):
|
||||
return list(pd.read_csv(value).columns)
|
||||
elif _is_polars_available() and isinstance(value, _import_polars().DataFrame):
|
||||
return list(value.columns)
|
||||
elif isinstance(value, dict):
|
||||
return value.get("headers", [])
|
||||
elif isinstance(value, (list, np.ndarray)):
|
||||
return []
|
||||
return []
|
||||
|
||||
@staticmethod
|
||||
def get_cell_data(
|
||||
value: pd.DataFrame
|
||||
| Styler
|
||||
| np.ndarray
|
||||
| pl.DataFrame
|
||||
| list
|
||||
| list[list]
|
||||
| dict
|
||||
| str
|
||||
| None,
|
||||
) -> list[list[Any]]:
|
||||
"""
|
||||
Gets the cell data (as a list of lists) from the value provided.
|
||||
"""
|
||||
import pandas as pd
|
||||
from pandas.io.formats.style import Styler
|
||||
|
||||
if isinstance(value, dict):
|
||||
return value.get("data", [[]])
|
||||
if isinstance(value, (str, pd.DataFrame)):
|
||||
if isinstance(value, str):
|
||||
value = pd.read_csv(value) # type: ignore
|
||||
return value.to_dict(orient="split")["data"]
|
||||
elif isinstance(value, Styler):
|
||||
df: pd.DataFrame = value.data # type: ignore
|
||||
hidden_columns = getattr(value, "hidden_columns", [])
|
||||
visible_cols = [
|
||||
i for i, _ in enumerate(df.columns) if i not in hidden_columns
|
||||
]
|
||||
df = df.iloc[:, visible_cols]
|
||||
return df.to_dict(orient="split")["data"]
|
||||
elif _is_polars_available() and isinstance(value, _import_polars().DataFrame):
|
||||
df_dict = value.to_dict() # type: ignore
|
||||
data = list(zip(*df_dict.values()))
|
||||
return data
|
||||
elif isinstance(value, (np.ndarray, list)):
|
||||
if isinstance(value, np.ndarray):
|
||||
value = value.tolist()
|
||||
if not isinstance(value, list):
|
||||
raise ValueError("output cannot be converted to list")
|
||||
if not isinstance(value[0], list):
|
||||
return [[v] for v in value]
|
||||
return value
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Cannot process value of type {type(value)} in gr.Dataframe"
|
||||
)
|
||||
|
||||
@staticmethod
def get_metadata(
    value: pd.DataFrame
    | Styler
    | np.ndarray
    | pl.DataFrame
    | list
    | list[list]
    | dict
    | str
    | None,
) -> dict[str, list[list]] | None:
    """
    Gets the metadata from the value provided.

    Only pandas Styler objects carry metadata; it is extracted from the Styler
    together with its `hidden_columns` (an empty list when the attribute is
    absent). Every other kind of value yields None.
    """
    from pandas.io.formats.style import Styler

    # Guard clause: anything that is not a Styler has no metadata.
    if not isinstance(value, Styler):
        return None

    hidden_columns = getattr(value, "hidden_columns", [])
    return Dataframe.__extract_metadata(value, hidden_columns)
|
||||
|
||||
def postprocess(
|
||||
self,
|
||||
value: pd.DataFrame
|
||||
@ -290,86 +391,30 @@ class Dataframe(Component):
|
||||
raise ValueError(
|
||||
"Styler objects are only supported in pandas version 1.5.0 or higher. Please try: `pip install --upgrade pandas` to use this feature."
|
||||
)
|
||||
if isinstance(value, Styler) and self.interactive:
|
||||
warnings.warn(
|
||||
"Cannot display Styler object in interactive mode. Will display as a regular pandas dataframe instead."
|
||||
)
|
||||
|
||||
if value is None or self._is_empty(value):
|
||||
return DataframeData(
|
||||
headers=self.headers, data=[["" for _ in range(len(self.headers))]]
|
||||
)
|
||||
if isinstance(value, dict):
|
||||
if len(value) == 0:
|
||||
return DataframeData(
|
||||
headers=self.headers, data=[["" for _ in range(len(self.headers))]]
|
||||
)
|
||||
return DataframeData(
|
||||
headers=value.get("headers", []), data=value.get("data", [[]])
|
||||
)
|
||||
if isinstance(value, (str, pd.DataFrame)):
|
||||
if isinstance(value, str):
|
||||
value = pd.read_csv(value) # type: ignore
|
||||
if len(value) == 0:
|
||||
return DataframeData(
|
||||
headers=[str(col) for col in value.columns], # Convert to strings
|
||||
data=[["" for _ in range(len(value.columns))]],
|
||||
)
|
||||
return DataframeData(
|
||||
headers=[str(col) for col in value.columns],
|
||||
data=value.to_dict(orient="split")["data"],
|
||||
)
|
||||
elif isinstance(value, Styler):
|
||||
if self.interactive:
|
||||
warnings.warn(
|
||||
"Cannot display Styler object in interactive mode. Will display as a regular pandas dataframe instead."
|
||||
)
|
||||
df: pd.DataFrame = value.data # type: ignore
|
||||
visible_cols = [
|
||||
i
|
||||
for i, col in enumerate(df.columns)
|
||||
if i not in getattr(value, "hidden_columns", [])
|
||||
headers = self.get_headers(value) or self.headers
|
||||
data = (
|
||||
[["" for _ in range(len(headers))]]
|
||||
if self.is_empty(value)
|
||||
else self.get_cell_data(value)
|
||||
)
|
||||
if len(headers) > len(data[0]):
|
||||
headers = headers[: len(data[0])]
|
||||
elif len(headers) < len(data[0]):
|
||||
headers = [
|
||||
*headers,
|
||||
*[str(i) for i in range(len(headers) + 1, len(data[0]) + 1)],
|
||||
]
|
||||
df = df.iloc[:, visible_cols]
|
||||
|
||||
if len(df) == 0:
|
||||
return DataframeData(
|
||||
headers=list(df.columns),
|
||||
data=[["" for _ in range(len(df.columns))]],
|
||||
metadata=self.__extract_metadata(
|
||||
value, getattr(value, "hidden_columns", [])
|
||||
), # type: ignore
|
||||
)
|
||||
return DataframeData(
|
||||
headers=list(df.columns),
|
||||
data=df.to_dict(orient="split")["data"], # type: ignore
|
||||
metadata=self.__extract_metadata(
|
||||
value, getattr(value, "hidden_columns", [])
|
||||
), # type: ignore
|
||||
)
|
||||
elif _is_polars_available() and isinstance(value, _import_polars().DataFrame):
|
||||
if len(value) == 0:
|
||||
return DataframeData(headers=list(value.to_dict().keys()), data=[[]]) # type: ignore
|
||||
df_dict = value.to_dict() # type: ignore
|
||||
headers = list(df_dict.keys())
|
||||
data = list(zip(*df_dict.values()))
|
||||
return DataframeData(headers=headers, data=data)
|
||||
elif isinstance(value, (np.ndarray, list)):
|
||||
if len(value) == 0:
|
||||
return DataframeData(headers=self.headers, data=[[]])
|
||||
if isinstance(value, np.ndarray):
|
||||
value = value.tolist()
|
||||
if not isinstance(value, list):
|
||||
raise ValueError("output cannot be converted to list")
|
||||
|
||||
_headers = self.headers
|
||||
if len(self.headers) < len(value[0]):
|
||||
_headers: list[str] = [
|
||||
*self.headers,
|
||||
*[str(i) for i in range(len(self.headers) + 1, len(value[0]) + 1)],
|
||||
]
|
||||
elif len(self.headers) > len(value[0]):
|
||||
_headers = self.headers[: len(value[0])]
|
||||
|
||||
return DataframeData(headers=_headers, data=value)
|
||||
else:
|
||||
raise ValueError("Cannot process value as a Dataframe")
|
||||
metadata = self.get_metadata(value)
|
||||
return DataframeData(
|
||||
headers=headers,
|
||||
data=data,
|
||||
metadata=metadata, # type: ignore
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def __get_cell_style(cell_id: str, cell_styles: list[dict]) -> str:
|
||||
|
@ -579,7 +579,10 @@ class Numpy(components.Dataframe):
|
||||
headers: list[str] | None = None,
|
||||
row_count: int | tuple[int, str] = (1, "dynamic"),
|
||||
col_count: int | tuple[int, str] | None = None,
|
||||
datatype: str | list[str] = "str",
|
||||
datatype: Literal["str", "number", "bool", "date", "markdown", "html"]
|
||||
| Sequence[
|
||||
Literal["str", "number", "bool", "date", "markdown", "html"]
|
||||
] = "str",
|
||||
type: Literal["numpy"] = "numpy",
|
||||
latex_delimiters: list[dict[str, str | bool]] | None = None,
|
||||
label: str | None = None,
|
||||
@ -649,7 +652,10 @@ class Matrix(components.Dataframe):
|
||||
headers: list[str] | None = None,
|
||||
row_count: int | tuple[int, str] = (1, "dynamic"),
|
||||
col_count: int | tuple[int, str] | None = None,
|
||||
datatype: str | list[str] = "str",
|
||||
datatype: Literal["str", "number", "bool", "date", "markdown", "html"]
|
||||
| Sequence[
|
||||
Literal["str", "number", "bool", "date", "markdown", "html"]
|
||||
] = "str",
|
||||
type: Literal["array"] = "array",
|
||||
latex_delimiters: list[dict[str, str | bool]] | None = None,
|
||||
label: str | None = None,
|
||||
@ -719,7 +725,10 @@ class List(components.Dataframe):
|
||||
headers: list[str] | None = None,
|
||||
row_count: int | tuple[int, str] = (1, "dynamic"),
|
||||
col_count: Literal[1] = 1,
|
||||
datatype: str | list[str] = "str",
|
||||
datatype: Literal["str", "number", "bool", "date", "markdown", "html"]
|
||||
| Sequence[
|
||||
Literal["str", "number", "bool", "date", "markdown", "html"]
|
||||
] = "str",
|
||||
type: Literal["array"] = "array",
|
||||
latex_delimiters: list[dict[str, str | bool]] | None = None,
|
||||
label: str | None = None,
|
||||
|
@ -355,3 +355,42 @@ class TestDataframe:
|
||||
],
|
||||
},
|
||||
}
|
||||
|
||||
def test_is_empty(self):
    """Test is_empty method with various data types"""
    component = gr.Dataframe()

    # Values that must be reported as empty.
    empty_values = [
        [],
        [[]],
        np.array([]),
        np.zeros((2, 0)),
        None,
        {},
        {"data": [], "headers": ["a", "b"]},
    ]
    for candidate in empty_values:
        assert component.is_empty(candidate)

    # Values that hold at least one cell and must not be reported as empty.
    populated_values = [
        {"data": [1, 2]},
        [[1, 2], [3, 4]],
        pd.DataFrame({"a": [1, 2]}),
        pd.DataFrame({"a": [1, 2]}).style,
    ]
    for candidate in populated_values:
        assert not component.is_empty(candidate)
|
||||
|
||||
def test_get_headers(self):
    """Test get_headers method with various data types"""
    component = gr.Dataframe()
    frame = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})

    # Each pair is (input value, headers expected for it).
    cases = [
        (frame, ["col1", "col2"]),
        (frame.style, ["col1", "col2"]),
        ({"headers": ["a", "b"]}, ["a", "b"]),
        (np.array([[1, 2], [3, 4]]), []),
        (None, []),
    ]
    for given, expected in cases:
        assert component.get_headers(given) == expected
|
||||
|
||||
def test_get_cell_data(self):
    """Test get_cell_data method with various data types"""
    component = gr.Dataframe()
    rows = [[1, 2], [3, 4]]
    frame = pd.DataFrame({"col1": [1, 3], "col2": [2, 4]})

    # A list of lists, a DataFrame and a dict payload all yield the same rows.
    for given in (rows, frame, {"data": rows}):
        assert component.get_cell_data(given) == [[1, 2], [3, 4]]

    # A 1-D array becomes one single-cell row per element.
    flat = np.array([1, 2, 3])
    assert component.get_cell_data(flat) == [[1], [2], [3]]

    # Columns hidden on a Styler are left out of the cell data.
    styler = frame.style
    styler.hide(axis=1, subset=["col2"])
    assert component.get_cell_data(styler) == [[1], [3]]
|
||||
|
Loading…
x
Reference in New Issue
Block a user