Clean up gr.DataFrame.postprocess() and fix issue with getting headers of empty dataframes (#10476)

* changes

* changes

* add changeset

* changes

* format

* changes

* add changeset

* notebook

* fix

---------

Co-authored-by: gradio-pr-bot <gradio-pr-bot@users.noreply.github.com>
This commit is contained in:
Abubakar Abid 2025-01-31 13:11:49 -08:00 committed by GitHub
parent 2cf449abb3
commit 017ed46272
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 206 additions and 107 deletions

View File

@ -0,0 +1,5 @@
---
"gradio": patch
---
fix:Clean up `gr.DataFrame.postprocess()` and fix issue with getting headers of empty dataframes

File diff suppressed because one or more lines are too long

View File

@ -1,3 +1,4 @@
# type: ignore
import gradio as gr
import pandas as pd
from pathlib import Path

View File

@ -74,7 +74,10 @@ class Dataframe(Component):
headers: list[str] | None = None,
row_count: int | tuple[int, str] = (1, "dynamic"),
col_count: int | tuple[int, str] | None = None,
datatype: str | list[str] = "str",
datatype: Literal["str", "number", "bool", "date", "markdown", "html"]
| Sequence[
Literal["str", "number", "bool", "date", "markdown", "html"]
] = "str",
type: Literal["pandas", "numpy", "array", "polars"] = "pandas",
latex_delimiters: list[dict[str, str | bool]] | None = None,
label: str | None = None,
@ -99,8 +102,8 @@ class Dataframe(Component):
):
"""
Parameters:
value: Default value to display in the DataFrame. If a Styler is provided, it will be used to set the displayed value in the DataFrame (e.g. to set precision of numbers) if the `interactive` is False. If a Callable function is provided, the function will be called whenever the app loads to set the initial value of the component.
headers: List of str header names. If None, no headers are shown.
value: Default value to display in the DataFrame. Supports pandas, numpy, polars, and list of lists. If a Styler is provided, it will be used to set the displayed value in the DataFrame (e.g. to set precision of numbers) if the `interactive` is False. If a Callable function is provided, the function will be called whenever the app loads to set the initial value of the component.
headers: List of str header names. These are used to set the column headers of the dataframe if the value does not have headers. If None, no headers are shown.
row_count: Limit number of rows for input and decide whether user can create new rows or delete existing rows. The first element of the tuple is an `int`, the row count; the second should be 'fixed' or 'dynamic', the new row behaviour. If an `int` is passed the rows default to 'dynamic'
col_count: Limit number of columns for input and decide whether user can create new columns or delete existing columns. The first element of the tuple is an `int`, the number of columns; the second should be 'fixed' or 'dynamic', the new column behaviour. If an `int` is passed the columns default to 'dynamic'
datatype: Datatype of values in sheet. Can be provided per column as a list of strings, or for the entire sheet as a single string. Valid datatypes are "str", "number", "bool", "date", and "markdown".
@ -150,24 +153,6 @@ class Dataframe(Component):
"Polars is not installed. Please install using `pip install polars`."
)
self.type = type
values = {
"str": "",
"number": 0,
"bool": False,
"date": "01/01/1970",
"markdown": "",
"html": "",
}
column_dtypes = (
[datatype] * self.col_count[0] if isinstance(datatype, str) else datatype
)
self.empty_input = {
"headers": self.headers,
"data": [
[values[c] for c in column_dtypes] for _ in range(self.row_count[0])
],
"metadata": None,
}
if latex_delimiters is None:
latex_delimiters = [{"left": "$$", "right": "$$", "display": True}]
@ -235,7 +220,7 @@ class Dataframe(Component):
)
@staticmethod
def _is_empty(
def is_empty(
value: pd.DataFrame
| Styler
| np.ndarray
@ -246,9 +231,14 @@ class Dataframe(Component):
| str
| None,
) -> bool:
"""
Checks if the value of the dataframe provided is empty.
"""
import pandas as pd
from pandas.io.formats.style import Styler
if value is None:
return True
if isinstance(value, pd.DataFrame):
return value.empty
elif isinstance(value, Styler):
@ -257,12 +247,123 @@ class Dataframe(Component):
return value.size == 0
elif _is_polars_available() and isinstance(value, _import_polars().DataFrame):
return value.is_empty()
elif isinstance(value, list) and len(value) and isinstance(value[0], list):
return len(value[0]) == 0
elif isinstance(value, (list, dict)):
elif isinstance(value, list):
if len(value) > 0 and isinstance(value[0], list):
return len(value[0]) == 0
return len(value) == 0
elif isinstance(value, dict):
if "data" in value:
return len(value["data"]) == 0
return len(value) == 0
return False
def get_headers(
self,
value: pd.DataFrame
| Styler
| np.ndarray
| pl.DataFrame
| list
| list[list]
| dict
| str
| None,
) -> list[str]:
"""
Returns the headers of the dataframes based on the value provided. For values
that do not have headers, an empty list is returned.
"""
import pandas as pd
from pandas.io.formats.style import Styler
if value is None:
return []
if isinstance(value, pd.DataFrame):
return list(value.columns)
elif isinstance(value, Styler):
return list(value.data.columns) # type: ignore
elif isinstance(value, str):
return list(pd.read_csv(value).columns)
elif _is_polars_available() and isinstance(value, _import_polars().DataFrame):
return list(value.columns)
elif isinstance(value, dict):
return value.get("headers", [])
elif isinstance(value, (list, np.ndarray)):
return []
return []
@staticmethod
def get_cell_data(
value: pd.DataFrame
| Styler
| np.ndarray
| pl.DataFrame
| list
| list[list]
| dict
| str
| None,
) -> list[list[Any]]:
"""
Gets the cell data (as a list of lists) from the value provided.
"""
import pandas as pd
from pandas.io.formats.style import Styler
if isinstance(value, dict):
return value.get("data", [[]])
if isinstance(value, (str, pd.DataFrame)):
if isinstance(value, str):
value = pd.read_csv(value) # type: ignore
return value.to_dict(orient="split")["data"]
elif isinstance(value, Styler):
df: pd.DataFrame = value.data # type: ignore
hidden_columns = getattr(value, "hidden_columns", [])
visible_cols = [
i for i, _ in enumerate(df.columns) if i not in hidden_columns
]
df = df.iloc[:, visible_cols]
return df.to_dict(orient="split")["data"]
elif _is_polars_available() and isinstance(value, _import_polars().DataFrame):
df_dict = value.to_dict() # type: ignore
data = list(zip(*df_dict.values()))
return data
elif isinstance(value, (np.ndarray, list)):
if isinstance(value, np.ndarray):
value = value.tolist()
if not isinstance(value, list):
raise ValueError("output cannot be converted to list")
if not isinstance(value[0], list):
return [[v] for v in value]
return value
else:
raise ValueError(
f"Cannot process value of type {type(value)} in gr.Dataframe"
)
@staticmethod
def get_metadata(
value: pd.DataFrame
| Styler
| np.ndarray
| pl.DataFrame
| list
| list[list]
| dict
| str
| None,
) -> dict[str, list[list]] | None:
"""
Gets the metadata from the value provided.
"""
from pandas.io.formats.style import Styler
if isinstance(value, Styler):
return Dataframe.__extract_metadata(
value, getattr(value, "hidden_columns", [])
)
return None
def postprocess(
self,
value: pd.DataFrame
@ -290,86 +391,30 @@ class Dataframe(Component):
raise ValueError(
"Styler objects are only supported in pandas version 1.5.0 or higher. Please try: `pip install --upgrade pandas` to use this feature."
)
if isinstance(value, Styler) and self.interactive:
warnings.warn(
"Cannot display Styler object in interactive mode. Will display as a regular pandas dataframe instead."
)
if value is None or self._is_empty(value):
return DataframeData(
headers=self.headers, data=[["" for _ in range(len(self.headers))]]
)
if isinstance(value, dict):
if len(value) == 0:
return DataframeData(
headers=self.headers, data=[["" for _ in range(len(self.headers))]]
)
return DataframeData(
headers=value.get("headers", []), data=value.get("data", [[]])
)
if isinstance(value, (str, pd.DataFrame)):
if isinstance(value, str):
value = pd.read_csv(value) # type: ignore
if len(value) == 0:
return DataframeData(
headers=[str(col) for col in value.columns], # Convert to strings
data=[["" for _ in range(len(value.columns))]],
)
return DataframeData(
headers=[str(col) for col in value.columns],
data=value.to_dict(orient="split")["data"],
)
elif isinstance(value, Styler):
if self.interactive:
warnings.warn(
"Cannot display Styler object in interactive mode. Will display as a regular pandas dataframe instead."
)
df: pd.DataFrame = value.data # type: ignore
visible_cols = [
i
for i, col in enumerate(df.columns)
if i not in getattr(value, "hidden_columns", [])
headers = self.get_headers(value) or self.headers
data = (
[["" for _ in range(len(headers))]]
if self.is_empty(value)
else self.get_cell_data(value)
)
if len(headers) > len(data[0]):
headers = headers[: len(data[0])]
elif len(headers) < len(data[0]):
headers = [
*headers,
*[str(i) for i in range(len(headers) + 1, len(data[0]) + 1)],
]
df = df.iloc[:, visible_cols]
if len(df) == 0:
return DataframeData(
headers=list(df.columns),
data=[["" for _ in range(len(df.columns))]],
metadata=self.__extract_metadata(
value, getattr(value, "hidden_columns", [])
), # type: ignore
)
return DataframeData(
headers=list(df.columns),
data=df.to_dict(orient="split")["data"], # type: ignore
metadata=self.__extract_metadata(
value, getattr(value, "hidden_columns", [])
), # type: ignore
)
elif _is_polars_available() and isinstance(value, _import_polars().DataFrame):
if len(value) == 0:
return DataframeData(headers=list(value.to_dict().keys()), data=[[]]) # type: ignore
df_dict = value.to_dict() # type: ignore
headers = list(df_dict.keys())
data = list(zip(*df_dict.values()))
return DataframeData(headers=headers, data=data)
elif isinstance(value, (np.ndarray, list)):
if len(value) == 0:
return DataframeData(headers=self.headers, data=[[]])
if isinstance(value, np.ndarray):
value = value.tolist()
if not isinstance(value, list):
raise ValueError("output cannot be converted to list")
_headers = self.headers
if len(self.headers) < len(value[0]):
_headers: list[str] = [
*self.headers,
*[str(i) for i in range(len(self.headers) + 1, len(value[0]) + 1)],
]
elif len(self.headers) > len(value[0]):
_headers = self.headers[: len(value[0])]
return DataframeData(headers=_headers, data=value)
else:
raise ValueError("Cannot process value as a Dataframe")
metadata = self.get_metadata(value)
return DataframeData(
headers=headers,
data=data,
metadata=metadata, # type: ignore
)
@staticmethod
def __get_cell_style(cell_id: str, cell_styles: list[dict]) -> str:

View File

@ -579,7 +579,10 @@ class Numpy(components.Dataframe):
headers: list[str] | None = None,
row_count: int | tuple[int, str] = (1, "dynamic"),
col_count: int | tuple[int, str] | None = None,
datatype: str | list[str] = "str",
datatype: Literal["str", "number", "bool", "date", "markdown", "html"]
| Sequence[
Literal["str", "number", "bool", "date", "markdown", "html"]
] = "str",
type: Literal["numpy"] = "numpy",
latex_delimiters: list[dict[str, str | bool]] | None = None,
label: str | None = None,
@ -649,7 +652,10 @@ class Matrix(components.Dataframe):
headers: list[str] | None = None,
row_count: int | tuple[int, str] = (1, "dynamic"),
col_count: int | tuple[int, str] | None = None,
datatype: str | list[str] = "str",
datatype: Literal["str", "number", "bool", "date", "markdown", "html"]
| Sequence[
Literal["str", "number", "bool", "date", "markdown", "html"]
] = "str",
type: Literal["array"] = "array",
latex_delimiters: list[dict[str, str | bool]] | None = None,
label: str | None = None,
@ -719,7 +725,10 @@ class List(components.Dataframe):
headers: list[str] | None = None,
row_count: int | tuple[int, str] = (1, "dynamic"),
col_count: Literal[1] = 1,
datatype: str | list[str] = "str",
datatype: Literal["str", "number", "bool", "date", "markdown", "html"]
| Sequence[
Literal["str", "number", "bool", "date", "markdown", "html"]
] = "str",
type: Literal["array"] = "array",
latex_delimiters: list[dict[str, str | bool]] | None = None,
label: str | None = None,

View File

@ -355,3 +355,42 @@ class TestDataframe:
],
},
}
def test_is_empty(self):
"""Test is_empty method with various data types"""
df = gr.Dataframe()
assert df.is_empty([])
assert df.is_empty([[]])
assert df.is_empty(np.array([]))
assert df.is_empty(np.zeros((2, 0)))
assert df.is_empty(None)
assert df.is_empty({})
assert df.is_empty({"data": [], "headers": ["a", "b"]})
assert not df.is_empty({"data": [1, 2]})
assert not df.is_empty([[1, 2], [3, 4]])
assert not df.is_empty(pd.DataFrame({"a": [1, 2]}))
assert not df.is_empty(pd.DataFrame({"a": [1, 2]}).style)
def test_get_headers(self):
"""Test get_headers method with various data types"""
df = gr.Dataframe()
test_df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
assert df.get_headers(test_df) == ["col1", "col2"]
assert df.get_headers(test_df.style) == ["col1", "col2"]
assert df.get_headers({"headers": ["a", "b"]}) == ["a", "b"]
assert df.get_headers(np.array([[1, 2], [3, 4]])) == []
assert df.get_headers(None) == []
def test_get_cell_data(self):
"""Test get_cell_data method with various data types"""
df = gr.Dataframe()
test_data = [[1, 2], [3, 4]]
test_df = pd.DataFrame({"col1": [1, 3], "col2": [2, 4]})
assert df.get_cell_data(test_data) == [[1, 2], [3, 4]]
assert df.get_cell_data(test_df) == [[1, 2], [3, 4]]
assert df.get_cell_data({"data": test_data}) == [[1, 2], [3, 4]]
assert df.get_cell_data(np.array([1, 2, 3])) == [[1], [2], [3]]
styled_df = test_df.style
styled_df.hide(axis=1, subset=["col2"])
assert df.get_cell_data(styled_df) == [[1], [3]]