Added polars dataframe support with demo (#7139)

* first commit

* minor update

* working polars

* fixed. demo added.

* add changeset

* dynamic import of polars

* Fix import logic

* add changeset

* Fix lint

* Add code

* Fix code

* Add code

* remove screenshot'

* delete

* add changeset

---------

Co-authored-by: gradio-pr-bot <gradio-pr-bot@users.noreply.github.com>
Co-authored-by: freddyaboulton <alfonsoboulton@gmail.com>
This commit is contained in:
cswamy 2024-01-25 17:56:54 +00:00 committed by GitHub
parent be56c76c7b
commit 6abad53677
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 94 additions and 12 deletions

View File

@ -0,0 +1,5 @@
---
"gradio": minor
---
feat:Added polars dataframe support with demo

View File

@ -0,0 +1,4 @@
Item,Quantity
apple,56
banana,12
orange,30
1 Item Quantity
2 apple 56
3 banana 12
4 orange 30

View File

@ -0,0 +1 @@
{"cells": [{"cell_type": "markdown", "id": "302934307671667531413257853548643485645", "metadata": {}, "source": ["# Gradio Demo: sort_records"]}, {"cell_type": "code", "execution_count": null, "id": "272996653310673477252411125948039410165", "metadata": {}, "outputs": [], "source": ["!pip install -q gradio "]}, {"cell_type": "code", "execution_count": null, "id": "288918539441861185822528903084949547379", "metadata": {}, "outputs": [], "source": ["# Downloading files from the demo repo\n", "import os\n", "!wget -q https://github.com/gradio-app/gradio/raw/main/demo/sort_records/polars_sort.csv"]}, {"cell_type": "code", "execution_count": null, "id": "44380577570523278879349135829904343037", "metadata": {}, "outputs": [], "source": ["import gradio as gr\n", "import os\n", "\n", "def sort_records(records):\n", " return records.sort(\"Quantity\")\n", "\n", "demo = gr.Interface(\n", " sort_records,\n", " gr.Dataframe(\n", " headers=[\"Item\", \"Quantity\"],\n", " datatype=[\"str\", \"number\"],\n", " row_count=3,\n", " col_count=(2, \"fixed\"),\n", " type=\"polars\"\n", " ),\n", " \"dataframe\",\n", " description=\"Sort by Quantity\",\n", " examples=[\n", " [os.path.join(os.path.abspath(''), \"polars_sort.csv\")],\n", " ],\n", ")\n", "\n", "if __name__ == \"__main__\":\n", " demo.launch()"]}], "metadata": {}, "nbformat": 4, "nbformat_minor": 5}

24
demo/sort_records/run.py Normal file
View File

@ -0,0 +1,24 @@
import gradio as gr
import os
def sort_records(records):
return records.sort("Quantity")
demo = gr.Interface(
sort_records,
gr.Dataframe(
headers=["Item", "Quantity"],
datatype=["str", "number"],
row_count=3,
col_count=(2, "fixed"),
type="polars"
),
"dataframe",
description="Sort by Quantity",
examples=[
[os.path.join(os.path.dirname(__file__), "polars_sort.csv")],
],
)
if __name__ == "__main__":
demo.launch()

View File

@ -6,7 +6,6 @@ from typing import Any, Optional
import requests
import tomlkit as toml
from rich import print
from typer import Argument, Option
from typing_extensions import Annotated

View File

@ -3,7 +3,17 @@
from __future__ import annotations
import warnings
from typing import Any, Callable, Dict, List, Literal, Optional
from typing import (
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
Literal,
Optional,
Tuple,
Union,
)
import numpy as np
import pandas as pd
@ -15,10 +25,26 @@ from gradio.components import Component
from gradio.data_classes import GradioModel
from gradio.events import Events
if TYPE_CHECKING:
import polars as pl # type: ignore
def _is_polars_available():
import importlib.util
spec = importlib.util.find_spec("polars")
return bool(spec)
def _import_polars():
import polars as pl # type: ignore
return pl
class DataframeData(GradioModel):
headers: List[str]
data: List[List[Any]]
data: Union[List[List[Any]], List[Tuple[Any, ...]]]
metadata: Optional[Dict[str, Optional[List[Any]]]] = None
@ -29,10 +55,10 @@ set_documentation_group("component")
class Dataframe(Component):
"""
Accepts or displays 2D input through a spreadsheet-like component for dataframes.
Preprocessing: passes the uploaded spreadsheet data as a {pandas.DataFrame}, {numpy.array}, or {List[List]} depending on `type`
Postprocessing: expects a {pandas.DataFrame}, {pandas.Styler}, {numpy.array}, {List[List]}, {List}, a {Dict} with keys `data` (and optionally `headers`), or {str} path to a csv, which is rendered in the spreadsheet.
Examples-format: a {str} filepath to a csv with data, a pandas dataframe, or a list of lists (excluding headers) where each sublist is a row of data.
Demos: filter_records, matrix_transpose, tax_calculator
Preprocessing: passes the uploaded spreadsheet data as a {pandas.DataFrame}, {numpy.array}, {polars.DataFrame}, or {List[List]} depending on `type`
Postprocessing: expects a {pandas.DataFrame}, {pandas.Styler}, {numpy.array}, {polars.DataFrame}, {List[List]}, {List}, a {Dict} with keys `data` (and optionally `headers`), or {str} path to a csv, which is rendered in the spreadsheet.
Examples-format: a {str} filepath to a csv with data, a pandas dataframe, a polars dataframe, or a list of lists (excluding headers) where each sublist is a row of data.
Demos: filter_records, matrix_transpose, tax_calculator, sort_records
"""
EVENTS = [Events.change, Events.input, Events.select]
@ -44,6 +70,7 @@ class Dataframe(Component):
value: pd.DataFrame
| Styler
| np.ndarray
| pl.DataFrame
| list
| list[list]
| dict
@ -55,7 +82,7 @@ class Dataframe(Component):
row_count: int | tuple[int, str] = (1, "dynamic"),
col_count: int | tuple[int, str] | None = None,
datatype: str | list[str] = "str",
type: Literal["pandas", "numpy", "array"] = "pandas",
type: Literal["pandas", "numpy", "array", "polars"] = "pandas",
latex_delimiters: list[dict[str, str | bool]] | None = None,
label: str | None = None,
show_label: bool | None = None,
@ -79,7 +106,7 @@ class Dataframe(Component):
row_count: Limit number of rows for input and decide whether user can create new rows. The first element of the tuple is an `int`, the row count; the second should be 'fixed' or 'dynamic', the new row behaviour. If an `int` is passed the rows default to 'dynamic'
col_count: Limit number of columns for input and decide whether user can create new columns. The first element of the tuple is an `int`, the number of columns; the second should be 'fixed' or 'dynamic', the new column behaviour. If an `int` is passed the columns default to 'dynamic'
datatype: Datatype of values in sheet. Can be provided per column as a list of strings, or for the entire sheet as a single string. Valid datatypes are "str", "number", "bool", "date", and "markdown".
type: Type of value to be returned by component. "pandas" for pandas dataframe, "numpy" for numpy array, or "array" for a Python list of lists.
type: Type of value to be returned by component. "pandas" for pandas dataframe, "numpy" for numpy array, "polars" for polars dataframe, or "array" for a Python list of lists.
label: The label for this component. Appears above the component and is also used as the header if there are a table of examples for this component. If None and used in a `gr.Interface`, the label will be the name of the parameter this component is assigned to.
latex_delimiters: A list of dicts of the form {"left": open delimiter (str), "right": close delimiter (str), "display": whether to display in newline (bool)} that will be used to render LaTeX expressions. If not provided, `latex_delimiters` is set to `[{ "left": "$$", "right": "$$", "display": True }]`, so only expressions enclosed in $$ delimiters will be rendered as LaTeX, and in a new line. Pass in an empty list to disable LaTeX rendering. For more information, see the [KaTeX documentation](https://katex.org/docs/autorender.html). Only applies to columns whose datatype is "markdown".
label: The label for this component. Appears above the component and is also used as the header if there are a table of examples for this component. If None and used in a `gr.Interface`, the label will be the name of the parameter this component is assigned to.
@ -113,11 +140,15 @@ class Dataframe(Component):
self.datatype = (
datatype if isinstance(datatype, list) else [datatype] * self.col_count[0]
)
valid_types = ["pandas", "numpy", "array"]
valid_types = ["pandas", "numpy", "array", "polars"]
if type not in valid_types:
raise ValueError(
f"Invalid value for parameter `type`: {type}. Please choose from one of: {valid_types}"
)
if type == "polars" and not _is_polars_available():
raise ImportError(
"Polars is not installed. Please install using `pip install polars`."
)
self.type = type
values = {
"str": "",
@ -160,12 +191,20 @@ class Dataframe(Component):
value=value,
)
def preprocess(self, payload: DataframeData) -> pd.DataFrame | np.ndarray | list:
def preprocess(
self, payload: DataframeData
) -> pd.DataFrame | np.ndarray | pl.DataFrame | list:
if self.type == "pandas":
if payload.headers is not None:
return pd.DataFrame(payload.data, columns=payload.headers)
else:
return pd.DataFrame(payload.data)
if self.type == "polars":
polars = _import_polars()
if payload.headers is not None:
return polars.DataFrame(payload.data, schema=payload.headers)
else:
return polars.DataFrame(payload.data)
if self.type == "numpy":
return np.array(payload.data)
elif self.type == "array":
@ -174,7 +213,7 @@ class Dataframe(Component):
raise ValueError(
"Unknown type: "
+ str(self.type)
+ ". Please choose from: 'pandas', 'numpy', 'array'."
+ ". Please choose from: 'pandas', 'numpy', 'array', 'polars'."
)
def postprocess(
@ -182,6 +221,7 @@ class Dataframe(Component):
value: pd.DataFrame
| Styler
| np.ndarray
| pl.DataFrame
| list
| list[list]
| dict
@ -224,6 +264,11 @@ class Dataframe(Component):
headers=list(df.columns),
data=df.to_dict(orient="split")["data"], # type: ignore
)
elif _is_polars_available() and isinstance(value, _import_polars().DataFrame):
df_dict = value.to_dict()
headers = list(df_dict.keys())
data = list(zip(*df_dict.values()))
return DataframeData(headers=headers, data=data)
elif isinstance(value, (np.ndarray, list)):
if len(value) == 0:
return self.postprocess([[]])
@ -296,6 +341,7 @@ class Dataframe(Component):
value: pd.DataFrame
| Styler
| np.ndarray
| pl.DataFrame
| list
| list[list]
| dict

View File

@ -8,6 +8,7 @@ fastapi>=0.101.0
gradio_pdf==0.0.3
httpx
huggingface_hub
polars==0.20.5
pydantic
pytest
pytest-asyncio

View File

@ -130,6 +130,8 @@ prompt-toolkit==3.0.30
# via ipython
ptyprocess==0.7.0
# via pexpect
polars==0.20.5
# via -r requirements.in
py==1.11.0
# via pytest
pyparsing==3.0.9