Control which files get moved to cache with gr.set_static_paths (#7618)

* First commit

* Add code

* undo demo changes

* add changeset

* Add documentation

* Rename to set_static_paths

* add changeset

* Fix docstring issue

* add changeset

* Address feedback

---------

Co-authored-by: gradio-pr-bot <gradio-pr-bot@users.noreply.github.com>
This commit is contained in:
Freddy Boulton 2024-03-07 12:39:07 -08:00 committed by GitHub
parent a22f3e062d
commit 0ae1e4486c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 152 additions and 5 deletions

View File

@ -0,0 +1,6 @@
---
"gradio": patch
"gradio_client": patch
---
fix:Control which files get moved to cache with gr.set_static_paths

View File

@ -57,6 +57,7 @@ _module_prefixes = [
("gradio.route", "routes"),
("gradio.theme", "themes"),
("gradio_client.", "py-client"),
("gradio.utils", "helpers"),
]

View File

@ -97,6 +97,6 @@ from gradio.templates import (
TextArea,
)
from gradio.themes import Base as Theme
from gradio.utils import get_package_version
from gradio.utils import get_package_version, set_static_paths
__version__ = get_package_version()

View File

@ -626,7 +626,6 @@ class Blocks(BlockContext, BlocksEvents, metaclass=BlocksMeta):
self.progress_tracking = None
self.ssl_verify = True
self.allowed_paths = []
self.blocked_paths = []
self.root_path = os.environ.get("GRADIO_ROOT_PATH", "")

View File

@ -210,3 +210,19 @@ class ListFiles(GradioRootModel):
def __iter__(self):
    """Return an iterator over the value stored in `self.root`."""
    return iter(self.root)
class _StaticFiles:
    """
    Holder for the static file paths of an app.

    The class attribute `all_paths` is the shared list of static paths;
    `clear` resets it to an empty list.
    """

    # Class-level list of static paths.
    all_paths = []

    def __init__(self, paths: list[str | pathlib.Path]) -> None:
        # Keep the paths exactly as the caller supplied them.
        self.paths = paths
        resolved_paths = []
        for raw_path in paths:
            resolved_paths.append(pathlib.Path(raw_path).resolve())
        # Instance attribute: shadows the class-level list on this instance only.
        self.all_paths = resolved_paths

    @classmethod
    def clear(cls):
        """Reset the class-level list of static paths to an empty list."""
        cls.all_paths = []

View File

@ -18,7 +18,7 @@ import numpy as np
from gradio_client import utils as client_utils
from PIL import Image, ImageOps, PngImagePlugin
from gradio import wasm_utils
from gradio import utils, wasm_utils
from gradio.data_classes import FileData, GradioModel, GradioRootModel
from gradio.utils import abspath, get_upload_folder, is_in_or_equal
@ -262,6 +262,8 @@ def move_files_to_cache(
# This makes it so that the URL is not downloaded and speeds up event processing
if payload.url and postprocess and client_utils.is_http_url_like(payload.url):
payload.path = payload.url
elif utils.is_static_file(payload):
pass
elif not block.proxy_url:
# If the file is on a remote server, do not move it to cache.
if check_in_upload_folder and not client_utils.is_http_url_like(

View File

@ -495,13 +495,18 @@ class App(FastAPI):
utils.is_in_or_equal(abs_path, allowed_path)
for allowed_path in blocks.allowed_paths
)
is_static_file = utils.is_static_file(abs_path)
was_uploaded = utils.is_in_or_equal(abs_path, app.uploaded_file_dir)
is_cached_example = utils.is_in_or_equal(
abs_path, utils.abspath(utils.get_cache_folder())
)
if not (
created_by_app or in_allowlist or was_uploaded or is_cached_example
created_by_app
or in_allowlist
or was_uploaded
or is_cached_example
or is_static_file
):
raise HTTPException(403, f"File not allowed: {path_or_url}.")

View File

@ -40,10 +40,12 @@ from typing import (
import anyio
import httpx
from gradio_client.documentation import document
from typing_extensions import ParamSpec
import gradio
from gradio.context import Context
from gradio.data_classes import FileData
from gradio.strings import en
if TYPE_CHECKING: # Only import for type checking (is False at runtime).
@ -958,6 +960,64 @@ def is_in_or_equal(path_1: str | Path, path_2: str | Path):
return True
@document()
def set_static_paths(paths: str | Path | list[str | Path]) -> None:
    """
    Set the static paths to be served by the gradio app.

    Static files are not moved to the gradio cache and are served directly from the file system.
    This function is useful when you want to serve files that you know will not be modified during the lifetime of the gradio app (like files used in gr.Examples).
    By setting static paths, your app will launch faster and it will consume less disk space.
    Calling this function will add to the static paths for all gradio applications defined in the same interpreter session.
    Paths accumulate across calls: each call appends to the paths registered by earlier calls, so calling this function with an empty list leaves the registered paths unchanged.

    Parameters:
        paths: Filepath or directory name, or a list of filepaths or directory names, to be served by the gradio app. If it is a directory name, ALL files located within that directory will be considered static and not moved to the gradio cache. This also means that ALL files in that directory will be accessible over the network.
    Example:
        import gradio as gr

        # Paths can be a list of strings or pathlib.Path objects
        # corresponding to filenames or directories.
        gr.set_static_paths(paths=["test/test_files/"])

        # The example files and the default value of the input
        # will not be copied to the gradio cache and will be served directly.
        demo = gr.Interface(
            lambda s: s.rotate(45),
            gr.Image(value="test/test_files/cheetah1.jpg", type="pil"),
            gr.Image(),
            examples=["test/test_files/bus.png"],
        )

        demo.launch()
    """
    # Imported here rather than at module level, as in the original code.
    from gradio.data_classes import _StaticFiles

    # A lone str/Path would otherwise be iterated element by element (a str
    # character by character), registering bogus paths; wrap it in a list.
    if isinstance(paths, (str, Path)):
        paths = [paths]

    # Store resolved (absolute) paths; this extends the shared class-level
    # list, so earlier registrations are kept.
    _StaticFiles.all_paths.extend([Path(p).resolve() for p in paths])
def is_static_file(file_path: Any):
    """
    Returns True if the file is a static file (and not moved to cache).

    The check is made against the paths currently held in `_StaticFiles.all_paths`.
    """
    from gradio.data_classes import _StaticFiles

    registered_paths = _StaticFiles.all_paths
    return _is_static_file(file_path, registered_paths)
def _is_static_file(file_path: Any, static_files: list[Path]) -> bool:
    """
    Returns True if the file is a static file (i.e. it is in the static files list).

    Parameters:
        file_path: A str, Path or FileData pointing at the file. Any other type yields False.
        static_files: The static paths to test against.
    Returns:
        True only if the path exists on disk and `is_in_or_equal` holds for at least one entry of `static_files`.
    """
    if isinstance(file_path, FileData):
        # Unwrap the payload to the path it carries.
        candidate = file_path.path
    elif isinstance(file_path, (str, Path)):
        candidate = file_path
    else:
        return False

    if isinstance(candidate, str):
        candidate = Path(candidate)

    # A path that does not exist on disk is never treated as static.
    if not candidate.exists():
        return False

    for static_entry in static_files:
        if is_in_or_equal(candidate, static_entry):
            return True
    return False
# Non-greedy pattern: matches "<", then as few characters as possible, up to the next ">".
HTML_TAG_RE = re.compile(r"<.*?>")

View File

@ -426,7 +426,7 @@ There are actually two separate Gradio apps in this example! One that simply dis
Sharing your Gradio app with others (by hosting it on Spaces, on your own server, or through temporary share links) **exposes** certain files on the host machine to users of your Gradio app.
In particular, Gradio apps ALLOW users to access to three kinds of files:
In particular, Gradio apps ALLOW users to access four kinds of files:
- **Temporary files created by Gradio.** These are files that are created by Gradio as part of running your prediction function. For example, if your prediction function returns a video file, then Gradio will save that video to a temporary cache on your device and then send the path to the file to the front end. You can customize the location of temporary cache files created by Gradio by setting the environment variable `GRADIO_TEMP_DIR` to an absolute path, such as `/home/usr/scripts/project/temp/`. You can delete the files created by your app when it shuts down with the `delete_cache` parameter of `gradio.Blocks`, `gradio.Interface`, and `gradio.ChatInterface`. This parameter is a tuple of integers of the form `[frequency, age]` where `frequency` is how often to delete files and `age` is the time in seconds since the file was last modified.
@ -435,6 +435,8 @@ In particular, Gradio apps ALLOW users to access to three kinds of files:
- **Files that you explicitly allow via the `allowed_paths` parameter in `launch()`**. This parameter allows you to pass in a list of additional directories or exact filepaths you'd like to allow users to have access to. (By default, this parameter is an empty list).
- **Static files that you explicitly set via the `gr.set_static_paths` function**. This function allows you to pass in a list of directories or filenames that will be considered static. This means that they will not be copied to the cache and will be served directly from your computer. This can help save disk space and reduce the time your app takes to launch, but be mindful of possible security implications.
Gradio DOES NOT ALLOW access to:
- **Files that you explicitly block via the `blocked_paths` parameter in `launch()`**. You can pass in a list of additional directories or exact filepaths to the `blocked_paths` parameter in `launch()`. This parameter takes precedence over the files that Gradio exposes by default or by the `allowed_paths`.

View File

@ -70,3 +70,15 @@ def gradio_temp_dir(monkeypatch, tmp_path):
"""
monkeypatch.setenv("GRADIO_TEMP_DIR", str(tmp_path))
return tmp_path
@pytest.fixture(autouse=True)
def clear_static_files():
    """Reset the static paths held by the _StaticFiles class after a test.

    Declared with autouse=True, so tests do not need to request it.
    This keeps static paths registered by one test from carrying over
    into another, so the tests stay independent.
    """
    yield

    # Teardown: everything after the yield runs once the test has finished.
    from gradio.data_classes import _StaticFiles

    _StaticFiles.clear()

View File

@ -1707,3 +1707,47 @@ def test_blocks_postprocessing_with_copies_of_component_instance():
demo.postprocess_data(0, [gr.Chatbot(value=[])] * 3, None)
== [{"value": [], "__type__": "update"}] * 3
)
def test_static_files_single_app(connect, gradio_temp_dir):
    """Static files are not cached when the app is built; a prediction still caches its input and output."""
    cheetah_path = "test/test_files/cheetah1.jpg"
    bus_path = "test/test_files/bus.png"

    def cached_file_count():
        # Number of entries with a "." in their name anywhere under the temp dir.
        return len(list(gradio_temp_dir.glob("**/*.*")))

    gr.set_static_paths(paths=[cheetah_path, bus_path])
    demo = gr.Interface(
        lambda s: s.rotate(45),
        gr.Image(value=cheetah_path, type="pil"),
        gr.Image(),
        examples=[bus_path],
    )

    # Building the interface saved nothing to the cache
    assert cached_file_count() == 0

    with connect(demo) as client:
        client.predict(bus_path)

    # Input/Output of the prediction got saved to the cache
    assert cached_file_count() == 2
def test_static_files_multiple_apps(gradio_temp_dir):
    """Static paths registered in separate calls cover two apps rendered in one Blocks."""

    def rotate(s):
        return s.rotate(45)

    # First app: a single file is registered as static.
    gr.set_static_paths(paths=["test/test_files/cheetah1.jpg"])
    demo = gr.Interface(
        rotate,
        gr.Image(value="test/test_files/cheetah1.jpg"),
        gr.Image(),
    )

    # Second app: a whole directory is registered as static.
    gr.set_static_paths(paths=["test/test_files/images"])
    demo_2 = gr.Interface(
        rotate,
        gr.Image(value="test/test_files/images/bus.png"),
        gr.Image(),
    )

    with gr.Blocks():
        demo.render()
        demo_2.render()

    # Nothing got saved to cache
    cached_files = list(gradio_temp_dir.glob("**/*.*"))
    assert len(cached_files) == 0