From 740364e5cee5f96625fe0da3ac8257d97e5f0815 Mon Sep 17 00:00:00 2001 From: Freddy Boulton Date: Thu, 11 Jul 2024 10:58:05 +0200 Subject: [PATCH] Add guide on best practices for ZeroGPU limits with the python client (#8656) * Add guide * typos * add changeset * add changeset * styling and fix bad id --------- Co-authored-by: gradio-pr-bot Co-authored-by: aliabd --- .changeset/purple-seals-fetch.md | 5 ++ .../gradio_client/02_version-1-release.svx | 2 +- .../03_using-zero-gpu-spaces.svx | 63 +++++++++++++++++++ 3 files changed, 69 insertions(+), 1 deletion(-) create mode 100644 .changeset/purple-seals-fetch.md create mode 100644 js/_website/src/lib/templates/python-client/gradio_client/03_using-zero-gpu-spaces.svx diff --git a/.changeset/purple-seals-fetch.md b/.changeset/purple-seals-fetch.md new file mode 100644 index 0000000000..41e02ecc8a --- /dev/null +++ b/.changeset/purple-seals-fetch.md @@ -0,0 +1,5 @@ +--- +"website": patch +--- + +feat:Add guide on best practices for ZeroGPU limits with the python client diff --git a/js/_website/src/lib/templates/python-client/gradio_client/02_version-1-release.svx b/js/_website/src/lib/templates/python-client/gradio_client/02_version-1-release.svx index 21322cfa4d..198c52b0f2 100644 --- a/js/_website/src/lib/templates/python-client/gradio_client/02_version-1-release.svx +++ b/js/_website/src/lib/templates/python-client/gradio_client/02_version-1-release.svx @@ -133,7 +133,7 @@ if __name__ == "__main__": app.run(host="0.0.0.0", port=5000) ``` -### 1.0 Migration Guide and Breaking Changes +### v1.0 Migration Guide and Breaking Changes
diff --git a/js/_website/src/lib/templates/python-client/gradio_client/03_using-zero-gpu-spaces.svx b/js/_website/src/lib/templates/python-client/gradio_client/03_using-zero-gpu-spaces.svx new file mode 100644 index 0000000000..44f709e89b --- /dev/null +++ b/js/_website/src/lib/templates/python-client/gradio_client/03_using-zero-gpu-spaces.svx @@ -0,0 +1,63 @@ + + +# Using ZeroGPU Spaces with the Clients + +Hugging Face Spaces now offers a new hardware option called ZeroGPU. +ZeroGPU is a "serverless" cluster of spaces that lets Gradio applications run on A100 GPUs for free. +These kinds of spaces are a great foundation for building new applications with the Python Gradio client, but you need to take care to avoid ZeroGPU's rate limiting. + +### Explaining Rate Limits for ZeroGPU + +ZeroGPU spaces are rate-limited to ensure that a single user does not hog all of the available GPUs. +The limit is controlled by a special token that the Hugging Face Hub infrastructure adds to all incoming requests to Spaces. +This token is sent in a request header called `X-IP-Token`, and its value changes depending on the user who makes a request to the ZeroGPU space. +
+ +Let's say you want to create a space (Space A) that uses a ZeroGPU space (Space B) programmatically. +Simply calling Space B from Space A with the Python client will quickly exhaust your rate limit, as all the requests to the ZeroGPU space will have the same token, no matter which user of Space A triggered them. +To avoid this, we need to extract the token of the user using Space A and pass it along when we call Space B programmatically. +
+ +How to do this will be explained in the following section. + +### Avoiding Rate Limits + +When a user visits the page, we will extract their token from the `X-IP-Token` header of the incoming request. +We will use this header value in all subsequent client requests. +The following hypothetical text-to-image application shows how this is done. +
+ +We use the `load` event to extract the user's `x-ip-token` header when they visit the page. +We create a new client with this header passed to the `headers` parameter. +This ensures all subsequent predictions pass this header to the ZeroGPU space. +The client is saved in a State variable so that it's kept independent from other users. +It will be deleted automatically when the user exits the page. + + +```python +import gradio as gr +from gradio_client import Client + +def text_to_image(client, prompt): + img = client.predict(prompt, api_name="/predict") + return img + + +def set_client_for_session(request: gr.Request): + x_ip_token = request.headers['x-ip-token'] + + # The "gradio/text-to-image" space is a ZeroGPU space + return Client("gradio/text-to-image", headers={"X-IP-Token": x_ip_token}) + +with gr.Blocks() as demo: + client = gr.State() + image = gr.Image() + prompt = gr.Textbox(max_lines=1) + + prompt.submit(text_to_image, [client, prompt], [image]) + + demo.load(set_client_for_session, None, client) +``` +