# Data loading


import json
import os
import time

import modal
import requests
from fastapi import FastAPI, Request, Response
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import StreamingResponse

# Container image for the deployed app: Debian slim plus the two pip packages
# this file imports (requests and FastAPI).
image = modal.Image.debian_slim().pip_install("requests", "fastapi[standard]")
# Modal application named "glm5-proxy", created with the image defined above.
app = modal.App("glm5-proxy", image=image)

# Upstream chat-completions endpoint that every request is forwarded to.
GLM_URL = "https://integrate.api.nvidia.com/v1/chat/completions"
# Bearer token for the upstream endpoint. A key supplied through the
# GLM_API_KEY environment variable takes precedence so the credential does not
# have to live in source control; the literal placeholder remains the fallback
# when the variable is unset or empty.
GLM_API_KEY = os.environ.get("GLM_API_KEY") or "КЛЮЧ"

# Formatting instructions that inject_system_prompt() adds to the conversation.
# Spelled out piece by piece so the trailing space that precedes each line
# break (part of the original text) stays visible and cannot be stripped by an
# editor.
FORMATTING_SYSTEM_PROMPT = "\n".join(
    (
        "Format your responses with proper paragraphs. ",
        "Use line breaks between paragraphs. ",
        "Never write a response as a single block of text. ",
        "Separate different thoughts, actions, and dialogue into distinct paragraphs.",
    )
)

# ASGI application that carries the proxy route registered further down.
web_app = FastAPI()
# CORS is left fully open: every origin, method and header is allowed.
web_app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)


def inject_system_prompt(messages: list) -> list:
    """Return a message list that carries FORMATTING_SYSTEM_PROMPT.

    If the first message has role ``"system"``, the formatting prompt is
    merged into it; otherwise a new system message is placed in front.
    The input is never modified: a new list (and, when merging, a shallow
    copy of the first message) is returned.

    Args:
        messages: Chat messages as dicts with ``role`` / ``content`` keys.

    Returns:
        The augmented list. An empty value or a non-list is returned unchanged.
    """
    if not messages or not isinstance(messages, list):
        return messages

    first = messages[0]
    if not isinstance(first, dict) or first.get("role") != "system":
        return [{"role": "system", "content": FORMATTING_SYSTEM_PROMPT}, *messages]

    content = first.get("content")
    if isinstance(content, str):
        merged = content + "\n\n" + FORMATTING_SYSTEM_PROMPT
    elif isinstance(content, list):
        # Assumed to be OpenAI-style content parts; add the prompt as one more
        # text part instead of failing on list + str.
        merged = [*content, {"type": "text", "text": FORMATTING_SYSTEM_PROMPT}]
    else:
        # Missing, null or otherwise unusable content: the prompt becomes the
        # whole system message.
        merged = FORMATTING_SYSTEM_PROMPT

    return [{**first, "content": merged}, *messages[1:]]


def _fix_sse_line(line: str) -> str:
    r"""Return one SSE line with literal ``\n`` in delta contents made real newlines.

    Only ``data: `` lines whose payload parses as a JSON object are rewritten
    (and re-serialized); everything else is returned untouched. Unexpected
    shapes inside the object (non-list ``choices``, non-dict ``delta``,
    non-string ``content``) are skipped rather than raising.
    """
    if not line.startswith("data: ") or line == "data: [DONE]":
        return line
    try:
        data = json.loads(line[6:])
    except json.JSONDecodeError:
        return line
    if not isinstance(data, dict):
        return line

    choices = data.get("choices")
    if isinstance(choices, list):
        for choice in choices:
            delta = choice.get("delta") if isinstance(choice, dict) else None
            if not isinstance(delta, dict):
                continue
            content = delta.get("content")
            if isinstance(content, str) and content:
                # Two-character backslash + "n" becomes an actual line break.
                delta["content"] = content.replace("\\n", "\n")
    return "data: " + json.dumps(data, ensure_ascii=False)


def fix_chunk_formatting(chunk_bytes: bytes) -> bytes:
    r"""Turn literal ``\n`` sequences in streamed SSE deltas into real newlines.

    The chunk is decoded as UTF-8, split on line breaks, and every
    ``data: {...}`` line is passed through ``_fix_sse_line``. Each line is
    handled independently, so one odd payload no longer prevents the other
    lines of the same chunk from being fixed.

    Args:
        chunk_bytes: Raw bytes taken from the upstream event stream.

    Returns:
        The rewritten bytes, or ``chunk_bytes`` unchanged when it is not valid
        UTF-8 or the rewritten text cannot be encoded back to UTF-8.
    """
    try:
        text = chunk_bytes.decode("utf-8")
    except UnicodeDecodeError:
        # e.g. a chunk cut in the middle of a multi-byte character.
        return chunk_bytes

    fixed_text = "\n".join(_fix_sse_line(line) for line in text.split("\n"))
    try:
        return fixed_text.encode("utf-8")
    except UnicodeEncodeError:
        # A JSON-escaped lone surrogate survives json.loads but cannot be
        # written as raw UTF-8; forward the original bytes instead.
        return chunk_bytes


@web_app.post("/")
async def proxy(request: Request):
    """Forward a chat-completion request to GLM_URL and stream the reply back.

    The incoming JSON object is sent upstream with ``stream``, ``model``,
    ``temperature``, ``max_tokens`` and
    ``chat_template_kwargs.enable_thinking`` overridden, and with the
    formatting prompt injected into ``messages`` when that key is present.

    Returns:
        * a ``text/event-stream`` StreamingResponse on upstream 200;
        * the upstream body and status code on any other upstream status;
        * 400 when the request body is not a JSON object;
        * 502 on a ``requests`` network error;
        * 500 on any other failure.
    """
    # requests is a blocking client; its calls are pushed to a worker thread
    # so the event loop is not stalled while waiting for the upstream.
    from fastapi.concurrency import run_in_threadpool

    print(f"\n[>>>] {time.strftime('%H:%M:%S')} - НОВЫЙ ЗАПРОС")

    try:
        try:
            body = await request.json()
        except ValueError as e:
            return Response(content=f"Invalid JSON body: {e}", status_code=400)
        if not isinstance(body, dict):
            return Response(content="Request body must be a JSON object", status_code=400)

        body['stream'] = True
        body['model'] = "z-ai/glm-5.1"
        body['temperature'] = 1.0
        body['max_tokens'] = 16384

        # Tolerate a null / non-object value sent by the client.
        template_kwargs = body.get("chat_template_kwargs")
        if not isinstance(template_kwargs, dict):
            template_kwargs = {}
        body["chat_template_kwargs"] = {**template_kwargs, "enable_thinking": True}

        if 'messages' in body:
            body['messages'] = inject_system_prompt(body['messages'])

        headers = {
            "Authorization": f"Bearer {GLM_API_KEY}",
            "Content-Type": "application/json",
            "Connection": "keep-alive"
        }

        try:
            r = await run_in_threadpool(
                requests.post,
                GLM_URL,
                headers=headers,
                json=body,
                stream=True,
                timeout=600,
            )
            print(f"[<<<] ОТВЕТ ОТ GLM: {r.status_code}")

            if r.status_code != 200:
                try:
                    # Reading .text downloads the body, so it is blocking too.
                    error_text = await run_in_threadpool(lambda: r.text)
                finally:
                    r.close()
                return Response(content=error_text, status_code=r.status_code)
        except requests.exceptions.RequestException as e:
            print(f"!!! ОШИБКА СЕТИ: {e}")
            return Response(content=f"Network error: {e}", status_code=502)

        def generate():
            # Network chunks end at arbitrary bytes. Only whole lines are
            # handed to fix_chunk_formatting, so neither a JSON payload nor a
            # multi-byte character is ever processed in two halves.
            pending = b""
            try:
                for chunk in r.iter_content(chunk_size=1024):
                    if not chunk:
                        continue
                    pending += chunk
                    complete, newline, pending = pending.rpartition(b"\n")
                    if newline:
                        yield fix_chunk_formatting(complete + newline)
                if pending:
                    # Upstream ended without a final line break.
                    yield fix_chunk_formatting(pending)
            except Exception as e:
                print(f"[!] Стриминг прерван: {e}")
            finally:
                # Release the upstream connection on completion, error or
                # early termination of the generator.
                r.close()

        return StreamingResponse(
            generate(),
            media_type="text/event-stream",
            headers={
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "X-Accel-Buffering": "no"
            }
        )

    except Exception as e:
        print(f"!!! КРИТИЧЕСКАЯ ОШИБКА: {str(e)}")
        return Response(content=f"Proxy error: {str(e)}", status_code=500)


# timeout=600 matches the 600-second timeout used for the upstream request in
# proxy(). NOTE(review): min_containers=1 presumably keeps one container
# running at all times - confirm against the Modal documentation.
@app.function(timeout=600, min_containers=1)
@modal.asgi_app()
def fastapi_app() -> FastAPI:
    """Return the FastAPI application object for Modal's ASGI integration."""
    return web_app