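"""FastAPI backend and Gradio frontend for LlamaResearcher.

Exposes a rate-limited, API-key-protected /chat endpoint that guards the user
prompt and runs the research agent workflow, plus a Gradio chat UI mounted on
the same app to display the agentic process and the final essay.
"""
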
# Standard library
import json
from contextlib import asynccontextmanager

# Third-party
import gradio as gr
import redis.asyncio as redis
import requests
from fastapi import Depends, FastAPI, Header, HTTPException
from fastapi.responses import ORJSONResponse
from fastapi_limiter import FastAPILimiter
from fastapi_limiter.depends import RateLimiter
from pydantic import BaseModel

# Local agent workflow and prompt guard
from agent import workflow, guard_prompt, ToolCall, ToolCallResult

class ApiInput(BaseModel):
    """Request body for the /chat endpoint."""
    prompt: str

class ApiOutput(BaseModel):
    """Response body: safety verdict, final response, and a trace of the agentic process."""
    is_safe_prompt: bool
    response: str
    process: str

# Read the internal API key from the mounted Docker secret; strip any trailing
# newline so header comparisons are exact. The `with` block closes the file,
# so no explicit close() is needed.
with open("/run/secrets/internal_key", "r") as f:
    internal_key = f.read().strip()

@asynccontextmanager
async def lifespan(_: FastAPI):
    # fastapi-limiter needs a Redis backend to track request counts;
    # the instance is reachable at the `llama_redis` host.
    redis_connection = redis.from_url("redis://llama_redis:6379", encoding="utf8")
    await FastAPILimiter.init(redis_connection)
    yield
    await FastAPILimiter.close()

async def check_api_key(x_api_key: str = Header(None)):
    # Dependency that validates the x-api-key header against the internal secret
    if x_api_key != internal_key:
        raise HTTPException(status_code=401, detail="Invalid API key")
    return x_api_key

app = FastAPI(default_response_class=ORJSONResponse, lifespan=lifespan)

# Simple health-check endpoint, rate-limited to 10 requests per second
@app.get("/test", dependencies=[Depends(RateLimiter(times=10, seconds=1))])
async def index():
    return {"response": "Hello world!"}

# Main endpoint: guard the prompt, then run the agent workflow while collecting
# a readable trace of its tool calls. Rate-limited to 10 requests per minute.
@app.post("/chat", dependencies=[Depends(RateLimiter(times=10, seconds=60))])
async def chat(inpt: ApiInput, x_api_key: str = Depends(check_api_key)) -> ApiOutput:
    is_safe, r = await guard_prompt(inpt.prompt)
    process = ""
    if not is_safe:
        return ApiOutput(is_safe_prompt=is_safe, response="I cannot produce an essay about this topic", process=r)
    handler = workflow.run(user_msg=inpt.prompt)
    async for event in handler.stream_events():
        if isinstance(event, ToolCall):
            process += (
                f"Calling tool **{event.tool_name}** with arguments:\n"
                f"```json\n{json.dumps(event.tool_kwargs, indent=4)}\n```\n\n"
            )
        if isinstance(event, ToolCallResult):
            process += f"Tool call result for **{event.tool_name}**: {event.tool_output}\n\n"
    response = await handler
    return ApiOutput(is_safe_prompt=is_safe, response=str(response), process=process)

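# Example request against the /chat endpoint (hypothetical topic; the x-api-key
# value must match the secret mounted at /run/secrets/internal_key):
#   curl -X POST http://localhost:80/chat \
#        -H "Content-Type: application/json" -H "x-api-key: $INTERNAL_KEY" \
#        -d '{"prompt": "Renewable energy adoption in Europe"}'
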
def add_message(history: list, message: dict):
    # Append the user message to the chat history and lock the textbox while the bot replies
    if message is not None:
        history.append({"role": "user", "content": message})
    return history, gr.Textbox(value=None, interactive=False)

def bot(history: list):
    # Forward the latest user message to the /chat endpoint of this same app
    headers = {"Content-Type": "application/json", "x-api-key": internal_key}
    response = requests.post(
        "http://localhost:80/chat",
        json=ApiInput(prompt=history[-1]["content"]).model_dump(),
        headers=headers,
    )
    if response.status_code == 200:
        data = response.json()
        res = data["response"]
        history.append({"role": "assistant", "content": f"## Agentic Process\n\n{data['process']}"})
    elif response.status_code == 429:
        res = "Sorry, we are having high traffic at the moment... Try again later!"
        history.append({"role": "assistant", "content": res})
    else:
        res = "Sorry, an internal error occurred. Feel free to report the bug on [GitHub discussions](https://github.com/AstraBert/llama-4-researcher/discussions/)"
        history.append({"role": "assistant", "content": res})
    return history, "# Canvas\n\n---\n\n" + res

with gr.Blocks(theme=gr.themes.Citrus(), title="LlamaResearcher") as frontend:
    title = gr.HTML("<h1 align='center'>LlamaResearcher</h1>\n<h2 align='center'>From topic to essay in seconds!</h2>")
    with gr.Row():
        with gr.Column():
            # Left column: the essay canvas
            canvas = gr.Markdown(label="Canvas", show_label=True, show_copy_button=True, container=True, min_height=700)
        with gr.Column():
            # Right column: the chat interface with its input box
            chatbot = gr.Chatbot(elem_id="chatbot", type="messages", min_height=700, min_width=700, label="LlamaResearcher Chat")
            with gr.Row():
                chat_input = gr.Textbox(
                    interactive=True,
                    placeholder="Enter message...",
                    show_label=False,
                    submit_btn=True,
                    stop_btn=True,
                )

    # On submit: record the user message, call the backend, then re-enable the textbox
    chat_msg = chat_input.submit(
        add_message, [chatbot, chat_input], [chatbot, chat_input]
    )
    bot_msg = chat_msg.then(bot, chatbot, [chatbot, canvas], api_name="bot_response")
    bot_msg.then(lambda: gr.Textbox(interactive=True), None, [chat_input])

# Serve the Gradio frontend on top of the FastAPI app
app = gr.mount_gradio_app(app, frontend, "")
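
# To serve this module locally (file name assumed to be main.py):
#   uvicorn main:app --host 0.0.0.0 --port 80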