Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 18 additions & 5 deletions observal-server/api/routes/mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,12 +90,25 @@ async def submit_mcp(
db: AsyncSession = Depends(get_db),
current_user: User = Depends(require_role(UserRole.user)),
):
# Prevent duplicate names for the same user
existing = await db.execute(
select(McpListing).where(McpListing.name == req.name, McpListing.submitted_by == current_user.id)
# Prevent duplicate names for the same user.
# Pending/rejected listings are replaced automatically so the user isn't
# blocked when re-submitting after a mistake. Approved listings are
# protected — use the update flow instead.
existing = (
(
await db.execute(
select(McpListing).where(McpListing.name == req.name, McpListing.submitted_by == current_user.id)
)
)
.scalars()
.first()
)
if existing.scalars().first():
raise HTTPException(status_code=409, detail=f"You already have a listing named '{req.name}'")
if existing:
if existing.status == ListingStatus.approved:
raise HTTPException(status_code=409, detail=f"You already have an approved listing named '{req.name}'")
# Replace the old pending/rejected listing
await db.delete(existing)
await db.flush()

listing = McpListing(
name=req.name,
Expand Down
4 changes: 2 additions & 2 deletions observal-server/services/agent_config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,9 @@ def _build_mcp_configs(
# agent file gets proper mcpServers frontmatter.
safe = _sanitize_name(listing.name)
mcp_id = str(listing.id)
run_cmd = _build_run_command(safe, listing.framework)
run_cmd = _build_run_command(safe, listing.framework, listing.docker_image, mcp_env)
shim_args = ["--mcp-id", mcp_id, "--", *run_cmd]
mcp_configs[safe] = {"command": "observal-shim", "args": shim_args, "env": {}}
mcp_configs[safe] = {"command": "observal-shim", "args": shim_args, "env": mcp_env}

for ext in agent.external_mcps or []:
name = _sanitize_name(ext.get("name", ""))
Expand Down
2 changes: 1 addition & 1 deletion observal-server/services/config_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ def _build_run_command(
- Python / unknown: python -m <name>
"""
fw = (framework or "").lower()
if "docker" in fw and docker_image:
if docker_image:
cmd = ["docker", "run", "-i", "--rm"]
for k, v in (server_env or {}).items():
cmd.extend(["-e", f"{k}={v}"])
Expand Down
160 changes: 145 additions & 15 deletions observal-server/services/mcp_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,25 @@ async def _async_clone(clone_url: str, dest: str, depth: int = 1) -> None:
)


_ENV_VAR_PATTERN = re.compile(
_ENV_VAR_PATTERN_PYTHON = re.compile(
r"""os\.environ\s*(?:\.get\s*\(\s*|\.?\[?\s*\[?\s*)["']([A-Z][A-Z0-9_]+)["']"""
r"""|os\.getenv\s*\(\s*["']([A-Z][A-Z0-9_]+)["']"""
)

# Go: os.Getenv("NAME") and os.LookupEnv("NAME") called with a double-quoted
# literal. Group 1 is the variable name (upper-case, at least two characters).
_ENV_VAR_PATTERN_GO = re.compile(r"""os\.(?:Getenv|LookupEnv)\(\s*"([A-Z][A-Z0-9_]+)"\s*\)""")

# JavaScript / TypeScript env access. Two alternatives, one capture group each:
#   group 1: dotted access     -> process.env.NAME
#   group 2: bracketed access  -> process.env["NAME"] or process.env['NAME']
# Only upper-case names of at least two characters are matched.
_ENV_VAR_PATTERN_TS = re.compile(
r"""process\.env\.([A-Z][A-Z0-9_]+)"""
r"""|process\.env\[\s*["']([A-Z][A-Z0-9_]+)["']\s*\]"""
)

# README patterns; each has exactly one capture group holding the variable name:
#   1. docker-style flags       ->  -e API_KEY
#   2. shell export statements  ->  export API_KEY=...
#   3. JSON config string keys  ->  "API_KEY": "..."
# Raw strings keep regex escapes such as \s from being interpreted as (invalid)
# Python string escapes, which newer interpreters warn about.
_README_PATTERNS = [
    re.compile(r"-e\s+([A-Z][A-Z0-9_]+)"),
    re.compile(r"export\s+([A-Z][A-Z0-9_]+)="),
    re.compile(r'"([A-Z][A-Z0-9_]+)"\s*:\s*"'),
]

# Env vars that are internal to the runtime / framework, not user-facing config
_INTERNAL_ENV_VARS = frozenset(
{
Expand All @@ -109,6 +123,7 @@ async def _async_clone(clone_url: str, dest: str, depth: int = 1) -> None:
"PORT",
"HOST",
"DEBUG",
"APP",
"LOG_LEVEL",
"LOGGING_LEVEL",
"HOSTNAME",
Expand All @@ -121,6 +136,15 @@ async def _async_clone(clone_url: str, dest: str, depth: int = 1) -> None:
}
)

# User-facing env vars that match a filtered prefix but should still be detected.
# _is_filtered_env_var checks this set before any other rule, so a name listed
# here is never filtered.
_ALLOWED_ENV_VARS = frozenset(
{
"GITHUB_TOKEN",
"GITHUB_PERSONAL_ACCESS_TOKEN",
"DOCKER_HOST",
}
)

# Prefix patterns for build/CI/infrastructure env vars that are never user-facing
_FILTERED_PREFIXES = (
"CI_",
Expand All @@ -143,33 +167,107 @@ async def _async_clone(clone_url: str, dest: str, depth: int = 1) -> None:

def _is_filtered_env_var(name: str) -> bool:
    """Decide whether *name* is internal/infrastructure and must not be prompted for.

    Rules are applied in order: the explicit allow-list wins, then the set of
    internal names, then the filtered prefixes.
    """
    if name in _ALLOWED_ENV_VARS:
        # Explicitly allowed names are never filtered, whatever their prefix.
        return False
    # str.startswith accepts a tuple, so a single call covers every prefix.
    return name in _INTERNAL_ENV_VARS or name.startswith(_FILTERED_PREFIXES)


def _detect_env_vars(tmp_dir: str) -> list[dict]:
"""Scan repo files for required environment variables.
# Directories that contain test / internal / build code — not user-facing config.
# _is_test_file compares every component of a path against this set (exact,
# case-sensitive match), so any path with a component equal to one of these
# names is skipped.
_SKIP_DIRS = frozenset(
{
"test",
"tests",
"e2e",
"internal",
"testdata",
"vendor",
"node_modules",
"__pycache__",
".git",
}
)

Scans Python source (os.environ/os.getenv) and .env.example files.
Dockerfile ENV/ARG directives are intentionally skipped — they contain
build-time variables that are not user-facing configuration.
"""
root = Path(tmp_dir)
found: dict[str, str] = {} # name -> description hint

# Scan Python files for os.environ / os.getenv
for py_file in root.rglob("*.py"):
def _is_test_file(path: Path) -> bool:
    """Return True if *path* is test/internal code that should not be scanned.

    A path qualifies when any of its components is listed in ``_SKIP_DIRS``,
    or when its file name follows a test naming convention:

    * Go / Python: ``*_test.go``, ``*_test.py``, ``test_*``
    * JavaScript / TypeScript: ``*.test.<ext>`` / ``*.spec.<ext>`` for the
      scanned extensions (``ts``, ``js``, ``mts``, ``mjs``)

    Args:
        path: File path, ideally relative to the repository root so that
            directories above the checkout do not trigger a match.
    """
    if any(part in _SKIP_DIRS for part in path.parts):
        return True
    name = path.name
    if name.startswith("test_") or name.endswith(("_test.go", "_test.py")):
        return True
    # JS/TS test runners use "<name>.test.<ext>" / "<name>.spec.<ext>"; without
    # this check, env vars referenced only by those tests would be reported.
    stem, _, ext = name.rpartition(".")
    return ext in {"ts", "js", "mts", "mjs"} and stem.endswith((".test", ".spec"))


def _scan_files_for_env_vars(root: Path, glob: str, pattern: re.Pattern, found: dict[str, str]) -> None:
"""Scan files matching *glob* for env var references using *pattern*.

Files are discovered recursively under *root*. A file is skipped when
``_is_test_file`` is true for its path relative to *root*. For every regex
match, the first non-empty capture group is taken as the variable name; names
not rejected by ``_is_filtered_env_var`` are added to *found* with an empty
description, leaving existing entries untouched. Any exception raised while
handling a file causes that file to be skipped.
"""
for path in root.rglob(glob):
# Test on the relative path so directories above *root* cannot match.
if _is_test_file(path.relative_to(root)):
continue
try:
# NOTE(review): the next three lines (py_file / _ENV_VAR_PATTERN) look like
# the pre-change side of this diff; the three after them are the replacement.
content = py_file.read_text(errors="ignore")
for m in _ENV_VAR_PATTERN.finditer(content):
name = m.group(1) or m.group(2)
content = path.read_text(errors="ignore")
for m in pattern.finditer(content):
# Patterns differ in how many alternatives (groups) they have.
name = next((g for g in m.groups() if g), None)
if name and not _is_filtered_env_var(name):
found.setdefault(name, "")
except Exception:
continue

# Scan .env.example / .env.sample for documented env vars

def _scan_readme_for_env_vars(root: Path, found: dict[str, str]) -> None:
    """Collect env var names mentioned in the repository's README.

    Candidate file names are tried in a fixed order. The first one that exists
    and can be read is matched against every pattern in ``_README_PATTERNS``
    (docker ``-e`` flags, ``export`` statements, JSON config keys); later
    candidates are then ignored. Names that pass ``_is_filtered_env_var`` are
    added to *found* with an empty description, without overwriting entries
    already present.
    """
    for candidate in ("README.md", "README.rst", "README.txt", "README"):
        readme_path = root / candidate
        if not readme_path.exists():
            continue
        try:
            text = readme_path.read_text(errors="ignore")
        except Exception:
            # Unreadable candidate: fall through to the next file name.
            continue
        for regex in _README_PATTERNS:
            for match in regex.finditer(text):
                var = match.group(1)
                if var and not _is_filtered_env_var(var):
                    found.setdefault(var, "")
        # Only the first README that was successfully read is scanned.
        return


def _extract_manifest_env_vars(root: Path, found: dict[str, str]) -> bool:
    """Extract env vars from a ``server.json`` MCP manifest (authoritative source).

    Env vars declared in the manifest are always included — they bypass the
    prefix filter since the author explicitly listed them as required.

    Two manifest sections are read:

    * ``packages[].runtimeArguments[].value`` of the form ``NAME=...``: the
      part before the first ``=`` is recorded when it is non-empty and
      contains no lower-case letters.
    * ``remotes[].variables``: every key is recorded.

    The repository content is untrusted, so entries with an unexpected shape
    (non-list sections, non-dict items, non-string values) are ignored rather
    than raising.

    Args:
        root: Root directory of the cloned repository.
        found: Mapping of variable name to description, updated in place.
            Existing entries are never overwritten.

    Returns:
        True if ``server.json`` exists and parses to a JSON object (even if it
        declares no env vars); False otherwise.
    """
    manifest = root / "server.json"
    if not manifest.exists():
        return False
    try:
        data = json.loads(manifest.read_text(errors="ignore"))
    except Exception:
        # Best effort: an unreadable or unparsable manifest counts as absent.
        return False
    if not isinstance(data, dict):
        # Valid JSON but not a manifest object (e.g. a list or bare string).
        return False

    def _dict_items(section: object) -> list[dict]:
        """Return the dict entries of *section*, or [] if it is not a list."""
        if not isinstance(section, list):
            return []
        return [item for item in section if isinstance(item, dict)]

    def _text(value: object) -> str:
        """Return *value* if it is a string, else an empty string."""
        return value if isinstance(value, str) else ""

    # packages[].runtimeArguments — Docker -e flags (e.g. GitHub MCP server)
    for pkg in _dict_items(data.get("packages")):
        for arg in _dict_items(pkg.get("runtimeArguments")):
            # Pattern: "ENV_VAR={placeholder}" — the name is the part before '='
            var_name, sep, _ = _text(arg.get("value")).partition("=")
            if sep and var_name and var_name == var_name.upper():
                found.setdefault(var_name, _text(arg.get("description")))

    # remotes[].variables — URL-interpolated secrets (e.g. ?api_key={key})
    for remote in _dict_items(data.get("remotes")):
        variables = remote.get("variables")
        if not isinstance(variables, dict):
            continue
        for var_key, var_meta in variables.items():
            desc = _text(var_meta.get("description")) if isinstance(var_meta, dict) else ""
            found.setdefault(var_key, desc)
    return True


def _scan_env_example(root: Path, found: dict[str, str]) -> None:
"""Scan .env.example / .env.sample files for documented env vars."""
for env_file in root.glob(".env*"):
if env_file.name in (".env", ".env.local"):
continue # skip actual secrets
Expand All @@ -184,6 +282,38 @@ def _detect_env_vars(tmp_dir: str) -> list[dict]:
except Exception:
continue


def _detect_env_vars(tmp_dir: str) -> list[dict]:
    """Scan repo files for required environment variables.

    Tiered detection — the first tier that applies determines the result:

    1. ``server.json`` manifest (authoritative). If a valid manifest exists,
       its declarations are returned even when it declares nothing.
    2. README (docker ``-e`` flags, ``export`` statements, JSON examples).
    3. ``.env.example``-style config templates.
    4. Source code scanning (last resort): Python, Go and JS/TS files.

    Tiers 2 and 3 end detection only when they find at least one variable.

    Args:
        tmp_dir: Path of the directory containing the cloned repository.

    Returns:
        One ``{"name", "description", "required"}`` dict per detected
        variable, sorted by name; ``required`` is always True.
    """
    root = Path(tmp_dir)
    found: dict[str, str] = {}

    def _as_result() -> list[dict]:
        """Render *found* in the response shape, sorted by variable name."""
        return [{"name": k, "description": v, "required": True} for k, v in sorted(found.items())]

    # Tier 1: MCP server manifest — authoritative, skip everything else
    if _extract_manifest_env_vars(root, found):
        return _as_result()

    # Tier 2: README — author's documented config
    # Tier 3: .env.example — explicit config template
    for scan in (_scan_readme_for_env_vars, _scan_env_example):
        scan(root, found)
        if found:
            return _as_result()

    # Tier 4: Source code scanning — last resort
    _scan_files_for_env_vars(root, "*.py", _ENV_VAR_PATTERN_PYTHON, found)
    _scan_files_for_env_vars(root, "*.go", _ENV_VAR_PATTERN_GO, found)
    for ext in ("*.ts", "*.js", "*.mts", "*.mjs"):
        _scan_files_for_env_vars(root, ext, _ENV_VAR_PATTERN_TS, found)

    return _as_result()


Expand Down
Loading
Loading