Files
Huy Doan 238d639de7 fix(providers): skip URL validation for ACP provider api_base (#610)
* fix(providers): skip URL validation for ACP provider api_base

ACP uses api_base to store agent command/binary, so enforcing http/https URL validation rejects valid ACP configs. Bypass provider URL checks for ACP while keeping URL validation for HTTP-based providers.

* fix(message): enhance target handling in message tool to support numeric chat IDs

- Introduced argString function to correctly parse numeric chat IDs from tool arguments, ensuring they are treated as strings when necessary.
- Added a new test case to verify that numeric targets are correctly processed and sent via the Telegram channel.
2026-04-01 11:55:48 +07:00

423 lines
14 KiB
Go

package tools
import (
"context"
"encoding/json"
"fmt"
"log/slog"
"os"
"path/filepath"
"regexp"
"strconv"
"strings"
"github.com/google/uuid"
"github.com/nextlevelbuilder/goclaw/internal/bus"
"github.com/nextlevelbuilder/goclaw/internal/store"
)
// embeddedMediaPattern matches "MEDIA:" followed by a non-whitespace path.
// Duplicated from agent.mediaPathPattern to avoid tools→agent import cycle.
var embeddedMediaPattern = regexp.MustCompile(`MEDIA:\S+`)
// MessageTool allows the agent to proactively send messages to channels.
type MessageTool struct {
workspace string
restrict bool
sender ChannelSender
msgBus *bus.MessageBus
tenantChecker ChannelTenantChecker
}
func NewMessageTool(workspace string, restrict bool) *MessageTool {
return &MessageTool{workspace: workspace, restrict: restrict}
}
func (t *MessageTool) SetChannelSender(s ChannelSender) { t.sender = s }
func (t *MessageTool) SetMessageBus(b *bus.MessageBus) { t.msgBus = b }
func (t *MessageTool) SetChannelTenantChecker(c ChannelTenantChecker) { t.tenantChecker = c }
func (t *MessageTool) Name() string { return "message" }
func (t *MessageTool) Description() string {
return "Send a message to a channel (Telegram, Discord, Slack, Zalo, Feishu/Lark, WhatsApp, etc.) or the current chat. Channel and target are auto-filled from context."
}
func (t *MessageTool) Parameters() map[string]any {
return map[string]any{
"type": "object",
"properties": map[string]any{
"action": map[string]any{
"type": "string",
"description": "Action to perform: 'send'",
"enum": []string{"send"},
},
"channel": map[string]any{
"type": "string",
"description": "Channel name (default: current channel from context)",
},
"target": map[string]any{
"type": "string",
"description": "Chat ID to send to (default: current chat from context)",
},
"message": map[string]any{
"type": "string",
"description": "Message content to send. To send a file as attachment, use the prefix MEDIA: followed by the file path, e.g. 'MEDIA:docs/report.pdf' or 'MEDIA:/tmp/image.png'. The file will be uploaded as a document/photo/audio depending on its type.",
},
},
"required": []string{"action", "message"},
}
}
func (t *MessageTool) Execute(ctx context.Context, args map[string]any) *Result {
action := argString(args, "action")
if action != "send" {
return ErrorResult(fmt.Sprintf("unsupported action: %s (only 'send' is supported)", action))
}
message := argString(args, "message")
if message == "" {
return ErrorResult("message is required")
}
channel := argString(args, "channel")
if channel == "" {
channel = ToolChannelFromCtx(ctx)
}
if channel == "" {
return ErrorResult("channel is required (no current channel in context)")
}
target := argString(args, "target")
if target == "" {
target = ToolChatIDFromCtx(ctx)
}
if target == "" {
return ErrorResult("target chat ID is required (no current chat in context)")
}
// Self-send guard: prevent agent from sending to its own chat via message tool.
// Text self-sends are always blocked (response goes through normal outbound).
// MEDIA self-sends are allowed ONLY when the file was NOT already queued for
// delivery (i.e. write_file was called with deliver=false). This prevents both
// duplicate delivery (deliver=true then message MEDIA:) and runaway retry loops
// (deliver=false then message MEDIA: blocked unconditionally).
ctxChannel := ToolChannelFromCtx(ctx)
ctxChatID := ToolChatIDFromCtx(ctx)
isSelfSend := ctxChannel != "" && ctxChatID != "" && channel == ctxChannel && target == ctxChatID
if isSelfSend {
isMediaSend := embeddedMediaPattern.MatchString(message)
if !isMediaSend {
return ErrorResult("You are already responding to this chat. Your response text will be delivered automatically. Do not use the message tool to send text to your own chat — just include the content in your response text. To deliver files, use write_file with deliver=true instead.")
}
// MEDIA self-send: block if ALL referenced files are already queued for delivery.
// Extracts paths from both standalone "MEDIA:path" and embedded multi-line messages.
if dm := DeliveredMediaFromCtx(ctx); dm != nil {
mediaRefs := embeddedMediaPattern.FindAllString(message, -1)
allDelivered := len(mediaRefs) > 0
for _, raw := range mediaRefs {
if filePath, ok := t.resolveMediaPath(ctx, raw); ok {
if !dm.IsDelivered(filePath) {
allDelivered = false
break
}
}
}
if allDelivered {
return ErrorResult("This file is already queued for automatic delivery via write_file(deliver=true). Do not send it again. To deliver files that were written with deliver=false, use write_file again with deliver=true, or use message(MEDIA:path) which is allowed for undelivered files.")
}
}
}
// Tenant isolation: validate channel belongs to current tenant.
if err := t.validateChannelTenant(ctx, channel, target); err != nil {
return err
}
// Handle MEDIA: prefix — send file as attachment instead of text.
if filePath, ok := t.resolveMediaPath(ctx, message); ok {
return t.sendMedia(ctx, channel, target, filePath)
}
// Extract embedded MEDIA: paths from multi-line messages.
// LLMs may include MEDIA: in conversational text rather than as a standalone prefix.
message, embeddedMedia := t.extractEmbeddedMedia(ctx, message)
// If we found embedded media and bus is available, prefer bus path (supports media attachments).
if len(embeddedMedia) > 0 && t.msgBus != nil {
outMsg := bus.OutboundMessage{
Channel: channel,
ChatID: target,
Content: message,
Media: embeddedMedia,
}
if isGroupContext(ctx) {
outMsg.Metadata = map[string]string{"group_id": target}
}
t.msgBus.PublishOutbound(outMsg)
return SilentResult(fmt.Sprintf(`{"status":"sent","channel":"%s","target":"%s"}`, channel, target))
}
// Prefer direct channel sender for immediate delivery.
// For group chats, fall through to message bus which supports metadata.
if t.sender != nil && !isGroupContext(ctx) {
if err := t.sender(ctx, channel, target, message); err != nil {
return ErrorResult(fmt.Sprintf("failed to send message: %v", err))
}
return SilentResult(fmt.Sprintf(`{"status":"sent","channel":"%s","target":"%s"}`, channel, target))
}
// Publish via message bus outbound queue.
// Group messages include metadata so channel implementations (e.g. Zalo)
// can distinguish group sends from DMs.
if t.msgBus != nil {
outMsg := bus.OutboundMessage{
Channel: channel,
ChatID: target,
Content: message,
}
if isGroupContext(ctx) {
outMsg.Metadata = map[string]string{"group_id": target}
}
t.msgBus.PublishOutbound(outMsg)
return SilentResult(fmt.Sprintf(`{"status":"sent","channel":"%s","target":"%s"}`, channel, target))
}
// Last resort: direct sender without group metadata.
if t.sender != nil {
if err := t.sender(ctx, channel, target, message); err != nil {
return ErrorResult(fmt.Sprintf("failed to send message: %v", err))
}
return SilentResult(fmt.Sprintf(`{"status":"sent","channel":"%s","target":"%s"}`, channel, target))
}
return ErrorResult("no channel sender or message bus available")
}
// validateChannelTenant checks the target channel belongs to the current tenant.
// Returns an error Result if the send should be blocked, nil if allowed.
func (t *MessageTool) validateChannelTenant(ctx context.Context, channel, target string) *Result {
if t.tenantChecker == nil {
return nil
}
chTenant, chExists := t.tenantChecker(channel)
if !chExists {
return ErrorResult(fmt.Sprintf("channel %q not found", channel))
}
// Allow: legacy/config-based channels (zero tenant) or master tenant context (system ops).
if chTenant == uuid.Nil {
return nil
}
ctxTenant := store.TenantIDFromContext(ctx)
if ctxTenant == uuid.Nil {
return nil // master tenant / system context
}
if chTenant != ctxTenant {
slog.Warn("security.cross_tenant_send_blocked",
"channel", channel, "target", target,
"ctx_tenant", ctxTenant, "ch_tenant", chTenant)
return ErrorResult("channel not accessible from this tenant")
}
return nil
}
// sendMedia sends a file as a media attachment via the outbound message bus.
func (t *MessageTool) sendMedia(ctx context.Context, channel, target, filePath string) *Result {
if _, err := os.Stat(filePath); err != nil {
return ErrorResult(fmt.Sprintf("file not found: %s", filePath))
}
if t.msgBus == nil {
return ErrorResult("media sending requires message bus")
}
// Build metadata for group routing (Zalo needs group_id to choose group API).
var meta map[string]string
if isGroupContext(ctx) {
meta = map[string]string{"group_id": target}
}
t.msgBus.PublishOutbound(bus.OutboundMessage{
Channel: channel,
ChatID: target,
Media: []bus.MediaAttachment{{URL: filePath, ContentType: mimeFromPath(filePath)}},
Metadata: meta,
})
out, _ := json.Marshal(map[string]string{
"status": "sent",
"channel": channel,
"target": target,
"media": filepath.Base(filePath),
})
return SilentResult(string(out))
}
// extractEmbeddedMedia scans a multi-line message for embedded MEDIA: path references.
// Returns cleaned text (MEDIA: lines removed) and resolved media attachments.
// Prevents raw MEDIA: paths from leaking to channels when LLMs embed them
// in conversational text instead of using a standalone MEDIA: prefix.
func (t *MessageTool) extractEmbeddedMedia(ctx context.Context, message string) (string, []bus.MediaAttachment) {
if !strings.Contains(message, "MEDIA:") {
return message, nil
}
lines := strings.Split(message, "\n")
var cleaned []string
var media []bus.MediaAttachment
for _, line := range lines {
trimmed := strings.TrimSpace(line)
// Skip [[audio_as_voice]] tags (TTS voice messages).
if strings.HasPrefix(trimmed, "[[audio_as_voice]]") {
continue
}
// Find all MEDIA: tokens on this line.
matches := embeddedMediaPattern.FindAllString(trimmed, -1)
if len(matches) == 0 {
cleaned = append(cleaned, line)
continue
}
// Extract each MEDIA: path and resolve via security-checked path resolution.
for _, raw := range matches {
if resolved, ok := t.resolveMediaPath(ctx, raw); ok {
media = append(media, bus.MediaAttachment{
URL: resolved,
ContentType: mimeFromPath(resolved),
})
}
}
// Strip MEDIA: tokens from line, keep surrounding text.
remainder := strings.TrimSpace(embeddedMediaPattern.ReplaceAllString(line, ""))
if remainder != "" {
cleaned = append(cleaned, remainder)
}
}
return strings.TrimSpace(strings.Join(cleaned, "\n")), media
}
// mimeFromPath returns a MIME type based on file extension.
// Duplicated from agent.mimeFromExt to avoid tools→agent import cycle.
func mimeFromPath(path string) string {
switch strings.ToLower(filepath.Ext(path)) {
case ".png":
return "image/png"
case ".jpg", ".jpeg":
return "image/jpeg"
case ".gif":
return "image/gif"
case ".webp":
return "image/webp"
case ".mp4":
return "video/mp4"
case ".ogg", ".opus":
return "audio/ogg"
case ".mp3":
return "audio/mpeg"
case ".wav":
return "audio/wav"
case ".pdf":
return "application/pdf"
case ".doc":
return "application/msword"
case ".docx":
return "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
case ".xls":
return "application/vnd.ms-excel"
case ".xlsx":
return "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
default:
return "application/octet-stream"
}
}
// argString reads a tool argument as a non-empty string. LLM tool JSON often encodes
// numeric chat IDs as JSON numbers (float64); a plain .(string) type assert would
// ignore them and fall back to context — wrong for proactive sends to a group.
func argString(m map[string]any, key string) string {
v, ok := m[key]
if !ok || v == nil {
return ""
}
switch s := v.(type) {
case string:
return strings.TrimSpace(s)
case float64:
if s != s { // NaN
return ""
}
// Telegram chat IDs are integers; json.Unmarshal uses float64 for all numbers.
if s == float64(int64(s)) {
return strconv.FormatInt(int64(s), 10)
}
return strings.TrimSpace(fmt.Sprintf("%.0f", s))
case int:
return strconv.FormatInt(int64(s), 10)
case int64:
return strconv.FormatInt(s, 10)
case json.Number:
return strings.TrimSpace(string(s))
default:
return strings.TrimSpace(fmt.Sprint(s))
}
}
// isGroupContext returns true if the current context indicates a group conversation.
func isGroupContext(ctx context.Context) bool {
userID := store.UserIDFromContext(ctx)
return ToolPeerKindFromCtx(ctx) == "group" ||
strings.HasPrefix(userID, "group:") ||
strings.HasPrefix(userID, "guild:")
}
// resolveMediaPath extracts and validates a file path from a "MEDIA:path" string.
// Uses the same workspace-aware path resolution as other filesystem tools.
// Multi-tenant isolation forces MEDIA: paths through restricted resolution
// first, with one explicit fallback for generated media artifacts under /tmp/.
// In practice MEDIA: paths may resolve to:
// - files inside the agent workspace
// - absolute paths under /tmp/ for generated media artifacts
//
// Relative paths are resolved against the agent's workspace.
func (t *MessageTool) resolveMediaPath(ctx context.Context, s string) (string, bool) {
s = strings.TrimSpace(s)
if !strings.HasPrefix(s, "MEDIA:") {
return "", false
}
raw := strings.TrimSpace(s[len("MEDIA:"):])
if raw == "" || raw == "." {
return "", false
}
workspace := ToolWorkspaceFromCtx(ctx)
if workspace == "" {
workspace = t.workspace
}
restrict := effectiveRestrict(ctx, t.restrict)
// resolvePath handles relative→absolute, symlink, hardlink, boundary checks.
resolved, err := resolvePath(raw, workspace, restrict)
if err != nil {
// When restricted, also allow /tmp/ paths (used by create_image, create_audio, etc.)
// But reject paths that are siblings of the workspace — these are likely traversal
// attacks where workspace/../X resolves inside /tmp/ because workspace itself is in /tmp/.
cleaned := filepath.Clean(raw)
wsParent := filepath.Dir(filepath.Clean(workspace))
if restrict && isInTempDir(cleaned) && !isPathInside(cleaned, wsParent) {
return cleaned, true
}
return "", false
}
return resolved, true
}
// isInTempDir checks whether an absolute path is inside os.TempDir().
func isInTempDir(path string) bool {
cleaned := filepath.Clean(path)
if !filepath.IsAbs(cleaned) {
return false
}
tmpDir := filepath.Clean(os.TempDir())
return strings.HasPrefix(cleaned, tmpDir+string(filepath.Separator))
}