{"passport":{"unfragile":{"@version":"1.0","version":"2026-05","artifact":{"id":"github-joinly-ai--joinly","slug":"joinly-ai--joinly","name":"joinly","type":"product","url":"https://joinly.ai","page_url":"https://unfragile.ai/joinly-ai--joinly","categories":["chatbots-assistants"],"tags":["agentic-ai","ai-agent","ai-tool","conversational-ai","llm","mcp","meeting-agent","meeting-assistant","meeting-notes","productivity","python","transcription","voice-ai"],"pricing":{"model":"open_source","free":true,"starting_price":null},"status":"active","verified":false},"capabilities":[{"id":"github-joinly-ai--joinly__cap_0","uri":"capability://automation.workflow.browser.based.meeting.platform.joining.with.platform.specific.automation","name":"browser-based meeting platform joining with platform-specific automation","description":"Enables AI agents to join Google Meet, Zoom, and Microsoft Teams meetings through Playwright-based browser automation with platform-specific controllers that handle each platform's unique UI patterns, authentication flows, and meeting state management. The BrowserMeetingProvider abstracts platform differences while delegating to GoogleMeetController, ZoomController, and TeamsController for platform-specific interactions, managing virtual display (Xvfb) and audio device routing.","intents":["I want my AI agent to automatically join a scheduled video meeting on Google Meet, Zoom, or Teams","I need to handle platform-specific UI quirks when joining meetings programmatically","I want to manage meeting lifecycle (join, stay in call, leave) without manual intervention"],"best_for":["teams building meeting-aware AI agents","developers automating meeting participation workflows","enterprises needing AI agents in standardized video platforms"],"limitations":["Requires headless browser environment with virtual display (Xvfb) and audio device support","Platform UI changes may break automation until controllers are updated","Cannot bypass platform authentication — requires valid meeting links or credentials","Browser automation adds 3-5 second latency for meeting join operations"],"requires":["Docker container with Xvfb virtual display","PulseAudio or equivalent audio device for I/O","Playwright browser driver (Chromium)","Valid meeting URL or credentials for target platform","Python 3.9+"],"input_types":["meeting URL (string)","platform identifier (enum: google_meet, zoom, teams)","optional credentials (username, password)"],"output_types":["meeting session object with connection state","audio stream handle for transcription pipeline","video stream metadata"],"categories":["automation-workflow","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-joinly-ai--joinly__cap_1","uri":"capability://data.processing.analysis.real.time.audio.capture.and.voice.activity.detection.pipeline","name":"real-time audio capture and voice activity detection pipeline","description":"Captures audio from meeting participants in real-time through PulseAudio integration and applies Voice Activity Detection (VAD) to filter silence and background noise before sending to transcription. The DefaultTranscriptionController orchestrates the VAD → STT pipeline, using pluggable VAD service providers (local or cloud-based) to reduce transcription costs by only processing segments with actual speech.","intents":["I want to capture only speech segments from meetings, not silence or background noise","I need to reduce transcription API costs by filtering non-speech audio","I want real-time audio processing with minimal latency for agent responsiveness"],"best_for":["cost-conscious teams using cloud STT services","developers building low-latency meeting agents","deployments with bandwidth or compute constraints"],"limitations":["VAD accuracy varies by audio quality and background noise levels","Local VAD adds ~50-100ms latency per audio chunk","PulseAudio configuration required for audio device routing","Cannot distinguish between multiple speakers without additional speaker diarization"],"requires":["PulseAudio daemon running in container","VAD service provider configured (local or cloud API key)","Audio format specification (sample rate, bit depth, channels)","Python 3.9+"],"input_types":["raw audio stream (PCM, configurable sample rate)","audio format metadata (AudioFormat type)"],"output_types":["filtered audio chunks (only speech segments)","VAD confidence scores","audio frame timestamps"],"categories":["data-processing-analysis","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-joinly-ai--joinly__cap_10","uri":"capability://tool.use.integration.client.sdk.with.joinlyclient.api.for.agent.development","name":"client sdk with joinlyclient api for agent development","description":"Provides high-level Python SDK (joinly-client package) with JoinlyClient class that abstracts MCP communication and session management, enabling developers to build meeting agents without understanding MCP protocol details. SDK handles connection lifecycle, tool calling, and transcript streaming, providing a simple async API for agent code.","intents":["I want to build a meeting agent without learning MCP protocol details","I need a simple Python API to join meetings and interact with participants","I want to focus on agent logic instead of infrastructure"],"best_for":["Python developers building meeting agents","teams prioritizing rapid agent development","developers unfamiliar with MCP protocol"],"limitations":["SDK abstractions add ~50-100ms latency per operation","Limited to Python — no JavaScript or other language support","SDK version must match server version for compatibility","No built-in error recovery — requires manual retry logic"],"requires":["Python 3.9+","joinly-client package installed","Joinly server running and accessible","LLM provider API key (OpenAI, Anthropic, etc.)"],"input_types":["meeting URL","LLM provider configuration","agent system prompt"],"output_types":["JoinlyClient instance","agent responses and tool call results"],"categories":["tool-use-integration","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-joinly-ai--joinly__cap_11","uri":"capability://data.processing.analysis.shared.type.system.and.protocol.definitions.for.cross.package.consistency","name":"shared type system and protocol definitions for cross-package consistency","description":"Defines shared data types (Transcript, AudioFormat, AudioChunk) and service provider protocols in joinly-common package, ensuring consistent interfaces across server and client packages. Protocols define expected behavior for VAD, STT, and TTS providers, enabling type-safe provider implementations and reducing integration errors.","intents":["I want to ensure type consistency between server and client code","I need to implement custom service providers that integrate seamlessly","I want to avoid serialization/deserialization errors between packages"],"best_for":["teams extending Joinly with custom providers","developers building multi-package systems","projects requiring type safety across package boundaries"],"limitations":["Type definitions must be manually updated when adding new capabilities","Protocol definitions don't enforce runtime behavior — only type contracts","No schema validation — relies on Python type hints which aren't enforced at runtime","Changes to shared types require coordinated updates across packages"],"requires":["Python 3.9+ with type hints support","joinly-common package installed","understanding of Python protocols and type hints"],"input_types":["service provider implementations","data objects (Transcript, AudioChunk, etc.)"],"output_types":["type-checked data flowing between packages","protocol compliance validation"],"categories":["data-processing-analysis","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-joinly-ai--joinly__cap_2","uri":"capability://data.processing.analysis.speech.to.text.transcription.with.pluggable.provider.support","name":"speech-to-text transcription with pluggable provider support","description":"Converts filtered audio segments to text using configurable STT service providers (e.g., OpenAI Whisper, Google Cloud Speech, local models). The DefaultTranscriptionController receives VAD-filtered audio chunks and routes them to the selected STT provider, returning Transcript objects with text, confidence scores, and timing metadata for agent consumption.","intents":["I want to transcribe meeting audio using my preferred STT provider","I need transcripts with timing information to correlate with meeting events","I want to switch STT providers without changing agent code"],"best_for":["teams with existing STT provider relationships","developers building multi-provider agent systems","deployments with specific compliance or latency requirements"],"limitations":["STT latency varies by provider (100ms-2s depending on audio length and provider)","Requires API credentials for cloud providers","Accuracy depends on audio quality, speaker accents, and domain-specific terminology","No built-in speaker identification — all transcripts attributed to 'meeting' source"],"requires":["STT provider API key or local model weights","Audio format compatible with selected provider","Network connectivity for cloud providers","Python 3.9+"],"input_types":["audio chunks (PCM or encoded format)","audio metadata (duration, sample rate, channels)"],"output_types":["Transcript objects (text, confidence, start_time, end_time)","structured transcript with speaker and timing metadata"],"categories":["data-processing-analysis","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-joinly-ai--joinly__cap_3","uri":"capability://text.generation.language.text.to.speech.synthesis.with.real.time.audio.output","name":"text-to-speech synthesis with real-time audio output","description":"Converts agent text responses to speech and outputs audio to the meeting in real-time using configurable TTS service providers (e.g., Resemble, Google Cloud TTS, local TTS engines). The DefaultSpeechController manages the TTS → audio output pipeline, handling audio format conversion, buffering, and PulseAudio device routing to ensure agent speech is heard by meeting participants.","intents":["I want my AI agent to speak responses aloud in the meeting","I need natural-sounding speech synthesis with minimal latency","I want to choose TTS providers based on voice quality or cost"],"best_for":["teams building conversational meeting agents","developers prioritizing natural interaction experience","deployments with specific voice or language requirements"],"limitations":["TTS latency (200ms-1s) creates perceptible delay before agent speaks","Voice quality and naturalness vary significantly by provider","Requires audio output device routing through PulseAudio","No built-in prosody control — agent speech lacks intonation variation"],"requires":["TTS provider API key or local model","PulseAudio sink configured for meeting audio output","Text input in supported language for selected provider","Python 3.9+"],"input_types":["agent response text (string)","optional voice/language parameters"],"output_types":["audio stream to PulseAudio sink","audio metadata (duration, format)"],"categories":["text-generation-language","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-joinly-ai--joinly__cap_4","uri":"capability://tool.use.integration.mcp.based.meeting.tool.exposure.for.llm.agents","name":"mcp-based meeting tool exposure for llm agents","description":"Exposes meeting capabilities (join, transcribe, speak, get participants, etc.) as standardized Model Context Protocol (MCP) tools that LLM agents can call. The FastMCP server interface wraps meeting operations as callable tools with JSON schemas, enabling any MCP-compatible LLM client to interact with meetings through a standard protocol without needing to understand Joinly's internal APIs.","intents":["I want my LLM agent to call meeting operations using standard MCP tool calling","I need to integrate Joinly with Claude, GPT, or other MCP-compatible LLMs","I want to expose meeting state and actions through a standardized interface"],"best_for":["teams using Claude or other MCP-compatible LLMs","developers building multi-agent systems with standardized tool interfaces","enterprises needing interoperability between different AI platforms"],"limitations":["MCP protocol adds ~50-100ms latency per tool call (HTTP round-trip)","Tool schemas must be manually maintained in sync with backend capabilities","No built-in rate limiting or quota management for tool calls","Requires MCP-compatible LLM client (not all LLMs support MCP)"],"requires":["FastMCP server running (included in Joinly server)","MCP-compatible LLM client (Claude, custom implementation)","HTTP connectivity between client and server","Python 3.9+"],"input_types":["MCP tool call requests (JSON with tool name and parameters)"],"output_types":["MCP tool responses (JSON with result or error)","Server-Sent Events for real-time updates (transcripts, participant changes)"],"categories":["tool-use-integration","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-joinly-ai--joinly__cap_5","uri":"capability://automation.workflow.session.management.and.dependency.injection.for.meeting.orchestration","name":"session management and dependency injection for meeting orchestration","description":"Manages meeting session lifecycle (creation, state tracking, resource cleanup) through the MeetingSession orchestrator class, using dependency injection to wire together platform providers, audio controllers, and service implementations. Sessions maintain state across multiple operations, handle concurrent audio processing, and ensure proper resource cleanup on meeting termination.","intents":["I want to manage multiple concurrent meeting sessions without resource conflicts","I need to track meeting state (participants, audio status, connection health)","I want to ensure audio devices and browser processes are properly cleaned up"],"best_for":["teams running multiple meeting agents simultaneously","developers building production meeting systems with resource constraints","deployments requiring reliable session lifecycle management"],"limitations":["Session state is in-memory only — no persistence across server restarts","Concurrent sessions share PulseAudio and browser resources, limiting scalability","No built-in session recovery — failed sessions require manual restart","Dependency injection adds complexity to session initialization"],"requires":["MeetingSession class instantiation with configured providers","Dependency injection container with all service implementations","Python 3.9+","Docker container with resource limits"],"input_types":["session configuration (meeting URL, platform, provider settings)","service provider instances (STT, TTS, VAD)"],"output_types":["MeetingSession object with state tracking","session lifecycle events (joined, participant_added, left)"],"categories":["automation-workflow","planning-reasoning"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-joinly-ai--joinly__cap_6","uri":"capability://planning.reasoning.conversational.agent.framework.with.llm.integration","name":"conversational agent framework with llm integration","description":"Provides ConversationalToolAgent class that wraps LLM integration for building meeting agents that can understand meeting context, call MCP tools, and generate responses. The agent maintains conversation history, handles tool calling loops, and integrates with any LLM provider that supports function calling (OpenAI, Anthropic, local models via Ollama).","intents":["I want to build an AI agent that understands meeting context and responds conversationally","I need to integrate my preferred LLM (GPT, Claude, Llama) with meeting operations","I want the agent to call meeting tools (transcribe, speak, get participants) autonomously"],"best_for":["teams building conversational meeting assistants","developers integrating multiple LLM providers","deployments with specific LLM requirements (local, proprietary, etc.)"],"limitations":["Agent reasoning latency depends on LLM response time (1-5s for cloud LLMs)","Conversation history grows unbounded — requires manual pruning for long meetings","No built-in context management — agent sees all meeting transcripts without filtering","Tool calling loops can fail if LLM doesn't follow schema correctly"],"requires":["LLM provider API key (OpenAI, Anthropic) or local LLM via Ollama","MCP tool definitions matching LLM's function calling format","Python 3.9+","ConversationalToolAgent class from joinly-client package"],"input_types":["user message or meeting transcript","meeting context (participants, current topic)","MCP tool definitions"],"output_types":["agent response text","tool calls (function name and parameters)","conversation history with reasoning"],"categories":["planning-reasoning","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-joinly-ai--joinly__cap_7","uri":"capability://tool.use.integration.multi.provider.service.abstraction.with.runtime.configuration","name":"multi-provider service abstraction with runtime configuration","description":"Provides pluggable service provider architecture for VAD, STT, and TTS, allowing runtime selection and configuration without code changes. Service providers are registered in a dependency injection container, enabling easy swapping between local and cloud implementations based on deployment environment (privacy requirements, cost, latency).","intents":["I want to use local speech services in production for privacy, but cloud services in development for cost","I need to switch STT providers based on language or domain requirements","I want to avoid vendor lock-in by supporting multiple TTS/STT providers"],"best_for":["enterprises with strict privacy or compliance requirements","teams managing multiple deployments with different constraints","developers building flexible, vendor-agnostic systems"],"limitations":["Service provider APIs vary in latency, accuracy, and cost — no unified performance guarantees","Configuration complexity increases with more provider options","Provider-specific features (e.g., voice customization) may not be portable across providers","No built-in cost tracking or provider selection optimization"],"requires":["Service provider API keys or local model weights","Configuration file or environment variables specifying provider selection","Python 3.9+","joinly-common package with service provider protocols"],"input_types":["provider configuration (type, API key, model name)","service-specific parameters (language, voice, sample rate)"],"output_types":["service provider instance ready for use","provider-specific metadata (supported languages, voices, etc.)"],"categories":["tool-use-integration","automation-workflow"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-joinly-ai--joinly__cap_8","uri":"capability://data.processing.analysis.real.time.transcript.streaming.with.timing.metadata","name":"real-time transcript streaming with timing metadata","description":"Streams transcripts from meetings to connected clients in real-time using Server-Sent Events (SSE), including timing information (start_time, end_time) and speaker metadata. The Transcript data type (from joinly-common) standardizes transcript format across all STT providers, enabling consistent agent consumption regardless of backend.","intents":["I want to receive meeting transcripts in real-time as they're generated","I need timing information to correlate transcripts with meeting events","I want to build real-time transcript displays or agent decision-making based on live speech"],"best_for":["teams building real-time meeting dashboards","developers creating live agent decision-making systems","deployments requiring low-latency transcript delivery"],"limitations":["SSE adds ~100-200ms latency per transcript event","No built-in transcript persistence — requires external storage for archival","Transcript timing accuracy depends on STT provider's timestamp precision","No speaker diarization — cannot distinguish between multiple speakers"],"requires":["HTTP client supporting Server-Sent Events","MCP server running with transcript streaming enabled","Network connectivity between client and server","Python 3.9+"],"input_types":["SSE connection to MCP server"],"output_types":["Transcript objects (text, start_time, end_time, confidence)","streaming events with real-time updates"],"categories":["data-processing-analysis","memory-knowledge"],"confidence":0.5,"matches":0,"success_rate":0},{"id":"github-joinly-ai--joinly__cap_9","uri":"capability://automation.workflow.docker.based.deployment.with.virtual.display.and.audio.device.management","name":"docker-based deployment with virtual display and audio device management","description":"Provides Docker containerization with pre-configured Xvfb virtual display, PulseAudio daemon, and Playwright browser for headless meeting automation. Multiple Docker image variants support different deployment scenarios (minimal, full-featured, GPU-accelerated), with environment variable configuration for service providers and meeting parameters.","intents":["I want to deploy meeting agents in cloud environments without physical displays or audio devices","I need reliable audio I/O routing in containerized environments","I want to scale meeting agents across multiple containers"],"best_for":["teams deploying to Kubernetes or cloud platforms","developers building scalable meeting agent infrastructure","enterprises requiring containerized, reproducible deployments"],"limitations":["Virtual display (Xvfb) adds ~5-10% CPU overhead compared to native browser","PulseAudio configuration is complex and error-prone in containers","Audio device routing requires careful Docker volume/device mapping","Scaling is limited by browser resource consumption per container (~500MB-1GB per session)"],"requires":["Docker daemon with sufficient CPU and memory","Docker image variants (minimal, full, gpu)","Environment variables for service provider configuration","Optional: GPU support for local speech models"],"input_types":["Docker environment variables (STT_PROVIDER, TTS_PROVIDER, etc.)","meeting URL and credentials"],"output_types":["running container with MCP server exposed on port","logs and metrics for monitoring"],"categories":["automation-workflow","tool-use-integration"],"confidence":0.5,"matches":0,"success_rate":0}],"trust":{"score":31,"verified":false,"data_access_risk":"low","permissions":["Docker container with Xvfb virtual display","PulseAudio or equivalent audio device for I/O","Playwright browser driver (Chromium)","Valid meeting URL or credentials for target platform","Python 3.9+","PulseAudio daemon running in container","VAD service provider configured (local or cloud API key)","Audio format specification (sample rate, bit depth, channels)","joinly-client package installed","Joinly server running and accessible"],"failure_modes":["Requires headless browser environment with virtual display (Xvfb) and audio device support","Platform UI changes may break automation until controllers are updated","Cannot bypass platform authentication — requires valid meeting links or credentials","Browser automation adds 3-5 second latency for meeting join operations","VAD accuracy varies by audio quality and background noise levels","Local VAD adds ~50-100ms latency per audio chunk","PulseAudio configuration required for audio device routing","Cannot distinguish between multiple speakers without additional speaker diarization","SDK abstractions add ~50-100ms latency per operation","Limited to Python — no JavaScript or other language support","builder identity is not verified yet","no observed match outcomes yet"],"rank_breakdown":{"adoption":0.19710181958733977,"quality":0.34,"ecosystem":0.6000000000000001,"match_graph":0.25,"freshness":0.6,"weights":{"adoption":0.25,"quality":0.25,"ecosystem":0.1,"match_graph":0.35,"freshness":0.05}},"observed_outcomes":{"matches":0,"success_rate":0,"avg_confidence":0,"top_intents":[],"last_matched_at":null},"maintenance":{"status":"active","updated_at":"2026-05-24T12:16:21.550Z","last_scraped_at":"2026-05-03T13:57:13.678Z","last_commit":"2026-03-19T15:13:10Z"},"community":{"stars":496,"forks":81,"weekly_downloads":null,"model_downloads":null,"model_likes":null}},"distribution":{"claim_url":"https://unfragile.ai/submit?claim=joinly-ai--joinly","compare_url":"https://unfragile.ai/compare?artifact=joinly-ai--joinly"}},"signature":"40Gq3pjYYxziJ1g1Erq8Kp1F9XbvQA/0okK0b2P62/Pelc0hroUpO2+24nLpuXpOcVPnrmLzW/oZWnOJBO3iCg==","signedAt":"2026-06-22T11:53:00.305Z","signedBy":"unfragile.ai","version":1},"_links":{"self":"https://unfragile.ai/api/v1/passport/joinly-ai--joinly","artifact":"https://unfragile.ai/joinly-ai--joinly","verify":"https://unfragile.ai/api/v1/verify?slug=joinly-ai--joinly","publicKey":"https://unfragile.ai/api/v1/trust-passport-public-key","spec":"https://unfragile.ai/trust","schema":"https://unfragile.ai/schema.json","docs":"https://unfragile.ai/docs"}}