# Source code for autogen_ext.tools.azure._config

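"""Configuration for the Azure AI Search tool.

This module provides the configuration classes for the Azure AI Search tool,
including settings for authentication, search behavior, retry policies, and
caching.
"""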

import logging
from typing import (
    List,
    Literal,
    Optional,
    TypeVar,
    Union,
)

from azure.core.credentials import AzureKeyCredential
from azure.core.credentials_async import AsyncTokenCredential
from pydantic import BaseModel, Field, field_validator, model_validator

# API version string used as the default for ``AzureAISearchConfig.api_version``.
DEFAULT_API_VERSION = "2023-10-01-preview"

# Closed set of values accepted for the ``query_type`` setting.
QueryTypeLiteral = Literal["simple", "full", "semantic", "vector"]

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Type variable bound (by forward reference) to the config class defined below.
T = TypeVar("T", bound="AzureAISearchConfig")


class AzureAISearchConfig(BaseModel):
    """Configuration for Azure AI Search with validation.

    This class defines the configuration parameters for Azure AI Search tools,
    including authentication, search behavior, caching, and embedding settings.

    .. note::
        This class requires the ``azure`` extra for the ``autogen-ext`` package.

        .. code-block:: bash

            pip install -U "autogen-ext[azure]"

    .. note::
        **Prerequisites:**

        1. An Azure AI Search service must be created in your Azure subscription.
        2. The search index must be properly configured for your use case:

           - Vector search: the index must contain vector fields
           - Semantic search: the index must have a semantic configuration
           - Hybrid search: both vector fields and text fields must be configured

        3. Required packages:

           - Base functionality: ``azure-search-documents>=11.4.0``
           - Azure OpenAI embeddings: ``openai azure-identity``
           - OpenAI embeddings: ``openai``

    Example usage:
        .. code-block:: python

            from azure.core.credentials import AzureKeyCredential
            from autogen_ext.tools.azure import AzureAISearchConfig

            # Basic configuration for full-text search
            config = AzureAISearchConfig(
                name="doc-search",
                endpoint="https://your-search.search.windows.net",  # Your Azure AI Search endpoint
                index_name="<your-index>",  # Name of your search index
                credential=AzureKeyCredential("<your-key>"),  # Your Azure AI Search admin key
                query_type="simple",
                search_fields=["content", "title"],  # Update with your searchable fields
                top=5,
            )

            # Configuration for vector search with Azure OpenAI embeddings
            vector_config = AzureAISearchConfig(
                name="vector-search",
                endpoint="https://your-search.search.windows.net",
                index_name="<your-index>",
                credential=AzureKeyCredential("<your-key>"),
                query_type="vector",
                vector_fields=["embedding"],  # Update with your vector field name
                embedding_provider="azure_openai",
                embedding_model="text-embedding-ada-002",
                openai_endpoint="https://your-openai.openai.azure.com",  # Your Azure OpenAI endpoint
                openai_api_key="<your-openai-key>",  # Your Azure OpenAI key
                top=5,
            )

            # Configuration for hybrid search with semantic ranking
            hybrid_config = AzureAISearchConfig(
                name="hybrid-search",
                endpoint="https://your-search.search.windows.net",
                index_name="<your-index>",
                credential=AzureKeyCredential("<your-key>"),
                query_type="semantic",
                semantic_config_name="<your-semantic-config>",  # Your semantic configuration name
                search_fields=["content", "title"],  # Update with your search fields
                vector_fields=["embedding"],  # Update with your vector field name
                embedding_provider="openai",
                embedding_model="text-embedding-ada-002",
                openai_api_key="<your-openai-key>",  # Your OpenAI API key
                top=5,
            )
    """

    # --- Identity and connection -------------------------------------------
    name: str = Field(description="The name of this tool instance")
    description: Optional[str] = Field(default=None, description="Description explaining the tool's purpose")
    endpoint: str = Field(description="The full URL of your Azure AI Search service")
    index_name: str = Field(description="Name of the search index to query")
    credential: Union[AzureKeyCredential, AsyncTokenCredential] = Field(
        description="Azure credential for authentication (API key or token)"
    )
    api_version: str = Field(
        default=DEFAULT_API_VERSION,
        description=f"Azure AI Search API version to use. Defaults to {DEFAULT_API_VERSION}.",
    )

    # --- Search behavior ---------------------------------------------------
    query_type: QueryTypeLiteral = Field(
        default="simple", description="Type of search to perform: simple, full, semantic, or vector"
    )
    search_fields: Optional[List[str]] = Field(default=None, description="Fields to search within documents")
    select_fields: Optional[List[str]] = Field(default=None, description="Fields to return in search results")
    vector_fields: Optional[List[str]] = Field(default=None, description="Fields to use for vector search")
    top: Optional[int] = Field(
        default=None, description="Maximum number of results to return. For vector searches, acts as k in k-NN."
    )
    filter: Optional[str] = Field(default=None, description="OData filter expression to refine search results")
    semantic_config_name: Optional[str] = Field(
        default=None, description="Semantic configuration name for enhanced results"
    )

    # --- Caching -----------------------------------------------------------
    enable_caching: bool = Field(default=False, description="Whether to cache search results")
    cache_ttl_seconds: int = Field(default=300, description="How long to cache results in seconds")

    # --- Client-side embeddings --------------------------------------------
    embedding_provider: Optional[str] = Field(
        default=None, description="Name of embedding provider for client-side embeddings"
    )
    embedding_model: Optional[str] = Field(default=None, description="Model name for client-side embeddings")
    openai_api_key: Optional[str] = Field(default=None, description="API key for OpenAI/Azure OpenAI embeddings")
    openai_api_version: Optional[str] = Field(default=None, description="API version for Azure OpenAI embeddings")
    openai_endpoint: Optional[str] = Field(default=None, description="Endpoint URL for Azure OpenAI embeddings")

    # The credential classes are not pydantic models, so arbitrary types must
    # be allowed for the ``credential`` field to be accepted.
    model_config = {"arbitrary_types_allowed": True}

    @field_validator("endpoint")
    @classmethod
    def validate_endpoint(cls, v: str) -> str:
        """Validate that the endpoint is an HTTP(S) URL.

        Args:
            v: The endpoint value supplied by the caller.

        Returns:
            The endpoint, unchanged.

        Raises:
            ValueError: If the value does not start with ``http://`` or ``https://``.
        """
        if not v.startswith(("http://", "https://")):
            raise ValueError("endpoint must be a valid URL starting with http:// or https://")
        return v

    # Runs in "before" mode: in the default "after" mode the Literal check has
    # already rejected "fulltext" and empty values, so the normalization below
    # could never take effect.
    @field_validator("query_type", mode="before")
    @classmethod
    def normalize_query_type(cls, v: object) -> object:
        """Normalize the raw query type to one of the canonical values.

        Args:
            v: The raw, not-yet-validated ``query_type`` input.

        Returns:
            ``"simple"`` when the input is ``None`` or an empty string,
            ``"full"`` when the input is the alias ``"fulltext"`` (compared
            case-insensitively), otherwise the input unchanged so that the
            regular ``Literal`` validation decides whether it is acceptable.
        """
        if v is None or v == "":
            return "simple"
        if isinstance(v, str) and v.lower() == "fulltext":
            return "full"
        return v

    @field_validator("top")
    @classmethod
    def validate_top(cls, v: Optional[int]) -> Optional[int]:
        """Ensure ``top`` is a positive integer when it is provided.

        Args:
            v: The ``top`` value, or ``None`` when not set.

        Returns:
            The value, unchanged.

        Raises:
            ValueError: If the value is zero or negative.
        """
        if v is not None and v <= 0:
            raise ValueError("top must be a positive integer")
        return v

    @model_validator(mode="after")
    def validate_interdependent_fields(self) -> "AzureAISearchConfig":
        """Validate fields that depend on each other, after all fields are parsed.

        Returns:
            The validated model instance.

        Raises:
            ValueError: If ``query_type`` is ``"semantic"`` without a
                ``semantic_config_name``, if ``query_type`` is ``"vector"``
                without ``vector_fields``, or if the ``azure_openai``
                embedding provider is configured with an ``embedding_model``
                but no ``openai_endpoint``.
        """
        if self.query_type == "semantic" and not self.semantic_config_name:
            raise ValueError("semantic_config_name must be provided when query_type is 'semantic'")

        if self.query_type == "vector" and not self.vector_fields:
            raise ValueError("vector_fields must be provided for vector search")

        # The endpoint is only required once an Azure OpenAI embedding model
        # has actually been selected.
        if (
            self.embedding_provider
            and self.embedding_provider.lower() == "azure_openai"
            and self.embedding_model
            and not self.openai_endpoint
        ):
            raise ValueError("openai_endpoint must be provided for azure_openai embedding provider")

        return self