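"""Configuration for the Azure AI Search tool.

This module provides the configuration classes for the Azure AI Search tool,
covering authentication, search behavior, retry policy, and caching settings.
"""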
import logging
from typing import (
List,
Literal,
Optional,
TypeVar,
Union,
)
from azure.core.credentials import AzureKeyCredential
from azure.core.credentials_async import AsyncTokenCredential
from pydantic import BaseModel, Field, field_validator, model_validator
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Type variable bound (by forward reference) to the configuration model.
T = TypeVar("T", bound="AzureAISearchConfig")

# API version used as the default for the ``api_version`` configuration field.
DEFAULT_API_VERSION = "2023-10-01-preview"

# Values accepted for the ``query_type`` configuration field.
QueryTypeLiteral = Literal["simple", "full", "semantic", "vector"]
# Configuration model for the Azure AI Search tool.
class AzureAISearchConfig(BaseModel):
    """Configuration for Azure AI Search with validation.

    This class defines the configuration parameters for the Azure AI Search
    tool, including authentication, search behavior, caching, and embedding
    settings.

    .. note::
        This class requires the ``azure`` extra of the ``autogen-ext`` package.

        .. code-block:: bash

            pip install -U "autogen-ext[azure]"

    .. note::
        **Prerequisites:**

        1. An Azure AI Search service must be created in your Azure subscription.
        2. The search index must be properly configured for your use case:

           - Vector search: the index must contain vector fields
           - Semantic search: the index must have a semantic configuration
           - Hybrid search: both vector fields and text fields must be configured

        3. Required packages:

           - Base functionality: ``azure-search-documents>=11.4.0``
           - Azure OpenAI embeddings: ``openai azure-identity``
           - OpenAI embeddings: ``openai``

    Example usage:
        .. code-block:: python

            from azure.core.credentials import AzureKeyCredential
            from autogen_ext.tools.azure import AzureAISearchConfig

            # Basic configuration for full-text search
            config = AzureAISearchConfig(
                name="doc-search",
                endpoint="https://your-search.search.windows.net",  # Your Azure AI Search endpoint
                index_name="<your-index>",  # Name of your search index
                credential=AzureKeyCredential("<your-key>"),  # Your Azure AI Search admin key
                query_type="simple",
                search_fields=["content", "title"],  # Update with your searchable fields
                top=5,
            )

            # Vector search configuration using Azure OpenAI embeddings
            vector_config = AzureAISearchConfig(
                name="vector-search",
                endpoint="https://your-search.search.windows.net",
                index_name="<your-index>",
                credential=AzureKeyCredential("<your-key>"),
                query_type="vector",
                vector_fields=["embedding"],  # Update with your vector field name
                embedding_provider="azure_openai",
                embedding_model="text-embedding-ada-002",
                openai_endpoint="https://your-openai.openai.azure.com",  # Your Azure OpenAI endpoint
                openai_api_key="<your-openai-key>",  # Your Azure OpenAI key
                top=5,
            )

            # Hybrid search configuration with semantic ranking
            hybrid_config = AzureAISearchConfig(
                name="hybrid-search",
                endpoint="https://your-search.search.windows.net",
                index_name="<your-index>",
                credential=AzureKeyCredential("<your-key>"),
                query_type="semantic",
                semantic_config_name="<your-semantic-config>",  # Your semantic configuration name
                search_fields=["content", "title"],  # Update with your search fields
                vector_fields=["embedding"],  # Update with your vector field name
                embedding_provider="openai",
                embedding_model="text-embedding-ada-002",
                openai_api_key="<your-openai-key>",  # Your OpenAI API key
                top=5,
            )
    """

    name: str = Field(description="The name of this tool instance")
    description: Optional[str] = Field(default=None, description="Description explaining the tool's purpose")
    endpoint: str = Field(description="The full URL of your Azure AI Search service")
    index_name: str = Field(description="Name of the search index to query")
    credential: Union[AzureKeyCredential, AsyncTokenCredential] = Field(
        description="Azure credential for authentication (API key or token)"
    )
    api_version: str = Field(
        default=DEFAULT_API_VERSION,
        description=f"Azure AI Search API version to use. Defaults to {DEFAULT_API_VERSION}.",
    )
    query_type: QueryTypeLiteral = Field(
        default="simple", description="Type of search to perform: simple, full, semantic, or vector"
    )
    search_fields: Optional[List[str]] = Field(default=None, description="Fields to search within documents")
    select_fields: Optional[List[str]] = Field(default=None, description="Fields to return in search results")
    vector_fields: Optional[List[str]] = Field(default=None, description="Fields to use for vector search")
    top: Optional[int] = Field(
        default=None, description="Maximum number of results to return. For vector searches, acts as k in k-NN."
    )
    filter: Optional[str] = Field(default=None, description="OData filter expression to refine search results")
    semantic_config_name: Optional[str] = Field(
        default=None, description="Semantic configuration name for enhanced results"
    )
    enable_caching: bool = Field(default=False, description="Whether to cache search results")
    cache_ttl_seconds: int = Field(default=300, description="How long to cache results in seconds")
    embedding_provider: Optional[str] = Field(
        default=None, description="Name of embedding provider for client-side embeddings"
    )
    embedding_model: Optional[str] = Field(default=None, description="Model name for client-side embeddings")
    openai_api_key: Optional[str] = Field(default=None, description="API key for OpenAI/Azure OpenAI embeddings")
    openai_api_version: Optional[str] = Field(default=None, description="API version for Azure OpenAI embeddings")
    openai_endpoint: Optional[str] = Field(default=None, description="Endpoint URL for Azure OpenAI embeddings")

    # Presumably required so the non-pydantic Azure credential classes can be
    # used as the type of the ``credential`` field.
    model_config = {"arbitrary_types_allowed": True}

    @field_validator("endpoint")
    @classmethod
    def validate_endpoint(cls, v: str) -> str:
        """Validate that the endpoint looks like an HTTP(S) URL.

        Args:
            v: The endpoint value supplied by the caller.

        Returns:
            The endpoint, unchanged.

        Raises:
            ValueError: If the value does not start with ``http://`` or ``https://``.
        """
        if not v.startswith(("http://", "https://")):
            raise ValueError("endpoint must be a valid URL starting with http:// or https://")
        return v

    # mode="before" is essential: in the default "after" mode the Literal type
    # check has already rejected "fulltext" (and empty values) by the time the
    # validator runs, which made the normalization below unreachable.
    @field_validator("query_type", mode="before")
    @classmethod
    def normalize_query_type(cls, v: object) -> object:
        """Normalize the query type to its canonical value before type validation.

        Args:
            v: The raw, not yet validated, ``query_type`` input.

        Returns:
            ``"simple"`` for empty/falsy input, ``"full"`` for the
            case-insensitive alias ``"fulltext"``, otherwise the input unchanged
            (it is then checked against the allowed literal values).
        """
        if not v:
            return "simple"
        if isinstance(v, str) and v.lower() == "fulltext":
            return "full"
        return v

    @field_validator("top")
    @classmethod
    def validate_top(cls, v: Optional[int]) -> Optional[int]:
        """Ensure that ``top``, when provided, is a positive integer.

        Args:
            v: The requested maximum number of results, or ``None``.

        Returns:
            The value, unchanged.

        Raises:
            ValueError: If the value is zero or negative.
        """
        if v is not None and v <= 0:
            raise ValueError("top must be a positive integer")
        return v

    @model_validator(mode="after")
    def validate_interdependent_fields(self) -> "AzureAISearchConfig":
        """Validate fields that depend on each other, after all fields are parsed.

        Returns:
            The validated configuration instance.

        Raises:
            ValueError: If ``query_type`` is ``"semantic"`` without a
                ``semantic_config_name``, if ``query_type`` is ``"vector"``
                without ``vector_fields``, or if the ``azure_openai`` embedding
                provider is combined with an ``embedding_model`` but no
                ``openai_endpoint``.
        """
        if self.query_type == "semantic" and not self.semantic_config_name:
            raise ValueError("semantic_config_name must be provided when query_type is 'semantic'")

        if self.query_type == "vector" and not self.vector_fields:
            raise ValueError("vector_fields must be provided for vector search")

        # The endpoint is only demanded once an embedding model is configured;
        # the provider name is compared case-insensitively.
        if (
            self.embedding_provider
            and self.embedding_provider.lower() == "azure_openai"
            and self.embedding_model
            and not self.openai_endpoint
        ):
            raise ValueError("openai_endpoint must be provided for azure_openai embedding provider")

        return self