modify the structure of the large language model options in the configuration file

This commit is contained in:
MaysWind
2025-09-21 17:49:49 +08:00
parent d9cd270ff4
commit 118558d25b
16 changed files with 195 additions and 160 deletions
+6 -3
View File
@@ -165,9 +165,6 @@ func getConfigWithoutSensitiveData(config *settings.Config) *settings.Config {
clonedConfig.DatabaseConfig.DatabasePassword = "****" clonedConfig.DatabaseConfig.DatabasePassword = "****"
clonedConfig.SMTPConfig.SMTPPasswd = "****" clonedConfig.SMTPConfig.SMTPPasswd = "****"
clonedConfig.MinIOConfig.SecretAccessKey = "****" clonedConfig.MinIOConfig.SecretAccessKey = "****"
clonedConfig.OpenAIAPIKey = "****"
clonedConfig.OpenAICompatibleAPIKey = "****"
clonedConfig.OpenRouterAPIKey = "****"
clonedConfig.SecretKey = "****" clonedConfig.SecretKey = "****"
clonedConfig.AmapApplicationSecret = "****" clonedConfig.AmapApplicationSecret = "****"
@@ -175,5 +172,11 @@ func getConfigWithoutSensitiveData(config *settings.Config) *settings.Config {
clonedConfig.WebDAVConfig.Password = "****" clonedConfig.WebDAVConfig.Password = "****"
} }
if clonedConfig.ReceiptImageRecognitionLLMConfig != nil {
clonedConfig.ReceiptImageRecognitionLLMConfig.OpenAIAPIKey = "****"
clonedConfig.ReceiptImageRecognitionLLMConfig.OpenAICompatibleAPIKey = "****"
clonedConfig.ReceiptImageRecognitionLLMConfig.OpenRouterAPIKey = "****"
}
return clonedConfig return clonedConfig
} }
+1 -1
View File
@@ -397,7 +397,7 @@ func startWebServer(c *core.CliContext) error {
apiV1Route.POST("/transaction/templates/delete.json", bindApi(api.TransactionTemplates.TemplateDeleteHandler)) apiV1Route.POST("/transaction/templates/delete.json", bindApi(api.TransactionTemplates.TemplateDeleteHandler))
// Large Language Models // Large Language Models
if config.LLMProvider != "" { if config.ReceiptImageRecognitionLLMConfig != nil && config.ReceiptImageRecognitionLLMConfig.LLMProvider != "" {
if config.TransactionFromAIImageRecognition { if config.TransactionFromAIImageRecognition {
apiV1Route.POST("/llm/transactions/recognize_receipt_image.json", bindApi(api.LargeLanguageModels.RecognizeReceiptImageHandler)) apiV1Route.POST("/llm/transactions/recognize_receipt_image.json", bindApi(api.LargeLanguageModels.RecognizeReceiptImageHandler))
} }
+12 -11
View File
@@ -165,14 +165,21 @@ webdav_proxy = system
webdav_skip_tls_verify = false webdav_skip_tls_verify = false
[llm] [llm]
# Large Language Model (LLM) provider, supports the following types: "openai", "openai_compatible", "openrouter", "ollama" # Set to true to enable creating transactions from AI image recognition results; requires "llm_provider" and its related model id to be configured properly in the "llm_image_recognition" section
transaction_from_ai_image_recognition = false
# Maximum allowed AI recognition picture file size (1 - 4294967295 bytes)
max_ai_recognition_picture_size = 10485760
[llm_image_recognition]
# Large Language Model (LLM) provider for receipt image recognition, supports the following types: "openai", "openai_compatible", "openrouter", "ollama"
llm_provider = llm_provider =
# For "openai" llm provider only, OpenAI API secret key, please visit https://platform.openai.com/api-keys for more information # For "openai" llm provider only, OpenAI API secret key, please visit https://platform.openai.com/api-keys for more information
openai_api_key = openai_api_key =
# For "openai" llm provider only, receipt image recognition model for creating transactions from images # For "openai" llm provider only, receipt image recognition model for creating transactions from images
openai_receipt_image_recognition_model_id = openai_model_id =
# For "openai_compatible" llm provider only, OpenAI compatible API base url, e.g. "https://api.openai.com/v1/" # For "openai_compatible" llm provider only, OpenAI compatible API base url, e.g. "https://api.openai.com/v1/"
openai_compatible_base_url = openai_compatible_base_url =
@@ -181,25 +188,19 @@ openai_compatible_base_url =
openai_compatible_api_key = openai_compatible_api_key =
# For "openai_compatible" llm provider only, receipt image recognition model for creating transactions from images # For "openai_compatible" llm provider only, receipt image recognition model for creating transactions from images
openai_compatible_receipt_image_recognition_model_id = openai_compatible_model_id =
# For "openrouter" llm provider only, OpenRouter API key, please visit https://openrouter.ai/settings/keys for more information # For "openrouter" llm provider only, OpenRouter API key, please visit https://openrouter.ai/settings/keys for more information
openrouter_api_key = openrouter_api_key =
# For "openrouter" llm provider only, receipt image recognition model for creating transactions from images # For "openrouter" llm provider only, receipt image recognition model for creating transactions from images
openrouter_receipt_image_recognition_model_id = openrouter_model_id =
# For "ollama" llm provider only, Ollama server url, e.g. "http://127.0.0.1:11434/" # For "ollama" llm provider only, Ollama server url, e.g. "http://127.0.0.1:11434/"
ollama_server_url = ollama_server_url =
# For "ollama" llm provider only, receipt image recognition model for creating transactions from images # For "ollama" llm provider only, receipt image recognition model for creating transactions from images
ollama_receipt_image_recognition_model_id = ollama_model_id =
# Set to true to enable creating transactions from AI image recognition results, requires llm_provider and its related receipt image recognition model to be configured properly
transaction_from_ai_image_recognition = false
# Maximum allowed AI recognition picture file size (1 - 4294967295 bytes)
max_ai_recognition_picture_size = 10485760
# Requesting large language model api timeout (0 - 4294967295 milliseconds) # Requesting large language model api timeout (0 - 4294967295 milliseconds)
# Set to 0 to disable timeout for requesting large language model api, default is 60000 (60 seconds) # Set to 0 to disable timeout for requesting large language model api, default is 60000 (60 seconds)
+1 -1
View File
@@ -41,7 +41,7 @@ var (
// RecognizeReceiptImageHandler returns the recognized receipt image result // RecognizeReceiptImageHandler returns the recognized receipt image result
func (a *LargeLanguageModelsApi) RecognizeReceiptImageHandler(c *core.WebContext) (any, *errs.Error) { func (a *LargeLanguageModelsApi) RecognizeReceiptImageHandler(c *core.WebContext) (any, *errs.Error) {
if a.CurrentConfig().LLMProvider == "" || !a.CurrentConfig().TransactionFromAIImageRecognition { if a.CurrentConfig().ReceiptImageRecognitionLLMConfig == nil || a.CurrentConfig().ReceiptImageRecognitionLLMConfig.LLMProvider == "" || !a.CurrentConfig().TransactionFromAIImageRecognition {
return nil, errs.ErrLargeLanguageModelProviderNotEnabled return nil, errs.ErrLargeLanguageModelProviderNotEnabled
} }
+1 -1
View File
@@ -47,7 +47,7 @@ func (a *ServerSettingsApi) ServerSettingsJavascriptHandler(c *core.WebContext)
a.appendBooleanSetting(builder, "mcp", config.EnableMCPServer) a.appendBooleanSetting(builder, "mcp", config.EnableMCPServer)
} }
if config.LLMProvider != "" { if config.ReceiptImageRecognitionLLMConfig != nil && config.ReceiptImageRecognitionLLMConfig.LLMProvider != "" {
if config.TransactionFromAIImageRecognition { if config.TransactionFromAIImageRecognition {
a.appendBooleanSetting(builder, "llmt", config.TransactionFromAIImageRecognition) a.appendBooleanSetting(builder, "llmt", config.TransactionFromAIImageRecognition)
} }
+9 -16
View File
@@ -16,13 +16,10 @@ import (
// HttpLargeLanguageModelProvider defines the structure of http large language model provider // HttpLargeLanguageModelProvider defines the structure of http large language model provider
type HttpLargeLanguageModelProvider interface { type HttpLargeLanguageModelProvider interface {
// BuildTextualRequest returns the http request by the provider api definition // BuildTextualRequest returns the http request by the provider api definition
BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) (*http.Request, error)
// ParseTextualResponse returns the textual response entity by the provider api definition // ParseTextualResponse returns the textual response entity by the provider api definition
ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error)
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id if supported, otherwise returns empty string
GetReceiptImageRecognitionModelID() string
} }
// CommonHttpLargeLanguageModelProvider defines the structure of common http large language model provider // CommonHttpLargeLanguageModelProvider defines the structure of common http large language model provider
@@ -31,20 +28,16 @@ type CommonHttpLargeLanguageModelProvider struct {
provider HttpLargeLanguageModelProvider provider HttpLargeLanguageModelProvider
} }
// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the OpenAI common compatible large language model provider // GetJsonResponse returns the json response from the common http large language model provider
func (p *CommonHttpLargeLanguageModelProvider) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) { func (p *CommonHttpLargeLanguageModelProvider) GetJsonResponse(c core.Context, uid int64, currentLLMConfig *settings.LLMConfig, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) {
return p.getTextualResponse(c, uid, currentConfig, request, p.provider.GetReceiptImageRecognitionModelID(), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) return p.getTextualResponse(c, uid, currentLLMConfig, request, LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
}
func (p *CommonHttpLargeLanguageModelProvider) getTextualResponse(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
if modelId == "" {
return nil, errs.ErrInvalidLLMModelId
} }
func (p *CommonHttpLargeLanguageModelProvider) getTextualResponse(c core.Context, uid int64, currentLLMConfig *settings.LLMConfig, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
transport := http.DefaultTransport.(*http.Transport).Clone() transport := http.DefaultTransport.(*http.Transport).Clone()
utils.SetProxyUrl(transport, currentConfig.LargeLanguageModelAPIProxy) utils.SetProxyUrl(transport, currentLLMConfig.LargeLanguageModelAPIProxy)
if currentConfig.LargeLanguageModelAPISkipTLSVerify { if currentLLMConfig.LargeLanguageModelAPISkipTLSVerify {
transport.TLSClientConfig = &tls.Config{ transport.TLSClientConfig = &tls.Config{
InsecureSkipVerify: true, InsecureSkipVerify: true,
} }
@@ -52,10 +45,10 @@ func (p *CommonHttpLargeLanguageModelProvider) getTextualResponse(c core.Context
client := &http.Client{ client := &http.Client{
Transport: transport, Transport: transport,
Timeout: time.Duration(currentConfig.LargeLanguageModelAPIRequestTimeout) * time.Millisecond, Timeout: time.Duration(currentLLMConfig.LargeLanguageModelAPIRequestTimeout) * time.Millisecond,
} }
httpRequest, err := p.provider.BuildTextualRequest(c, uid, request, modelId, responseType) httpRequest, err := p.provider.BuildTextualRequest(c, uid, request, responseType)
if err != nil { if err != nil {
log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to build requests for user \"uid:%d\", because %s", uid, err.Error()) log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to build requests for user \"uid:%d\", because %s", uid, err.Error())
+2 -2
View File
@@ -7,6 +7,6 @@ import (
// LargeLanguageModelProvider defines the structure of large language model provider // LargeLanguageModelProvider defines the structure of large language model provider
type LargeLanguageModelProvider interface { type LargeLanguageModelProvider interface {
// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the large language model provider by receipt image recognition model // GetJsonResponse returns the json response from the large language model provider
GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) GetJsonResponse(c core.Context, uid int64, currentLLMConfig *settings.LLMConfig, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error)
} }
@@ -8,7 +8,7 @@ import (
// LargeLanguageModelProviderContainer contains the current large language model provider // LargeLanguageModelProviderContainer contains the current large language model provider
type LargeLanguageModelProviderContainer struct { type LargeLanguageModelProviderContainer struct {
current LargeLanguageModelProvider receiptImageRecognitionCurrentProvider LargeLanguageModelProvider
} }
// Initialize a large language model provider container singleton instance // Initialize a large language model provider container singleton instance
@@ -18,31 +18,40 @@ var (
// InitializeLargeLanguageModelProvider initializes the current large language model provider according to the config // InitializeLargeLanguageModelProvider initializes the current large language model provider according to the config
func InitializeLargeLanguageModelProvider(config *settings.Config) error { func InitializeLargeLanguageModelProvider(config *settings.Config) error {
if config.LLMProvider == settings.OpenAILLMProvider { var err error = nil
Container.current = NewOpenAILargeLanguageModelProvider(config)
return nil if config.ReceiptImageRecognitionLLMConfig != nil {
} else if config.LLMProvider == settings.OpenAICompatibleLLMProvider { Container.receiptImageRecognitionCurrentProvider, err = initializeLargeLanguageModelProvider(config.ReceiptImageRecognitionLLMConfig)
Container.current = NewOpenAICompatibleLargeLanguageModelProvider(config)
return nil if err != nil {
} else if config.LLMProvider == settings.OpenRouterLLMProvider { return err
Container.current = NewOpenRouterLargeLanguageModelProvider(config) }
return nil }
} else if config.LLMProvider == settings.OllamaLLMProvider {
Container.current = NewOllamaLargeLanguageModelProvider(config)
return nil
} else if config.LLMProvider == "" {
Container.current = nil
return nil return nil
} }
return errs.ErrInvalidLLMProvider func initializeLargeLanguageModelProvider(llmConfig *settings.LLMConfig) (LargeLanguageModelProvider, error) {
if llmConfig.LLMProvider == settings.OpenAILLMProvider {
return NewOpenAILargeLanguageModelProvider(llmConfig), nil
} else if llmConfig.LLMProvider == settings.OpenAICompatibleLLMProvider {
return NewOpenAICompatibleLargeLanguageModelProvider(llmConfig), nil
} else if llmConfig.LLMProvider == settings.OpenRouterLLMProvider {
return NewOpenRouterLargeLanguageModelProvider(llmConfig), nil
} else if llmConfig.LLMProvider == settings.OllamaLLMProvider {
return NewOllamaLargeLanguageModelProvider(llmConfig), nil
} else if llmConfig.LLMProvider == "" {
return nil, nil
}
return nil, errs.ErrInvalidLLMProvider
} }
// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the current large language model provider by receipt image recognition model // GetJsonResponseByReceiptImageRecognitionModel returns the json response from the current large language model provider by receipt image recognition model
func (l *LargeLanguageModelProviderContainer) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) { func (l *LargeLanguageModelProviderContainer) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) {
if Container.current == nil { if currentConfig.ReceiptImageRecognitionLLMConfig == nil || Container.receiptImageRecognitionCurrentProvider == nil {
return nil, errs.ErrInvalidLLMProvider return nil, errs.ErrInvalidLLMProvider
} }
return l.current.GetJsonResponseByReceiptImageRecognitionModel(c, uid, currentConfig, request) return l.receiptImageRecognitionCurrentProvider.GetJsonResponse(c, uid, currentConfig.ReceiptImageRecognitionLLMConfig, request)
} }
+15 -11
View File
@@ -19,12 +19,12 @@ const ollamaChatCompletionsPath = "api/chat"
type OllamaLargeLanguageModelProvider struct { type OllamaLargeLanguageModelProvider struct {
CommonHttpLargeLanguageModelProvider CommonHttpLargeLanguageModelProvider
OllamaServerURL string OllamaServerURL string
ReceiptImageRecognitionModelID string OllamaModelID string
} }
// BuildTextualRequest returns the http request by Ollama provider // BuildTextualRequest returns the http request by Ollama provider
func (p *OllamaLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) { func (p *OllamaLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) (*http.Request, error) {
requestBody, err := p.buildJsonRequestBody(c, uid, request, modelId, responseType) requestBody, err := p.buildJsonRequestBody(c, uid, request, responseType)
if err != nil { if err != nil {
return nil, err return nil, err
@@ -82,12 +82,16 @@ func (p *OllamaLargeLanguageModelProvider) ParseTextualResponse(c core.Context,
return textualResponse, nil return textualResponse, nil
} }
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of Ollama provider // GetModelID returns the model id of Ollama provider
func (p *OllamaLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string { func (p *OllamaLargeLanguageModelProvider) GetModelID() string {
return p.ReceiptImageRecognitionModelID return p.OllamaModelID
}
func (p *OllamaLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) ([]byte, error) {
if p.OllamaModelID == "" {
return nil, errs.ErrInvalidLLMModelId
} }
func (p *OllamaLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) ([]byte, error) {
requestMessages := make([]any, 0) requestMessages := make([]any, 0)
if request.SystemPrompt != "" { if request.SystemPrompt != "" {
@@ -114,7 +118,7 @@ func (p *OllamaLargeLanguageModelProvider) buildJsonRequestBody(c core.Context,
} }
requestBody := make(map[string]any) requestBody := make(map[string]any)
requestBody["model"] = modelId requestBody["model"] = p.OllamaModelID
requestBody["stream"] = request.Stream requestBody["stream"] = request.Stream
requestBody["messages"] = requestMessages requestBody["messages"] = requestMessages
@@ -145,9 +149,9 @@ func (p *OllamaLargeLanguageModelProvider) getOllamaRequestUrl() string {
} }
// NewOllamaLargeLanguageModelProvider creates a new Ollama large language model provider instance // NewOllamaLargeLanguageModelProvider creates a new Ollama large language model provider instance
func NewOllamaLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider { func NewOllamaLargeLanguageModelProvider(llmConfig *settings.LLMConfig) LargeLanguageModelProvider {
return newCommonHttpLargeLanguageModelProvider(&OllamaLargeLanguageModelProvider{ return newCommonHttpLargeLanguageModelProvider(&OllamaLargeLanguageModelProvider{
OllamaServerURL: config.OllamaServerURL, OllamaServerURL: llmConfig.OllamaServerURL,
ReceiptImageRecognitionModelID: config.OllamaReceiptImageRecognitionModelID, OllamaModelID: llmConfig.OllamaModelID,
}) })
} }
@@ -10,14 +10,16 @@ import (
) )
func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) { func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{} provider := &OllamaLargeLanguageModelProvider{
OllamaModelID: "test",
}
request := &LargeLanguageModelRequest{ request := &LargeLanguageModelRequest{
SystemPrompt: "You are a helpful assistant.", SystemPrompt: "You are a helpful assistant.",
UserPrompt: []byte("Hello, how are you?"), UserPrompt: []byte("Hello, how are you?"),
} }
bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err) assert.Nil(t, err)
var body map[string]interface{} var body map[string]interface{}
@@ -28,7 +30,9 @@ func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt
} }
func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) { func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{} provider := &OllamaLargeLanguageModelProvider{
OllamaModelID: "test",
}
request := &LargeLanguageModelRequest{ request := &LargeLanguageModelRequest{
SystemPrompt: "What's in this image?", SystemPrompt: "What's in this image?",
@@ -36,7 +40,7 @@ func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t
UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL, UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
} }
bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err) assert.Nil(t, err)
var body map[string]interface{} var body map[string]interface{}
@@ -20,8 +20,8 @@ type OpenAIChatCompletionsLargeLanguageModelProvider interface {
// BuildChatCompletionsHttpRequest returns the chat completions http request // BuildChatCompletionsHttpRequest returns the chat completions http request
BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error)
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id if supported, otherwise returns empty string // GetModelID returns the model id if supported, otherwise returns empty string
GetReceiptImageRecognitionModelID() string GetModelID() string
} }
// OpenAICommonChatCompletionsHttpLargeLanguageModelProvider defines the structure of OpenAI common compatible large language model provider based on chat completions api // OpenAICommonChatCompletionsHttpLargeLanguageModelProvider defines the structure of OpenAI common compatible large language model provider based on chat completions api
@@ -31,8 +31,8 @@ type OpenAICommonChatCompletionsHttpLargeLanguageModelProvider struct {
} }
// BuildTextualRequest returns the http request by OpenAI common compatible provider // BuildTextualRequest returns the http request by OpenAI common compatible provider
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) { func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) (*http.Request, error) {
requestBody, err := p.buildJsonRequestBody(c, uid, request, modelId, responseType) requestBody, err := p.buildJsonRequestBody(c, uid, request, responseType)
if err != nil { if err != nil {
return nil, err return nil, err
@@ -105,12 +105,11 @@ func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) ParseTextual
return textualResponse, nil return textualResponse, nil
} }
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI common compatible provider func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) ([]byte, error) {
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string { if p.provider.GetModelID() == "" {
return p.provider.GetReceiptImageRecognitionModelID() return nil, errs.ErrInvalidLLMModelId
} }
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) ([]byte, error) {
requestMessages := make([]any, 0) requestMessages := make([]any, 0)
if request.SystemPrompt != "" { if request.SystemPrompt != "" {
@@ -143,7 +142,7 @@ func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) buildJsonReq
} }
requestBody := make(map[string]any) requestBody := make(map[string]any)
requestBody["model"] = modelId requestBody["model"] = p.provider.GetModelID()
requestBody["stream"] = request.Stream requestBody["stream"] = request.Stream
requestBody["messages"] = requestMessages requestBody["messages"] = requestMessages
@@ -11,7 +11,9 @@ import (
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) { func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) {
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{ provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: &OpenAILargeLanguageModelProvider{}, provider: &OpenAILargeLanguageModelProvider{
OpenAIModelID: "test",
},
} }
request := &LargeLanguageModelRequest{ request := &LargeLanguageModelRequest{
@@ -19,7 +21,7 @@ func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequ
UserPrompt: []byte("Hello, how are you?"), UserPrompt: []byte("Hello, how are you?"),
} }
bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err) assert.Nil(t, err)
var body map[string]interface{} var body map[string]interface{}
@@ -31,7 +33,9 @@ func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequ
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) { func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) {
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{ provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: &OpenAILargeLanguageModelProvider{}, provider: &OpenAILargeLanguageModelProvider{
OpenAIModelID: "test",
},
} }
request := &LargeLanguageModelRequest{ request := &LargeLanguageModelRequest{
@@ -40,7 +44,7 @@ func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequ
UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL, UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
} }
bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err) assert.Nil(t, err)
var body map[string]interface{} var body map[string]interface{}
@@ -14,7 +14,7 @@ type OpenAICompatibleLargeLanguageModelProvider struct {
OpenAIChatCompletionsLargeLanguageModelProvider OpenAIChatCompletionsLargeLanguageModelProvider
OpenAICompatibleBaseURL string OpenAICompatibleBaseURL string
OpenAICompatibleAPIKey string OpenAICompatibleAPIKey string
ReceiptImageRecognitionModelID string OpenAICompatibleModelID string
} }
// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenAI compatible provider // BuildChatCompletionsHttpRequest returns the chat completions http request by OpenAI compatible provider
@@ -32,9 +32,9 @@ func (p *OpenAICompatibleLargeLanguageModelProvider) BuildChatCompletionsHttpReq
return req, nil return req, nil
} }
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI compatible provider // GetModelID returns the model id of OpenAI compatible provider
func (p *OpenAICompatibleLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string { func (p *OpenAICompatibleLargeLanguageModelProvider) GetModelID() string {
return p.ReceiptImageRecognitionModelID return p.OpenAICompatibleModelID
} }
func (p *OpenAICompatibleLargeLanguageModelProvider) getFinalChatCompletionsRequestUrl() string { func (p *OpenAICompatibleLargeLanguageModelProvider) getFinalChatCompletionsRequestUrl() string {
@@ -49,10 +49,10 @@ func (p *OpenAICompatibleLargeLanguageModelProvider) getFinalChatCompletionsRequ
} }
// NewOpenAICompatibleLargeLanguageModelProvider creates a new OpenAI compatible large language model provider instance // NewOpenAICompatibleLargeLanguageModelProvider creates a new OpenAI compatible large language model provider instance
func NewOpenAICompatibleLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider { func NewOpenAICompatibleLargeLanguageModelProvider(llmConfig *settings.LLMConfig) LargeLanguageModelProvider {
return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAICompatibleLargeLanguageModelProvider{ return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAICompatibleLargeLanguageModelProvider{
OpenAICompatibleBaseURL: config.OpenAICompatibleBaseURL, OpenAICompatibleBaseURL: llmConfig.OpenAICompatibleBaseURL,
OpenAICompatibleAPIKey: config.OpenAICompatibleAPIKey, OpenAICompatibleAPIKey: llmConfig.OpenAICompatibleAPIKey,
ReceiptImageRecognitionModelID: config.OpenAICompatibleReceiptImageRecognitionModelID, OpenAICompatibleModelID: llmConfig.OpenAICompatibleModelID,
}) })
} }
@@ -11,7 +11,7 @@ import (
type OpenAILargeLanguageModelProvider struct { type OpenAILargeLanguageModelProvider struct {
OpenAIChatCompletionsLargeLanguageModelProvider OpenAIChatCompletionsLargeLanguageModelProvider
OpenAIAPIKey string OpenAIAPIKey string
ReceiptImageRecognitionModelID string OpenAIModelID string
} }
const openAIChatCompletionsUrl = "https://api.openai.com/v1/chat/completions" const openAIChatCompletionsUrl = "https://api.openai.com/v1/chat/completions"
@@ -29,15 +29,15 @@ func (p *OpenAILargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c cor
return req, nil return req, nil
} }
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI provider // GetModelID returns the model id of OpenAI provider
func (p *OpenAILargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string { func (p *OpenAILargeLanguageModelProvider) GetModelID() string {
return p.ReceiptImageRecognitionModelID return p.OpenAIModelID
} }
// NewOpenAILargeLanguageModelProvider creates a new OpenAI large language model provider instance // NewOpenAILargeLanguageModelProvider creates a new OpenAI large language model provider instance
func NewOpenAILargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider { func NewOpenAILargeLanguageModelProvider(llmConfig *settings.LLMConfig) LargeLanguageModelProvider {
return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAILargeLanguageModelProvider{ return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAILargeLanguageModelProvider{
OpenAIAPIKey: config.OpenAIAPIKey, OpenAIAPIKey: llmConfig.OpenAIAPIKey,
ReceiptImageRecognitionModelID: config.OpenAIReceiptImageRecognitionModelID, OpenAIModelID: llmConfig.OpenAIModelID,
}) })
} }
@@ -11,7 +11,7 @@ import (
type OpenRouterLargeLanguageModelProvider struct { type OpenRouterLargeLanguageModelProvider struct {
OpenAIChatCompletionsLargeLanguageModelProvider OpenAIChatCompletionsLargeLanguageModelProvider
OpenRouterAPIKey string OpenRouterAPIKey string
ReceiptImageRecognitionModelID string OpenRouterModelID string
} }
const openRouterChatCompletionsUrl = "https://openrouter.ai/api/v1/chat/completions" const openRouterChatCompletionsUrl = "https://openrouter.ai/api/v1/chat/completions"
@@ -31,15 +31,15 @@ func (p *OpenRouterLargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c
return req, nil return req, nil
} }
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenRouter provider // GetModelID returns the model id of OpenRouter provider
func (p *OpenRouterLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string { func (p *OpenRouterLargeLanguageModelProvider) GetModelID() string {
return p.ReceiptImageRecognitionModelID return p.OpenRouterModelID
} }
// NewOpenRouterLargeLanguageModelProvider creates a new OpenRouter large language model provider instance // NewOpenRouterLargeLanguageModelProvider creates a new OpenRouter large language model provider instance
func NewOpenRouterLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider { func NewOpenRouterLargeLanguageModelProvider(llmConfig *settings.LLMConfig) LargeLanguageModelProvider {
return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenRouterLargeLanguageModelProvider{ return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenRouterLargeLanguageModelProvider{
OpenRouterAPIKey: config.OpenRouterAPIKey, OpenRouterAPIKey: llmConfig.OpenRouterAPIKey,
ReceiptImageRecognitionModelID: config.OpenRouterReceiptImageRecognitionModelID, OpenRouterModelID: llmConfig.OpenRouterModelID,
}) })
} }
+66 -48
View File
@@ -219,6 +219,23 @@ type WebDAVConfig struct {
SkipTLSVerify bool SkipTLSVerify bool
} }
// LLMConfig represents the Large Language Model setting config.
// A value is populated by loadLLMConfiguration from a single section of the
// configuration file; the key each field is read from is noted on the field.
type LLMConfig struct {
// LLMProvider is read from "llm_provider". After loading it is either empty or one of
// OpenAILLMProvider, OpenAICompatibleLLMProvider, OpenRouterLLMProvider, OllamaLLMProvider.
LLMProvider string
// OpenAIAPIKey is read from "openai_api_key".
OpenAIAPIKey string
// OpenAIModelID is read from "openai_model_id".
OpenAIModelID string
// OpenAICompatibleBaseURL is read from "openai_compatible_base_url".
OpenAICompatibleBaseURL string
// OpenAICompatibleAPIKey is read from "openai_compatible_api_key".
OpenAICompatibleAPIKey string
// OpenAICompatibleModelID is read from "openai_compatible_model_id".
OpenAICompatibleModelID string
// OpenRouterAPIKey is read from "openrouter_api_key".
OpenRouterAPIKey string
// OpenRouterModelID is read from "openrouter_model_id".
OpenRouterModelID string
// OllamaServerURL is read from "ollama_server_url".
OllamaServerURL string
// OllamaModelID is read from "ollama_model_id".
OllamaModelID string
// LargeLanguageModelAPIRequestTimeout is read from "request_timeout" and defaults to
// defaultLargeLanguageModelAPIRequestTimeout (unit is not visible here - TODO confirm).
LargeLanguageModelAPIRequestTimeout uint32
// LargeLanguageModelAPIProxy is read from "proxy" and defaults to "system".
LargeLanguageModelAPIProxy string
// LargeLanguageModelAPISkipTLSVerify is read from "skip_tls_verify" and defaults to false.
LargeLanguageModelAPISkipTLSVerify bool
}
// TipConfig represents a tip setting config // TipConfig represents a tip setting config
type TipConfig struct { type TipConfig struct {
Enabled bool Enabled bool
@@ -292,21 +309,11 @@ type Config struct {
WebDAVConfig *WebDAVConfig WebDAVConfig *WebDAVConfig
// Large Language Model // Large Language Model
LLMProvider string
OpenAIAPIKey string
OpenAIReceiptImageRecognitionModelID string
OpenAICompatibleBaseURL string
OpenAICompatibleAPIKey string
OpenAICompatibleReceiptImageRecognitionModelID string
OpenRouterAPIKey string
OpenRouterReceiptImageRecognitionModelID string
OllamaServerURL string
OllamaReceiptImageRecognitionModelID string
TransactionFromAIImageRecognition bool TransactionFromAIImageRecognition bool
MaxAIRecognitionPictureFileSize uint32 MaxAIRecognitionPictureFileSize uint32
LargeLanguageModelAPIRequestTimeout uint32
LargeLanguageModelAPIProxy string // Large Language Model for Receipt Image Recognition
LargeLanguageModelAPISkipTLSVerify bool ReceiptImageRecognitionLLMConfig *LLMConfig
// Uuid // Uuid
UuidGeneratorType string UuidGeneratorType string
@@ -453,7 +460,13 @@ func LoadConfiguration(configFilePath string) (*Config, error) {
return nil, err return nil, err
} }
err = loadLLMConfiguration(config, cfgFile, "llm") err = loadLLMGlobalConfiguration(config, cfgFile, "llm")
if err != nil {
return nil, err
}
config.ReceiptImageRecognitionLLMConfig, err = loadLLMConfiguration(cfgFile, "llm_image_recognition")
if err != nil { if err != nil {
return nil, err return nil, err
@@ -784,46 +797,51 @@ func loadStorageConfiguration(config *Config, configFile *ini.File, sectionName
return nil return nil
} }
func loadLLMConfiguration(config *Config, configFile *ini.File, sectionName string) error { func loadLLMGlobalConfiguration(config *Config, configFile *ini.File, sectionName string) error {
llmProvider := getConfigItemStringValue(configFile, sectionName, "llm_provider")
if llmProvider == "" {
config.LLMProvider = ""
} else if llmProvider == OpenAILLMProvider {
config.LLMProvider = OpenAILLMProvider
} else if llmProvider == OpenAICompatibleLLMProvider {
config.LLMProvider = OpenAICompatibleLLMProvider
} else if llmProvider == OpenRouterLLMProvider {
config.LLMProvider = OpenRouterLLMProvider
} else if llmProvider == OllamaLLMProvider {
config.LLMProvider = OllamaLLMProvider
} else {
return errs.ErrInvalidLLMProvider
}
config.OpenAIAPIKey = getConfigItemStringValue(configFile, sectionName, "openai_api_key")
config.OpenAIReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openai_receipt_image_recognition_model_id")
config.OpenAICompatibleBaseURL = getConfigItemStringValue(configFile, sectionName, "openai_compatible_base_url")
config.OpenAICompatibleAPIKey = getConfigItemStringValue(configFile, sectionName, "openai_compatible_api_key")
config.OpenAICompatibleReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openai_compatible_receipt_image_recognition_model_id")
config.OpenRouterAPIKey = getConfigItemStringValue(configFile, sectionName, "openrouter_api_key")
config.OpenRouterReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openrouter_receipt_image_recognition_model_id")
config.OllamaServerURL = getConfigItemStringValue(configFile, sectionName, "ollama_server_url")
config.OllamaReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "ollama_receipt_image_recognition_model_id")
config.TransactionFromAIImageRecognition = getConfigItemBoolValue(configFile, sectionName, "transaction_from_ai_image_recognition", false) config.TransactionFromAIImageRecognition = getConfigItemBoolValue(configFile, sectionName, "transaction_from_ai_image_recognition", false)
config.MaxAIRecognitionPictureFileSize = getConfigItemUint32Value(configFile, sectionName, "max_ai_recognition_picture_size", defaultAIRecognitionPictureMaxSize) config.MaxAIRecognitionPictureFileSize = getConfigItemUint32Value(configFile, sectionName, "max_ai_recognition_picture_size", defaultAIRecognitionPictureMaxSize)
config.LargeLanguageModelAPIProxy = getConfigItemStringValue(configFile, sectionName, "proxy", "system")
config.LargeLanguageModelAPIRequestTimeout = getConfigItemUint32Value(configFile, sectionName, "request_timeout", defaultLargeLanguageModelAPIRequestTimeout)
config.LargeLanguageModelAPISkipTLSVerify = getConfigItemBoolValue(configFile, sectionName, "skip_tls_verify", false)
return nil return nil
} }
// loadLLMConfiguration reads the large language model settings stored in the
// given section of the configuration file and returns them as a new LLMConfig.
//
// It returns errs.ErrInvalidLLMProvider (and a nil config) when "llm_provider"
// is set to anything other than the empty string or one of the known provider
// identifiers. No other key is read in that case.
func loadLLMConfiguration(configFile *ini.File, sectionName string) (*LLMConfig, error) {
	provider := getConfigItemStringValue(configFile, sectionName, "llm_provider")

	switch provider {
	case "", OpenAILLMProvider, OpenAICompatibleLLMProvider, OpenRouterLLMProvider, OllamaLLMProvider:
		// Accepted as-is; an empty provider is stored unchanged.
	default:
		return nil, errs.ErrInvalidLLMProvider
	}

	// The remaining keys are read in the same order as the fields are listed.
	return &LLMConfig{
		LLMProvider: provider,

		OpenAIAPIKey:  getConfigItemStringValue(configFile, sectionName, "openai_api_key"),
		OpenAIModelID: getConfigItemStringValue(configFile, sectionName, "openai_model_id"),

		OpenAICompatibleBaseURL: getConfigItemStringValue(configFile, sectionName, "openai_compatible_base_url"),
		OpenAICompatibleAPIKey:  getConfigItemStringValue(configFile, sectionName, "openai_compatible_api_key"),
		OpenAICompatibleModelID: getConfigItemStringValue(configFile, sectionName, "openai_compatible_model_id"),

		OpenRouterAPIKey:  getConfigItemStringValue(configFile, sectionName, "openrouter_api_key"),
		OpenRouterModelID: getConfigItemStringValue(configFile, sectionName, "openrouter_model_id"),

		OllamaServerURL: getConfigItemStringValue(configFile, sectionName, "ollama_server_url"),
		OllamaModelID:   getConfigItemStringValue(configFile, sectionName, "ollama_model_id"),

		LargeLanguageModelAPIProxy:          getConfigItemStringValue(configFile, sectionName, "proxy", "system"),
		LargeLanguageModelAPIRequestTimeout: getConfigItemUint32Value(configFile, sectionName, "request_timeout", defaultLargeLanguageModelAPIRequestTimeout),
		LargeLanguageModelAPISkipTLSVerify:  getConfigItemBoolValue(configFile, sectionName, "skip_tls_verify", false),
	}, nil
}
func loadUuidConfiguration(config *Config, configFile *ini.File, sectionName string) error { func loadUuidConfiguration(config *Config, configFile *ini.File, sectionName string) error {
if getConfigItemStringValue(configFile, sectionName, "generator_type") == InternalUuidGeneratorType { if getConfigItemStringValue(configFile, sectionName, "generator_type") == InternalUuidGeneratorType {
config.UuidGeneratorType = InternalUuidGeneratorType config.UuidGeneratorType = InternalUuidGeneratorType