diff --git a/cmd/initializer.go b/cmd/initializer.go index 5389e003..e0f37129 100644 --- a/cmd/initializer.go +++ b/cmd/initializer.go @@ -165,9 +165,6 @@ func getConfigWithoutSensitiveData(config *settings.Config) *settings.Config { clonedConfig.DatabaseConfig.DatabasePassword = "****" clonedConfig.SMTPConfig.SMTPPasswd = "****" clonedConfig.MinIOConfig.SecretAccessKey = "****" - clonedConfig.OpenAIAPIKey = "****" - clonedConfig.OpenAICompatibleAPIKey = "****" - clonedConfig.OpenRouterAPIKey = "****" clonedConfig.SecretKey = "****" clonedConfig.AmapApplicationSecret = "****" @@ -175,5 +172,11 @@ func getConfigWithoutSensitiveData(config *settings.Config) *settings.Config { clonedConfig.WebDAVConfig.Password = "****" } + if clonedConfig.ReceiptImageRecognitionLLMConfig != nil { + clonedConfig.ReceiptImageRecognitionLLMConfig.OpenAIAPIKey = "****" + clonedConfig.ReceiptImageRecognitionLLMConfig.OpenAICompatibleAPIKey = "****" + clonedConfig.ReceiptImageRecognitionLLMConfig.OpenRouterAPIKey = "****" + } + return clonedConfig } diff --git a/cmd/webserver.go b/cmd/webserver.go index 3342a237..73c889a1 100644 --- a/cmd/webserver.go +++ b/cmd/webserver.go @@ -397,7 +397,7 @@ func startWebServer(c *core.CliContext) error { apiV1Route.POST("/transaction/templates/delete.json", bindApi(api.TransactionTemplates.TemplateDeleteHandler)) // Large Language Models - if config.LLMProvider != "" { + if config.ReceiptImageRecognitionLLMConfig != nil && config.ReceiptImageRecognitionLLMConfig.LLMProvider != "" { if config.TransactionFromAIImageRecognition { apiV1Route.POST("/llm/transactions/recognize_receipt_image.json", bindApi(api.LargeLanguageModels.RecognizeReceiptImageHandler)) } diff --git a/conf/ezbookkeeping.ini b/conf/ezbookkeeping.ini index 979547f9..2db2c3b3 100644 --- a/conf/ezbookkeeping.ini +++ b/conf/ezbookkeeping.ini @@ -165,14 +165,21 @@ webdav_proxy = system webdav_skip_tls_verify = false [llm] -# Large Language Model (LLM) provider, supports the following types: "openai", "openai_compatible", "openrouter", "ollama" +# Set to true to enable creating transactions from AI image recognition results, requires "llm_provider" and its related model id to be configured properly in "llm_image_recognition" section +transaction_from_ai_image_recognition = false + +# Maximum allowed AI recognition picture file size (1 - 4294967295 bytes) +max_ai_recognition_picture_size = 10485760 + +[llm_image_recognition] +# Large Language Model (LLM) provider for receipt image recognition, supports the following types: "openai", "openai_compatible", "openrouter", "ollama" llm_provider = # For "openai" llm provider only, OpenAI API secret key, please visit https://platform.openai.com/api-keys for more information openai_api_key = # For "openai" llm provider only, receipt image recognition model for creating transactions from images -openai_receipt_image_recognition_model_id = +openai_model_id = # For "openai_compatible" llm provider only, OpenAI compatible API base url, e.g. "https://api.openai.com/v1/" openai_compatible_base_url = @@ -181,25 +188,19 @@ openai_compatible_base_url = openai_compatible_api_key = # For "openai_compatible" llm provider only, receipt image recognition model for creating transactions from images -openai_compatible_receipt_image_recognition_model_id = +openai_compatible_model_id = # For "openrouter" llm provider only, OpenRouter API key, please visit https://openrouter.ai/settings/keys for more information openrouter_api_key = # For "openrouter" llm provider only, receipt image recognition model for creating transactions from images -openrouter_receipt_image_recognition_model_id = +openrouter_model_id = # For "ollama" llm provider only, Ollama server url, e.g. "http://127.0.0.1:11434/" ollama_server_url = # For "ollama" llm provider only, receipt image recognition model for creating transactions from images -ollama_receipt_image_recognition_model_id = - -# Set to true to enable creating transactions from AI image recognition results, requires llm_provider and its related receipt image recognition model to be configured properly -transaction_from_ai_image_recognition = false - -# Maximum allowed AI recognition picture file size (1 - 4294967295 bytes) -max_ai_recognition_picture_size = 10485760 +ollama_model_id = # Requesting large language model api timeout (0 - 4294967295 milliseconds) # Set to 0 to disable timeout for requesting large language model api, default is 60000 (60 seconds) diff --git a/pkg/api/large_language_models.go b/pkg/api/large_language_models.go index ca27dfc0..0d1a2cab 100644 --- a/pkg/api/large_language_models.go +++ b/pkg/api/large_language_models.go @@ -41,7 +41,7 @@ var ( // RecognizeReceiptImageHandler returns the recognized receipt image result func (a *LargeLanguageModelsApi) RecognizeReceiptImageHandler(c *core.WebContext) (any, *errs.Error) { - if a.CurrentConfig().LLMProvider == "" || !a.CurrentConfig().TransactionFromAIImageRecognition { + if a.CurrentConfig().ReceiptImageRecognitionLLMConfig == nil || a.CurrentConfig().ReceiptImageRecognitionLLMConfig.LLMProvider == "" || !a.CurrentConfig().TransactionFromAIImageRecognition { return nil, errs.ErrLargeLanguageModelProviderNotEnabled } diff --git a/pkg/api/server_settings.go b/pkg/api/server_settings.go index 80c65fee..d8df47c8 100644 --- a/pkg/api/server_settings.go +++ b/pkg/api/server_settings.go @@ -47,7 +47,7 @@ func (a *ServerSettingsApi) ServerSettingsJavascriptHandler(c *core.WebContext) a.appendBooleanSetting(builder, "mcp", config.EnableMCPServer) } - if config.LLMProvider != "" { + if config.ReceiptImageRecognitionLLMConfig != nil && config.ReceiptImageRecognitionLLMConfig.LLMProvider != "" { if config.TransactionFromAIImageRecognition { a.appendBooleanSetting(builder, "llmt", config.TransactionFromAIImageRecognition) } diff --git a/pkg/llm/http_large_language_model_provider.go b/pkg/llm/http_large_language_model_provider.go index e7ea6bd9..2dae8487 100644 --- a/pkg/llm/http_large_language_model_provider.go +++ b/pkg/llm/http_large_language_model_provider.go @@ -16,13 +16,10 @@ import ( // HttpLargeLanguageModelProvider defines the structure of http large language model provider type HttpLargeLanguageModelProvider interface { // BuildTextualRequest returns the http request by the provider api definition - BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) + BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) (*http.Request, error) // ParseTextualResponse returns the textual response entity by the provider api definition ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) - - // GetReceiptImageRecognitionModelID returns the receipt image recognition model id if supported, otherwise returns empty string - GetReceiptImageRecognitionModelID() string } // CommonHttpLargeLanguageModelProvider defines the structure of common http large language model provider @@ -31,20 +28,16 @@ type CommonHttpLargeLanguageModelProvider struct { provider HttpLargeLanguageModelProvider } -// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the OpenAI common compatible large language model provider -func (p *CommonHttpLargeLanguageModelProvider) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) { - return p.getTextualResponse(c, uid, currentConfig, request, p.provider.GetReceiptImageRecognitionModelID(), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) +// GetJsonResponse returns the json response from the OpenAI common compatible large language model provider +func (p *CommonHttpLargeLanguageModelProvider) GetJsonResponse(c core.Context, uid int64, currentLLMConfig *settings.LLMConfig, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) { + return p.getTextualResponse(c, uid, currentLLMConfig, request, LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) } -func (p *CommonHttpLargeLanguageModelProvider) getTextualResponse(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) { - if modelId == "" { - return nil, errs.ErrInvalidLLMModelId - } - +func (p *CommonHttpLargeLanguageModelProvider) getTextualResponse(c core.Context, uid int64, currentLLMConfig *settings.LLMConfig, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) { transport := http.DefaultTransport.(*http.Transport).Clone() - utils.SetProxyUrl(transport, currentConfig.LargeLanguageModelAPIProxy) + utils.SetProxyUrl(transport, currentLLMConfig.LargeLanguageModelAPIProxy) - if currentConfig.LargeLanguageModelAPISkipTLSVerify { + if currentLLMConfig.LargeLanguageModelAPISkipTLSVerify { transport.TLSClientConfig = &tls.Config{ InsecureSkipVerify: true, } @@ -52,10 +45,10 @@ func (p *CommonHttpLargeLanguageModelProvider) getTextualResponse(c core.Context client := &http.Client{ Transport: transport, - Timeout: time.Duration(currentConfig.LargeLanguageModelAPIRequestTimeout) * time.Millisecond, + Timeout: time.Duration(currentLLMConfig.LargeLanguageModelAPIRequestTimeout) * time.Millisecond, } - httpRequest, err := p.provider.BuildTextualRequest(c, uid, request, modelId, responseType) + httpRequest, err := p.provider.BuildTextualRequest(c, uid, request, responseType) if err != nil { log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to build requests for user \"uid:%d\", because %s", uid, err.Error()) diff --git a/pkg/llm/large_language_model_provider.go b/pkg/llm/large_language_model_provider.go index 8ae58804..b0c559f8 100644 --- a/pkg/llm/large_language_model_provider.go +++ b/pkg/llm/large_language_model_provider.go @@ -7,6 +7,6 @@ import ( // LargeLanguageModelProvider defines the structure of large language model provider type LargeLanguageModelProvider interface { - // GetJsonResponseByReceiptImageRecognitionModel returns the json response from the large language model provider by receipt image recognition model - GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) + // GetJsonResponse returns the json response from the large language model provider + GetJsonResponse(c core.Context, uid int64, currentLLMConfig *settings.LLMConfig, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) } diff --git a/pkg/llm/large_language_model_provider_container.go b/pkg/llm/large_language_model_provider_container.go index 307a8789..4204c138 100644 --- a/pkg/llm/large_language_model_provider_container.go +++ b/pkg/llm/large_language_model_provider_container.go @@ -8,7 +8,7 @@ import ( // LargeLanguageModelProviderContainer contains the current large language model provider type LargeLanguageModelProviderContainer struct { - current LargeLanguageModelProvider + receiptImageRecognitionCurrentProvider LargeLanguageModelProvider } // Initialize a large language model provider container singleton instance @@ -18,31 +18,40 @@ var ( // InitializeLargeLanguageModelProvider initializes the current large language model provider according to the config func InitializeLargeLanguageModelProvider(config *settings.Config) error { - if config.LLMProvider == settings.OpenAILLMProvider { - Container.current = NewOpenAILargeLanguageModelProvider(config) - return nil - } else if config.LLMProvider == settings.OpenAICompatibleLLMProvider { - Container.current = NewOpenAICompatibleLargeLanguageModelProvider(config) - return nil - } else if config.LLMProvider == settings.OpenRouterLLMProvider { - Container.current = NewOpenRouterLargeLanguageModelProvider(config) - return nil - } else if config.LLMProvider == settings.OllamaLLMProvider { - Container.current = NewOllamaLargeLanguageModelProvider(config) - return nil - } else if config.LLMProvider == "" { - Container.current = nil - return nil + var err error = nil + + if config.ReceiptImageRecognitionLLMConfig != nil { + Container.receiptImageRecognitionCurrentProvider, err = initializeLargeLanguageModelProvider(config.ReceiptImageRecognitionLLMConfig) + + if err != nil { + return err + } } - return errs.ErrInvalidLLMProvider + return nil +} + +func initializeLargeLanguageModelProvider(llmConfig *settings.LLMConfig) (LargeLanguageModelProvider, error) { + if llmConfig.LLMProvider == settings.OpenAILLMProvider { + return NewOpenAILargeLanguageModelProvider(llmConfig), nil + } else if llmConfig.LLMProvider == settings.OpenAICompatibleLLMProvider { + return NewOpenAICompatibleLargeLanguageModelProvider(llmConfig), nil + } else if llmConfig.LLMProvider == settings.OpenRouterLLMProvider { + return NewOpenRouterLargeLanguageModelProvider(llmConfig), nil + } else if llmConfig.LLMProvider == settings.OllamaLLMProvider { + return NewOllamaLargeLanguageModelProvider(llmConfig), nil + } else if llmConfig.LLMProvider == "" { + return nil, nil + } + + return nil, errs.ErrInvalidLLMProvider } // GetJsonResponseByReceiptImageRecognitionModel returns the json response from the current large language model provider by receipt image recognition model func (l *LargeLanguageModelProviderContainer) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) { - if Container.current == nil { + if currentConfig.ReceiptImageRecognitionLLMConfig == nil || Container.receiptImageRecognitionCurrentProvider == nil { return nil, errs.ErrInvalidLLMProvider } - return l.current.GetJsonResponseByReceiptImageRecognitionModel(c, uid, currentConfig, request) + return l.receiptImageRecognitionCurrentProvider.GetJsonResponse(c, uid, currentConfig.ReceiptImageRecognitionLLMConfig, request) } diff --git a/pkg/llm/ollama_large_language_model_provider.go b/pkg/llm/ollama_large_language_model_provider.go index 5a69ddae..8d0b1815 100644 --- a/pkg/llm/ollama_large_language_model_provider.go +++ b/pkg/llm/ollama_large_language_model_provider.go @@ -18,13 +18,13 @@ const ollamaChatCompletionsPath = "api/chat" // OllamaLargeLanguageModelProvider defines the structure of Ollama large language model provider type OllamaLargeLanguageModelProvider struct { CommonHttpLargeLanguageModelProvider - OllamaServerURL string - ReceiptImageRecognitionModelID string + OllamaServerURL string + OllamaModelID string } // BuildTextualRequest returns the http request by Ollama provider -func (p *OllamaLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) { - requestBody, err := p.buildJsonRequestBody(c, uid, request, modelId, responseType) +func (p *OllamaLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) (*http.Request, error) { + requestBody, err := p.buildJsonRequestBody(c, uid, request, responseType) if err != nil { return nil, err @@ -82,12 +82,16 @@ func (p *OllamaLargeLanguageModelProvider) ParseTextualResponse(c core.Context, return textualResponse, nil } -// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of Ollama provider -func (p *OllamaLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string { - return p.ReceiptImageRecognitionModelID +// GetModelID returns the model id of Ollama provider +func (p *OllamaLargeLanguageModelProvider) GetModelID() string { + return p.OllamaModelID } -func (p *OllamaLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) ([]byte, error) { +func (p *OllamaLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) ([]byte, error) { + if p.OllamaModelID == "" { + return nil, errs.ErrInvalidLLMModelId + } + requestMessages := make([]any, 0) if request.SystemPrompt != "" { @@ -114,7 +118,7 @@ func (p *OllamaLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, } requestBody := make(map[string]any) - requestBody["model"] = modelId + requestBody["model"] = p.OllamaModelID requestBody["stream"] = request.Stream requestBody["messages"] = requestMessages @@ -145,9 +149,9 @@ func (p *OllamaLargeLanguageModelProvider) getOllamaRequestUrl() string { } // NewOllamaLargeLanguageModelProvider creates a new Ollama large language model provider instance -func NewOllamaLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider { +func NewOllamaLargeLanguageModelProvider(llmConfig *settings.LLMConfig) LargeLanguageModelProvider { return newCommonHttpLargeLanguageModelProvider(&OllamaLargeLanguageModelProvider{ - OllamaServerURL: config.OllamaServerURL, - ReceiptImageRecognitionModelID: config.OllamaReceiptImageRecognitionModelID, + OllamaServerURL: llmConfig.OllamaServerURL, + OllamaModelID: llmConfig.OllamaModelID, }) } diff --git a/pkg/llm/ollama_large_language_model_provider_test.go b/pkg/llm/ollama_large_language_model_provider_test.go index 8f530528..93d5ea64 100644 --- a/pkg/llm/ollama_large_language_model_provider_test.go +++ b/pkg/llm/ollama_large_language_model_provider_test.go @@ -10,14 +10,16 @@ import ( ) func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) { - provider := &OllamaLargeLanguageModelProvider{} + provider := &OllamaLargeLanguageModelProvider{ + OllamaModelID: "test", + } request := &LargeLanguageModelRequest{ SystemPrompt: "You are a helpful assistant.", UserPrompt: []byte("Hello, how are you?"), } - bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.Nil(t, err) var body map[string]interface{} @@ -28,7 +30,9 @@ func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt } func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) { - provider := &OllamaLargeLanguageModelProvider{} + provider := &OllamaLargeLanguageModelProvider{ + OllamaModelID: "test", + } request := &LargeLanguageModelRequest{ SystemPrompt: "What's in this image?", @@ -36,7 +40,7 @@ func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL, } - bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.Nil(t, err) var body map[string]interface{} diff --git a/pkg/llm/openai_common_compatible_large_language_model_provider.go b/pkg/llm/openai_common_compatible_large_language_model_provider.go index 60717b7b..03d36742 100644 --- a/pkg/llm/openai_common_compatible_large_language_model_provider.go +++ b/pkg/llm/openai_common_compatible_large_language_model_provider.go @@ -20,8 +20,8 @@ type OpenAIChatCompletionsLargeLanguageModelProvider interface { // BuildChatCompletionsHttpRequest returns the chat completions http request BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) - // GetReceiptImageRecognitionModelID returns the receipt image recognition model id if supported, otherwise returns empty string - GetReceiptImageRecognitionModelID() string + // GetModelID returns the model id if supported, otherwise returns empty string + GetModelID() string } // OpenAICommonChatCompletionsHttpLargeLanguageModelProvider defines the structure of OpenAI common compatible large language model provider based on chat completions api @@ -31,8 +31,8 @@ type OpenAICommonChatCompletionsHttpLargeLanguageModelProvider struct { } // BuildTextualRequest returns the http request by OpenAI common compatible provider -func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) { - requestBody, err := p.buildJsonRequestBody(c, uid, request, modelId, responseType) +func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) (*http.Request, error) { + requestBody, err := p.buildJsonRequestBody(c, uid, request, responseType) if err != nil { return nil, err @@ -105,12 +105,11 @@ func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) ParseTextual return textualResponse, nil } -// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI common compatible provider -func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string { - return p.provider.GetReceiptImageRecognitionModelID() -} +func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) ([]byte, error) { + if p.provider.GetModelID() == "" { + return nil, errs.ErrInvalidLLMModelId + } -func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) ([]byte, error) { requestMessages := make([]any, 0) if request.SystemPrompt != "" { @@ -143,7 +142,7 @@ func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) buildJsonReq } requestBody := make(map[string]any) - requestBody["model"] = modelId + requestBody["model"] = p.provider.GetModelID() requestBody["stream"] = request.Stream requestBody["messages"] = requestMessages diff --git a/pkg/llm/openai_common_compatible_large_language_model_provider_test.go b/pkg/llm/openai_common_compatible_large_language_model_provider_test.go index f7e96958..19c3f32c 100644 --- a/pkg/llm/openai_common_compatible_large_language_model_provider_test.go +++ b/pkg/llm/openai_common_compatible_large_language_model_provider_test.go @@ -11,7 +11,9 @@ import ( func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) { provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{ - provider: &OpenAILargeLanguageModelProvider{}, + provider: &OpenAILargeLanguageModelProvider{ + OpenAIModelID: "test", + }, } request := &LargeLanguageModelRequest{ @@ -19,7 +21,7 @@ func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequ UserPrompt: []byte("Hello, how are you?"), } - bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.Nil(t, err) var body map[string]interface{} @@ -31,7 +33,9 @@ func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequ func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) { provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{ - provider: &OpenAILargeLanguageModelProvider{}, + provider: &OpenAILargeLanguageModelProvider{ + OpenAIModelID: "test", + }, } request := &LargeLanguageModelRequest{ @@ -40,7 +44,7 @@ func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequ UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL, } - bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.Nil(t, err) var body map[string]interface{} diff --git a/pkg/llm/openai_compatible_large_language_model_provider.go b/pkg/llm/openai_compatible_large_language_model_provider.go index 741cc874..ce300462 100644 --- a/pkg/llm/openai_compatible_large_language_model_provider.go +++ b/pkg/llm/openai_compatible_large_language_model_provider.go @@ -12,9 +12,9 @@ const openAICompatibleChatCompletionsPath = "chat/completions" // OpenAICompatibleLargeLanguageModelProvider defines the structure of OpenAI compatible large language model provider type OpenAICompatibleLargeLanguageModelProvider struct { OpenAIChatCompletionsLargeLanguageModelProvider - OpenAICompatibleBaseURL string - OpenAICompatibleAPIKey string - ReceiptImageRecognitionModelID string + OpenAICompatibleBaseURL string + OpenAICompatibleAPIKey string + OpenAICompatibleModelID string } // BuildChatCompletionsHttpRequest returns the chat completions http request by OpenAI compatible provider @@ -32,9 +32,9 @@ func (p *OpenAICompatibleLargeLanguageModelProvider) BuildChatCompletionsHttpReq return req, nil } -// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI compatible provider -func (p *OpenAICompatibleLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string { - return p.ReceiptImageRecognitionModelID +// GetModelID returns the model id of OpenAI compatible provider +func (p *OpenAICompatibleLargeLanguageModelProvider) GetModelID() string { + return p.OpenAICompatibleModelID } func (p *OpenAICompatibleLargeLanguageModelProvider) getFinalChatCompletionsRequestUrl() string { @@ -49,10 +49,10 @@ func (p *OpenAICompatibleLargeLanguageModelProvider) getFinalChatCompletionsRequ } // NewOpenAICompatibleLargeLanguageModelProvider creates a new OpenAI compatible large language model provider instance -func NewOpenAICompatibleLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider { +func NewOpenAICompatibleLargeLanguageModelProvider(llmConfig *settings.LLMConfig) LargeLanguageModelProvider { return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAICompatibleLargeLanguageModelProvider{ - OpenAICompatibleBaseURL: config.OpenAICompatibleBaseURL, - OpenAICompatibleAPIKey: config.OpenAICompatibleAPIKey, - ReceiptImageRecognitionModelID: config.OpenAICompatibleReceiptImageRecognitionModelID, + OpenAICompatibleBaseURL: llmConfig.OpenAICompatibleBaseURL, + OpenAICompatibleAPIKey: llmConfig.OpenAICompatibleAPIKey, + OpenAICompatibleModelID: llmConfig.OpenAICompatibleModelID, }) } diff --git a/pkg/llm/openai_large_language_model_provider.go b/pkg/llm/openai_large_language_model_provider.go index 2bb51352..f1beb686 100644 --- a/pkg/llm/openai_large_language_model_provider.go +++ b/pkg/llm/openai_large_language_model_provider.go @@ -10,8 +10,8 @@ import ( // OpenAILargeLanguageModelProvider defines the structure of OpenAI large language model provider type OpenAILargeLanguageModelProvider struct { OpenAIChatCompletionsLargeLanguageModelProvider - OpenAIAPIKey string - ReceiptImageRecognitionModelID string + OpenAIAPIKey string + OpenAIModelID string } const openAIChatCompletionsUrl = "https://api.openai.com/v1/chat/completions" @@ -29,15 +29,15 @@ func (p *OpenAILargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c cor return req, nil } -// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI provider -func (p *OpenAILargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string { - return p.ReceiptImageRecognitionModelID +// GetModelID returns the model id of OpenAI provider +func (p *OpenAILargeLanguageModelProvider) GetModelID() string { + return p.OpenAIModelID } // NewOpenAILargeLanguageModelProvider creates a new OpenAI large language model provider instance -func NewOpenAILargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider { +func NewOpenAILargeLanguageModelProvider(llmConfig *settings.LLMConfig) LargeLanguageModelProvider { return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAILargeLanguageModelProvider{ - OpenAIAPIKey: config.OpenAIAPIKey, - ReceiptImageRecognitionModelID: config.OpenAIReceiptImageRecognitionModelID, + OpenAIAPIKey: llmConfig.OpenAIAPIKey, + OpenAIModelID: llmConfig.OpenAIModelID, }) } diff --git a/pkg/llm/openrouter_large_language_model_provider.go b/pkg/llm/openrouter_large_language_model_provider.go index 16873bf0..169a59ed 100644 --- a/pkg/llm/openrouter_large_language_model_provider.go +++ b/pkg/llm/openrouter_large_language_model_provider.go @@ -10,8 +10,8 @@ import ( // OpenRouterLargeLanguageModelProvider defines the structure of OpenRouter large language model provider type OpenRouterLargeLanguageModelProvider struct { OpenAIChatCompletionsLargeLanguageModelProvider - OpenRouterAPIKey string - ReceiptImageRecognitionModelID string + OpenRouterAPIKey string + OpenRouterModelID string } const openRouterChatCompletionsUrl = "https://openrouter.ai/api/v1/chat/completions" @@ -31,15 +31,15 @@ func (p *OpenRouterLargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c return req, nil } -// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenRouter provider -func (p *OpenRouterLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string { - return p.ReceiptImageRecognitionModelID +// GetModelID returns the model id of OpenRouter provider +func (p *OpenRouterLargeLanguageModelProvider) GetModelID() string { + return p.OpenRouterModelID } // NewOpenRouterLargeLanguageModelProvider creates a new OpenRouter large language model provider instance -func NewOpenRouterLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider { +func NewOpenRouterLargeLanguageModelProvider(llmConfig *settings.LLMConfig) LargeLanguageModelProvider { return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenRouterLargeLanguageModelProvider{ - OpenRouterAPIKey: config.OpenRouterAPIKey, - ReceiptImageRecognitionModelID: config.OpenRouterReceiptImageRecognitionModelID, + OpenRouterAPIKey: llmConfig.OpenRouterAPIKey, + OpenRouterModelID: llmConfig.OpenRouterModelID, }) } diff --git a/pkg/settings/setting.go b/pkg/settings/setting.go index f3db6068..35aeee0a 100644 --- a/pkg/settings/setting.go +++ b/pkg/settings/setting.go @@ -219,6 +219,23 @@ type WebDAVConfig struct { SkipTLSVerify bool } +// LLMConfig represents the Large Language Model setting config +type LLMConfig struct { + LLMProvider string + OpenAIAPIKey string + OpenAIModelID string + OpenAICompatibleBaseURL string + OpenAICompatibleAPIKey string + OpenAICompatibleModelID string + OpenRouterAPIKey string + OpenRouterModelID string + OllamaServerURL string + OllamaModelID string + LargeLanguageModelAPIRequestTimeout uint32 + LargeLanguageModelAPIProxy string + LargeLanguageModelAPISkipTLSVerify bool +} + // TipConfig represents a tip setting config type TipConfig struct { Enabled bool @@ -292,21 +309,11 @@ type Config struct { WebDAVConfig *WebDAVConfig // Large Language Model - LLMProvider string - OpenAIAPIKey string - OpenAIReceiptImageRecognitionModelID string - OpenAICompatibleBaseURL string - OpenAICompatibleAPIKey string - OpenAICompatibleReceiptImageRecognitionModelID string - OpenRouterAPIKey string - OpenRouterReceiptImageRecognitionModelID string - OllamaServerURL string - OllamaReceiptImageRecognitionModelID string - TransactionFromAIImageRecognition bool - MaxAIRecognitionPictureFileSize uint32 - LargeLanguageModelAPIRequestTimeout uint32 - LargeLanguageModelAPIProxy string - LargeLanguageModelAPISkipTLSVerify bool + TransactionFromAIImageRecognition bool + MaxAIRecognitionPictureFileSize uint32 + + // Large Language Model for Receipt Image Recognition + ReceiptImageRecognitionLLMConfig *LLMConfig // Uuid UuidGeneratorType string @@ -453,7 +460,13 @@ func LoadConfiguration(configFilePath string) (*Config, error) { return nil, err } - err = loadLLMConfiguration(config, cfgFile, "llm") + err = loadLLMGlobalConfiguration(config, cfgFile, "llm") + + if err != nil { + return nil, err + } + + config.ReceiptImageRecognitionLLMConfig, err = loadLLMConfiguration(cfgFile, "llm_image_recognition") if err != nil { return nil, err @@ -784,46 +797,51 @@ func loadStorageConfiguration(config *Config, configFile *ini.File, sectionName return nil } -func loadLLMConfiguration(config *Config, configFile *ini.File, sectionName string) error { - llmProvider := getConfigItemStringValue(configFile, sectionName, "llm_provider") - - if llmProvider == "" { - config.LLMProvider = "" - } else if llmProvider == OpenAILLMProvider { - config.LLMProvider = OpenAILLMProvider - } else if llmProvider == OpenAICompatibleLLMProvider { - config.LLMProvider = OpenAICompatibleLLMProvider - } else if llmProvider == OpenRouterLLMProvider { - config.LLMProvider = OpenRouterLLMProvider - } else if llmProvider == OllamaLLMProvider { - config.LLMProvider = OllamaLLMProvider - } else { - return errs.ErrInvalidLLMProvider - } - - config.OpenAIAPIKey = getConfigItemStringValue(configFile, sectionName, "openai_api_key") - config.OpenAIReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openai_receipt_image_recognition_model_id") - - config.OpenAICompatibleBaseURL = getConfigItemStringValue(configFile, sectionName, "openai_compatible_base_url") - config.OpenAICompatibleAPIKey = getConfigItemStringValue(configFile, sectionName, "openai_compatible_api_key") - config.OpenAICompatibleReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openai_compatible_receipt_image_recognition_model_id") - - config.OpenRouterAPIKey = getConfigItemStringValue(configFile, sectionName, "openrouter_api_key") - config.OpenRouterReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openrouter_receipt_image_recognition_model_id") - - config.OllamaServerURL = getConfigItemStringValue(configFile, sectionName, "ollama_server_url") - config.OllamaReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "ollama_receipt_image_recognition_model_id") - +func loadLLMGlobalConfiguration(config *Config, configFile *ini.File, sectionName string) error { config.TransactionFromAIImageRecognition = getConfigItemBoolValue(configFile, sectionName, "transaction_from_ai_image_recognition", false) config.MaxAIRecognitionPictureFileSize = getConfigItemUint32Value(configFile, sectionName, "max_ai_recognition_picture_size", defaultAIRecognitionPictureMaxSize) - config.LargeLanguageModelAPIProxy = getConfigItemStringValue(configFile, sectionName, "proxy", "system") - config.LargeLanguageModelAPIRequestTimeout = getConfigItemUint32Value(configFile, sectionName, "request_timeout", defaultLargeLanguageModelAPIRequestTimeout) - config.LargeLanguageModelAPISkipTLSVerify = getConfigItemBoolValue(configFile, sectionName, "skip_tls_verify", false) - return nil } +func loadLLMConfiguration(configFile *ini.File, sectionName string) (*LLMConfig, error) { + llmConfig := &LLMConfig{} + llmProvider := getConfigItemStringValue(configFile, sectionName, "llm_provider") + + if llmProvider == "" { + llmConfig.LLMProvider = "" + } else if llmProvider == OpenAILLMProvider { + llmConfig.LLMProvider = OpenAILLMProvider + } else if llmProvider == OpenAICompatibleLLMProvider { + llmConfig.LLMProvider = OpenAICompatibleLLMProvider + } else if llmProvider == OpenRouterLLMProvider { + llmConfig.LLMProvider = OpenRouterLLMProvider + } else if llmProvider == OllamaLLMProvider { + llmConfig.LLMProvider = OllamaLLMProvider + } else { + return nil, errs.ErrInvalidLLMProvider + } + + llmConfig.OpenAIAPIKey = getConfigItemStringValue(configFile, sectionName, "openai_api_key") + llmConfig.OpenAIModelID = getConfigItemStringValue(configFile, sectionName, "openai_model_id") + + llmConfig.OpenAICompatibleBaseURL = getConfigItemStringValue(configFile, sectionName, "openai_compatible_base_url") + llmConfig.OpenAICompatibleAPIKey = getConfigItemStringValue(configFile, sectionName, "openai_compatible_api_key") + llmConfig.OpenAICompatibleModelID = getConfigItemStringValue(configFile, sectionName, "openai_compatible_model_id") + + llmConfig.OpenRouterAPIKey = getConfigItemStringValue(configFile, sectionName, "openrouter_api_key") + llmConfig.OpenRouterModelID = getConfigItemStringValue(configFile, sectionName, "openrouter_model_id") + + llmConfig.OllamaServerURL = getConfigItemStringValue(configFile, sectionName, "ollama_server_url") + llmConfig.OllamaModelID = getConfigItemStringValue(configFile, sectionName, "ollama_model_id") + + llmConfig.LargeLanguageModelAPIProxy = getConfigItemStringValue(configFile, sectionName, "proxy", "system") + llmConfig.LargeLanguageModelAPIRequestTimeout = getConfigItemUint32Value(configFile, sectionName, "request_timeout", defaultLargeLanguageModelAPIRequestTimeout) + llmConfig.LargeLanguageModelAPISkipTLSVerify = getConfigItemBoolValue(configFile, sectionName, "skip_tls_verify", false) + + return llmConfig, nil +} + func loadUuidConfiguration(config *Config, configFile *ini.File, sectionName string) error { if getConfigItemStringValue(configFile, sectionName, "generator_type") == InternalUuidGeneratorType { config.UuidGeneratorType = InternalUuidGeneratorType