From 3d5b887e23c2d407b872bff5179a8c2e609cc02c Mon Sep 17 00:00:00 2001
From: MaysWind
Date: Tue, 23 Sep 2025 00:50:44 +0800
Subject: [PATCH] support Google AI LLM provider

---
 conf/ezbookkeeping.ini                        |   8 +-
 ...large_language_model_provider_container.go |   4 +-
 .../google_ai_large_language_model_adapter.go | 167 ++++++++++++++++
 ...le_ai_large_language_model_adapter_test.go | 181 ++++++++++++++++++
 pkg/settings/setting.go                       |   8 +
 5 files changed, 365 insertions(+), 3 deletions(-)
 create mode 100644 pkg/llm/provider/googleai/google_ai_large_language_model_adapter.go
 create mode 100644 pkg/llm/provider/googleai/google_ai_large_language_model_adapter_test.go

diff --git a/conf/ezbookkeeping.ini b/conf/ezbookkeeping.ini
index 2db2c3b3..65367337 100644
--- a/conf/ezbookkeeping.ini
+++ b/conf/ezbookkeeping.ini
@@ -172,7 +172,7 @@ transaction_from_ai_image_recognition = false
 max_ai_recognition_picture_size = 10485760
 
 [llm_image_recognition]
-# Large Language Model (LLM) provider for receipt image recognition, supports the following types: "openai", "openai_compatible", "openrouter", "ollama"
+# Large Language Model (LLM) provider for receipt image recognition, supports the following types: "openai", "openai_compatible", "openrouter", "ollama", "google_ai"
 llm_provider =
 
 # For "openai" llm provider only, OpenAI API secret key, please visit https://platform.openai.com/api-keys for more information
@@ -202,6 +202,12 @@ ollama_server_url =
 # For "ollama" llm provider only, receipt image recognition model for creating transactions from images
 ollama_model_id =
 
+# For "google_ai" llm provider only, Google AI Studio API key, please visit https://aistudio.google.com/apikey for more information
+google_ai_api_key =
+
+# For "google_ai" llm provider only, receipt image recognition model for creating transactions from images
+google_ai_model_id =
+
 # Requesting large language model api timeout (0 - 4294967295 milliseconds)
 # Set to 0 to disable timeout for requesting large language model api, default is 60000 (60 seconds)
 request_timeout = 60000
diff --git a/pkg/llm/large_language_model_provider_container.go b/pkg/llm/large_language_model_provider_container.go
index 66779115..6652b290 100644
--- a/pkg/llm/large_language_model_provider_container.go
+++ b/pkg/llm/large_language_model_provider_container.go
@@ -5,7 +5,7 @@ import (
 	"github.com/mayswind/ezbookkeeping/pkg/errs"
 	"github.com/mayswind/ezbookkeeping/pkg/llm/data"
 	"github.com/mayswind/ezbookkeeping/pkg/llm/provider"
-	"github.com/mayswind/ezbookkeeping/pkg/llm/provider/google_ai"
+	"github.com/mayswind/ezbookkeeping/pkg/llm/provider/googleai"
 	"github.com/mayswind/ezbookkeeping/pkg/llm/provider/ollama"
 	"github.com/mayswind/ezbookkeeping/pkg/llm/provider/openai"
 	"github.com/mayswind/ezbookkeeping/pkg/settings"
@@ -46,7 +46,7 @@ func initializeLargeLanguageModelProvider(llmConfig *settings.LLMConfig) (provid
 	} else if llmConfig.LLMProvider == settings.OllamaLLMProvider {
 		return ollama.NewOllamaLargeLanguageModelProvider(llmConfig), nil
 	} else if llmConfig.LLMProvider == settings.GoogleAILLMProvider {
-		return google_ai.NewGoogleAILargeLanguageModelProvider(llmConfig), nil
+		return googleai.NewGoogleAILargeLanguageModelProvider(llmConfig), nil
 	} else if llmConfig.LLMProvider == "" {
 		return nil, nil
 	}
diff --git a/pkg/llm/provider/googleai/google_ai_large_language_model_adapter.go b/pkg/llm/provider/googleai/google_ai_large_language_model_adapter.go
new file mode 100644
index 00000000..4381c0e0
--- /dev/null
+++ b/pkg/llm/provider/googleai/google_ai_large_language_model_adapter.go
@@ -0,0 +1,167 @@
+package googleai
+
+import (
+	"bytes"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"net/http"
+
+	"github.com/mayswind/ezbookkeeping/pkg/core"
+	"github.com/mayswind/ezbookkeeping/pkg/errs"
+	"github.com/mayswind/ezbookkeeping/pkg/llm/data"
+	"github.com/mayswind/ezbookkeeping/pkg/llm/provider"
+	"github.com/mayswind/ezbookkeeping/pkg/llm/provider/common"
+	"github.com/mayswind/ezbookkeeping/pkg/log"
+	"github.com/mayswind/ezbookkeeping/pkg/settings"
+)
+
+const googleAIGenerateContentAPIFormat = "https://generativelanguage.googleapis.com/v1beta/models/%s:generateContent"
+
+// GoogleAILargeLanguageModelAdapter defines the structure of Google AI large language model adapter
+type GoogleAILargeLanguageModelAdapter struct {
+	common.HttpLargeLanguageModelAdapter
+	GoogleAIAPIKey  string
+	GoogleAIModelID string
+}
+
+// GoogleAIGenerateContentRequest defines the structure of Google AI generate content request
+type GoogleAIGenerateContentRequest struct {
+	Contents []*GoogleAIGenerateContentRequestContent `json:"contents"`
+}
+
+// GoogleAIGenerateContentRequestContent defines the structure of Google AI generate content request content
+type GoogleAIGenerateContentRequestContent struct {
+	Parts []*GoogleAIGenerateContentRequestContentPart `json:"parts"`
+}
+
+// GoogleAIGenerateContentRequestContentPart defines the structure of Google AI generate content request content part
+type GoogleAIGenerateContentRequestContentPart struct {
+	Text       string                                    `json:"text,omitempty"`
+	InlineData *GoogleAIGenerateContentRequestInlineData `json:"inlineData,omitempty"`
+}
+
+// GoogleAIGenerateContentRequestInlineData defines the structure of Google AI generate content request inline data
+type GoogleAIGenerateContentRequestInlineData struct {
+	MimeType string `json:"mimeType"`
+	Data     string `json:"data"`
+}
+
+// GoogleAIGenerateContentResponse defines the structure of Google AI generate content response
+type GoogleAIGenerateContentResponse struct {
+	Candidates []*GoogleAIGenerateContentResponseCandidate `json:"candidates"`
+}
+
+// GoogleAIGenerateContentResponseCandidate defines the structure of Google AI generate content response candidate
+type GoogleAIGenerateContentResponseCandidate struct {
+	Content *GoogleAIGenerateContentResponseContent `json:"content"`
+}
+
+// GoogleAIGenerateContentResponseContent defines the structure of Google AI generate content response content
+type GoogleAIGenerateContentResponseContent struct {
+	Part []*GoogleAIGenerateContentResponseContentPart `json:"parts"`
+}
+
+// GoogleAIGenerateContentResponseContentPart defines the structure of Google AI generate content response content part
+type GoogleAIGenerateContentResponseContentPart struct {
+	Text *string `json:"text"`
+}
+
+// BuildTextualRequest returns the http request by Google AI large language model adapter
+func (p *GoogleAILargeLanguageModelAdapter) BuildTextualRequest(c core.Context, uid int64, request *data.LargeLanguageModelRequest, responseType data.LargeLanguageModelResponseFormat) (*http.Request, error) {
+	requestBody, err := p.buildJsonRequestBody(c, uid, request, responseType)
+
+	if err != nil {
+		return nil, err
+	}
+
+	requestUrl := fmt.Sprintf(googleAIGenerateContentAPIFormat, p.GoogleAIModelID)
+	httpRequest, err := http.NewRequest("POST", requestUrl, bytes.NewReader(requestBody))
+
+	if err != nil {
+		return nil, err
+	}
+
+	httpRequest.Header.Set("Content-Type", "application/json")
+	httpRequest.Header.Set("X-goog-api-key", p.GoogleAIAPIKey)
+
+	return httpRequest, nil
+}
+
+// ParseTextualResponse returns the textual response by Google AI large language model adapter, or ErrFailedToRequestRemoteApi when the body is not valid JSON or the first candidate has no text part (JSON null candidates and parts decode to nil pointers and are rejected instead of dereferenced)
+func (p *GoogleAILargeLanguageModelAdapter) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType data.LargeLanguageModelResponseFormat) (*data.LargeLanguageModelTextualResponse, error) {
+	generateContentResponse := &GoogleAIGenerateContentResponse{}
+	err := json.Unmarshal(body, &generateContentResponse)
+
+	if err != nil {
+		log.Errorf(c, "[google_ai_large_language_model_adapter.ParseTextualResponse] failed to parse generate content response for user \"uid:%d\", because %s", uid, err.Error())
+		return nil, errs.ErrFailedToRequestRemoteApi
+	}
+
+	if generateContentResponse == nil || generateContentResponse.Candidates == nil || len(generateContentResponse.Candidates) < 1 || generateContentResponse.Candidates[0] == nil ||
+		generateContentResponse.Candidates[0].Content == nil || len(generateContentResponse.Candidates[0].Content.Part) < 1 || generateContentResponse.Candidates[0].Content.Part[0] == nil ||
+		generateContentResponse.Candidates[0].Content.Part[0].Text == nil {
+		log.Errorf(c, "[google_ai_large_language_model_adapter.ParseTextualResponse] generate content response is invalid for user \"uid:%d\"", uid)
+		return nil, errs.ErrFailedToRequestRemoteApi
+	}
+
+	textualResponse := &data.LargeLanguageModelTextualResponse{
+		Content: *generateContentResponse.Candidates[0].Content.Part[0].Text,
+	}
+
+	return textualResponse, nil
+}
+
+func (p *GoogleAILargeLanguageModelAdapter) buildJsonRequestBody(c core.Context, uid int64, request *data.LargeLanguageModelRequest, responseType data.LargeLanguageModelResponseFormat) ([]byte, error) {
+	if p.GoogleAIModelID == "" {
+		return nil, errs.ErrInvalidLLMModelId
+	}
+
+	generateContentRequest := &GoogleAIGenerateContentRequest{
+		Contents: []*GoogleAIGenerateContentRequestContent{
+			{
+				Parts: make([]*GoogleAIGenerateContentRequestContentPart, 0, 2),
+			},
+		},
+	}
+
+	if request.SystemPrompt != "" {
+		generateContentRequest.Contents[0].Parts = append(generateContentRequest.Contents[0].Parts, &GoogleAIGenerateContentRequestContentPart{
+			Text: request.SystemPrompt,
+		})
+	}
+
+	if len(request.UserPrompt) > 0 {
+		if request.UserPromptType == data.LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL {
+			imageBase64Data := base64.StdEncoding.EncodeToString(request.UserPrompt)
+			generateContentRequest.Contents[0].Parts = append(generateContentRequest.Contents[0].Parts, &GoogleAIGenerateContentRequestContentPart{
+				InlineData: &GoogleAIGenerateContentRequestInlineData{
+					MimeType: request.UserPromptContentType,
+					Data:     imageBase64Data,
+				},
+			})
+		} else {
+			generateContentRequest.Contents[0].Parts = append(generateContentRequest.Contents[0].Parts, &GoogleAIGenerateContentRequestContentPart{
+				Text: string(request.UserPrompt),
+			})
+		}
+	}
+
+	requestBodyBytes, err := json.Marshal(generateContentRequest)
+
+	if err != nil {
+		log.Errorf(c, "[google_ai_large_language_model_adapter.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error())
+		return nil, errs.ErrOperationFailed
+	}
+
+	log.Debugf(c, "[google_ai_large_language_model_adapter.buildJsonRequestBody] request body is %s", requestBodyBytes)
+	return requestBodyBytes, nil
+}
+
+// NewGoogleAILargeLanguageModelProvider creates a new Google AI large language model provider instance
+func NewGoogleAILargeLanguageModelProvider(llmConfig *settings.LLMConfig) provider.LargeLanguageModelProvider {
+	return common.NewCommonHttpLargeLanguageModelProvider(&GoogleAILargeLanguageModelAdapter{
+		GoogleAIAPIKey:  llmConfig.GoogleAIAPIKey,
+		GoogleAIModelID: llmConfig.GoogleAIModelID,
+	})
+}
diff --git a/pkg/llm/provider/googleai/google_ai_large_language_model_adapter_test.go b/pkg/llm/provider/googleai/google_ai_large_language_model_adapter_test.go
new file mode 100644
index 00000000..9d5ec32d
--- /dev/null
+++ b/pkg/llm/provider/googleai/google_ai_large_language_model_adapter_test.go
@@ -0,0 +1,181 @@
+package googleai
+
+import (
+	"encoding/json"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+
+	"github.com/mayswind/ezbookkeeping/pkg/core"
+	"github.com/mayswind/ezbookkeeping/pkg/llm/data"
+)
+
+func TestGoogleAILargeLanguageModelAdapter_buildJsonRequestBody_TextualUserPrompt(t *testing.T) {
+	adapter := &GoogleAILargeLanguageModelAdapter{
+		GoogleAIModelID: "test",
+	}
+
+	request := &data.LargeLanguageModelRequest{
+		SystemPrompt: "You are a helpful assistant.",
+		UserPrompt:   []byte("Hello, how are you?"),
+	}
+
+	bodyBytes, err := adapter.buildJsonRequestBody(core.NewNullContext(), 0, request, data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+	assert.Nil(t, err)
+
+	var body map[string]interface{}
+	err = json.Unmarshal(bodyBytes, &body)
+	assert.Nil(t, err)
+
+	assert.Equal(t, "{\"contents\":[{\"parts\":[{\"text\":\"You are a helpful assistant.\"},{\"text\":\"Hello, how are you?\"}]}]}", string(bodyBytes))
+}
+
+func TestGoogleAILargeLanguageModelAdapter_buildJsonRequestBody_ImageUserPrompt(t *testing.T) {
+	adapter := &GoogleAILargeLanguageModelAdapter{
+		GoogleAIModelID: "test",
+	}
+
+	request := &data.LargeLanguageModelRequest{
+		SystemPrompt:          "What's in this image?",
+		UserPrompt:            []byte("fakedata"),
+		UserPromptType:        data.LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
+		UserPromptContentType: "image/png",
+	}
+
+	bodyBytes, err := adapter.buildJsonRequestBody(core.NewNullContext(), 0, request, data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+	assert.Nil(t, err)
+
+	var body map[string]interface{}
+	err = json.Unmarshal(bodyBytes, &body)
+	assert.Nil(t, err)
+
+	assert.Equal(t, "{\"contents\":[{\"parts\":[{\"text\":\"What's in this image?\"},{\"inlineData\":{\"mimeType\":\"image/png\",\"data\":\"ZmFrZWRhdGE=\"}}]}]}", string(bodyBytes))
+}
+
+func TestGoogleAILargeLanguageModelAdapter_ParseTextualResponse_ValidJsonResponse(t *testing.T) {
+	adapter := &GoogleAILargeLanguageModelAdapter{
+		GoogleAIModelID: "test",
+	}
+
+	response := `{
+		"responseId": "test-123",
+		"modelVersion": "test",
+		"usageMetadata": {
+			"promptTokenCount": 13,
+			"candidatesTokenCount": 7,
+			"totalTokenCount": 20
+		},
+		"candidates": [
+			{
+				"content": {
+					"parts": [
+						{
+							"text": "This is a test response"
+						}
+					]
+				},
+				"finish_reason": "stop",
+				"index": 0
+			}
+		]
+	}`
+
+	result, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+	assert.Nil(t, err)
+	assert.Equal(t, "This is a test response", result.Content)
+}
+
+func TestGoogleAILargeLanguageModelAdapter_ParseTextualResponse_EmptyResponse(t *testing.T) {
+	adapter := &GoogleAILargeLanguageModelAdapter{
+		GoogleAIModelID: "test",
+	}
+
+	response := `{
+		"responseId": "test-123",
+		"modelVersion": "test",
+		"usageMetadata": {
+			"promptTokenCount": 13,
+			"candidatesTokenCount": 7,
+			"totalTokenCount": 20
+		},
+		"candidates": [
+			{
+				"content": {
+					"parts": [
+						{
+							"text": ""
+						}
+					]
+				},
+				"finish_reason": "stop",
+				"index": 0
+			}
+		]
+	}`
+
+	result, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+	assert.Nil(t, err)
+	assert.Equal(t, "", result.Content)
+}
+
+func TestGoogleAILargeLanguageModelAdapter_ParseTextualResponse_EmptyCandidates(t *testing.T) {
+	adapter := &GoogleAILargeLanguageModelAdapter{
+		GoogleAIModelID: "test",
+	}
+
+	response := `{
+		"responseId": "test-123",
+		"modelVersion": "test",
+		"usageMetadata": {
+			"promptTokenCount": 13,
+			"candidatesTokenCount": 7,
+			"totalTokenCount": 20
+		},
+		"candidates": []
+	}`
+
+	_, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+	assert.EqualError(t, err, "failed to request third party api")
+}
+
+func TestGoogleAILargeLanguageModelAdapter_ParseTextualResponse_NoPartText(t *testing.T) {
+	adapter := &GoogleAILargeLanguageModelAdapter{
+		GoogleAIModelID: "test",
+	}
+
+	response := `{
+		"responseId": "test-123",
+		"modelVersion": "test",
+		"usageMetadata": {
+			"promptTokenCount": 13,
+			"candidatesTokenCount": 7,
+			"totalTokenCount": 20
+		},
+		"candidates": [
+			{
+				"content": {
+					"parts": [
+						{
+						}
+					]
+				},
+				"finish_reason": "stop",
+				"index": 0
+			}
+		]
+	}`
+
+	_, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+	assert.EqualError(t, err, "failed to request third party api")
+}
+
+func TestGoogleAILargeLanguageModelAdapter_ParseTextualResponse_InvalidJson(t *testing.T) {
+	adapter := &GoogleAILargeLanguageModelAdapter{
+		GoogleAIModelID: "test",
+	}
+
+	response := "error"
+
+	_, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+	assert.EqualError(t, err, "failed to request third party api")
+}
diff --git a/pkg/settings/setting.go b/pkg/settings/setting.go
index 35aeee0a..c6bc548a 100644
--- a/pkg/settings/setting.go
+++ b/pkg/settings/setting.go
@@ -71,6 +71,7 @@ const (
 	OpenAICompatibleLLMProvider string = "openai_compatible"
 	OpenRouterLLMProvider       string = "openrouter"
 	OllamaLLMProvider           string = "ollama"
+	GoogleAILLMProvider         string = "google_ai"
 )
 
 // Uuid generator types
@@ -231,6 +232,8 @@ type LLMConfig struct {
 	OpenRouterModelID                   string
 	OllamaServerURL                     string
 	OllamaModelID                       string
+	GoogleAIAPIKey                      string
+	GoogleAIModelID                     string
 	LargeLanguageModelAPIRequestTimeout uint32
 	LargeLanguageModelAPIProxy          string
 	LargeLanguageModelAPISkipTLSVerify  bool
@@ -818,6 +821,8 @@ func loadLLMConfiguration(configFile *ini.File, sectionName string) (*LLMConfig,
 		llmConfig.LLMProvider = OpenRouterLLMProvider
 	} else if llmProvider == OllamaLLMProvider {
 		llmConfig.LLMProvider = OllamaLLMProvider
+	} else if llmProvider == GoogleAILLMProvider {
+		llmConfig.LLMProvider = GoogleAILLMProvider
 	} else {
 		return nil, errs.ErrInvalidLLMProvider
 	}
@@ -835,6 +840,9 @@ func loadLLMConfiguration(configFile *ini.File, sectionName string) (*LLMConfig,
 	llmConfig.OllamaServerURL = getConfigItemStringValue(configFile, sectionName, "ollama_server_url")
 	llmConfig.OllamaModelID = getConfigItemStringValue(configFile, sectionName, "ollama_model_id")
 
+	llmConfig.GoogleAIAPIKey = getConfigItemStringValue(configFile, sectionName, "google_ai_api_key")
+	llmConfig.GoogleAIModelID = getConfigItemStringValue(configFile, sectionName, "google_ai_model_id")
+
 	llmConfig.LargeLanguageModelAPIProxy = getConfigItemStringValue(configFile, sectionName, "proxy", "system")
 	llmConfig.LargeLanguageModelAPIRequestTimeout = getConfigItemUint32Value(configFile, sectionName, "request_timeout", defaultLargeLanguageModelAPIRequestTimeout)
 	llmConfig.LargeLanguageModelAPISkipTLSVerify = getConfigItemBoolValue(configFile, sectionName, "skip_tls_verify", false)