diff --git a/pkg/api/large_language_models.go b/pkg/api/large_language_models.go index 742b96a0..97d28dd1 100644 --- a/pkg/api/large_language_models.go +++ b/pkg/api/large_language_models.go @@ -10,6 +10,7 @@ import ( "github.com/mayswind/ezbookkeeping/pkg/core" "github.com/mayswind/ezbookkeeping/pkg/errs" "github.com/mayswind/ezbookkeeping/pkg/llm" + "github.com/mayswind/ezbookkeeping/pkg/llm/data" "github.com/mayswind/ezbookkeeping/pkg/log" "github.com/mayswind/ezbookkeeping/pkg/models" "github.com/mayswind/ezbookkeeping/pkg/services" @@ -94,8 +95,9 @@ func (a *LargeLanguageModelsApi) RecognizeReceiptImageHandler(c *core.WebContext } fileExtension := utils.GetFileNameExtension(imageFiles[0].Filename) + contentType := utils.GetImageContentType(fileExtension) - if utils.GetImageContentType(fileExtension) == "" { + if contentType == "" { log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] the file extension \"%s\" of image in request is not supported for user \"uid:%d\"", fileExtension, uid) return nil, errs.ErrImageTypeNotSupported } @@ -209,11 +211,12 @@ func (a *LargeLanguageModelsApi) RecognizeReceiptImageHandler(c *core.WebContext return nil, errs.Or(err, errs.ErrOperationFailed) } - llmRequest := &llm.LargeLanguageModelRequest{ - Stream: false, - SystemPrompt: strings.ReplaceAll(bodyBuffer.String(), "\r\n", "\n"), - UserPrompt: imageData, - UserPromptType: llm.LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL, + llmRequest := &data.LargeLanguageModelRequest{ + Stream: false, + SystemPrompt: strings.ReplaceAll(bodyBuffer.String(), "\r\n", "\n"), + UserPrompt: imageData, + UserPromptType: data.LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL, + UserPromptContentType: contentType, } llmResponse, err := llm.Container.GetJsonResponseByReceiptImageRecognitionModel(c, c.GetCurrentUid(), a.CurrentConfig(), llmRequest) diff --git a/pkg/llm/large_language_model_data.go b/pkg/llm/data/large_language_model_data.go similarity index 95% rename from pkg/llm/large_language_model_data.go rename to pkg/llm/data/large_language_model_data.go index f0c2d5cb..2f4bc8d7 100644 --- a/pkg/llm/large_language_model_data.go +++ b/pkg/llm/data/large_language_model_data.go @@ -1,4 +1,4 @@ -package llm +package data import "reflect" @@ -24,6 +24,7 @@ type LargeLanguageModelRequest struct { SystemPrompt string UserPrompt []byte UserPromptType LargeLanguageModelRequestPromptType + UserPromptContentType string ResponseJsonObjectType reflect.Type } diff --git a/pkg/llm/large_language_model_provider_container.go b/pkg/llm/large_language_model_provider_container.go index 4204c138..66779115 100644 --- a/pkg/llm/large_language_model_provider_container.go +++ b/pkg/llm/large_language_model_provider_container.go @@ -3,12 +3,17 @@ package llm import ( "github.com/mayswind/ezbookkeeping/pkg/core" "github.com/mayswind/ezbookkeeping/pkg/errs" + "github.com/mayswind/ezbookkeeping/pkg/llm/data" + "github.com/mayswind/ezbookkeeping/pkg/llm/provider" + "github.com/mayswind/ezbookkeeping/pkg/llm/provider/google_ai" + "github.com/mayswind/ezbookkeeping/pkg/llm/provider/ollama" + "github.com/mayswind/ezbookkeeping/pkg/llm/provider/openai" "github.com/mayswind/ezbookkeeping/pkg/settings" ) // LargeLanguageModelProviderContainer contains the current large language model provider type LargeLanguageModelProviderContainer struct { - receiptImageRecognitionCurrentProvider LargeLanguageModelProvider + receiptImageRecognitionCurrentProvider provider.LargeLanguageModelProvider } // Initialize a large language model provider container singleton instance @@ -31,15 +36,17 @@ func InitializeLargeLanguageModelProvider(config *settings.Config) error { return nil } -func initializeLargeLanguageModelProvider(llmConfig *settings.LLMConfig) (LargeLanguageModelProvider, error) { +func initializeLargeLanguageModelProvider(llmConfig *settings.LLMConfig) (provider.LargeLanguageModelProvider, error) { if llmConfig.LLMProvider == settings.OpenAILLMProvider { - return NewOpenAILargeLanguageModelProvider(llmConfig), nil + return openai.NewOpenAILargeLanguageModelProvider(llmConfig), nil } else if llmConfig.LLMProvider == settings.OpenAICompatibleLLMProvider { - return NewOpenAICompatibleLargeLanguageModelProvider(llmConfig), nil + return openai.NewOpenAICompatibleLargeLanguageModelProvider(llmConfig), nil } else if llmConfig.LLMProvider == settings.OpenRouterLLMProvider { - return NewOpenRouterLargeLanguageModelProvider(llmConfig), nil + return openai.NewOpenRouterLargeLanguageModelProvider(llmConfig), nil } else if llmConfig.LLMProvider == settings.OllamaLLMProvider { - return NewOllamaLargeLanguageModelProvider(llmConfig), nil + return ollama.NewOllamaLargeLanguageModelProvider(llmConfig), nil + } else if llmConfig.LLMProvider == settings.GoogleAILLMProvider { + return google_ai.NewGoogleAILargeLanguageModelProvider(llmConfig), nil } else if llmConfig.LLMProvider == "" { return nil, nil } @@ -48,7 +55,7 @@ func initializeLargeLanguageModelProvider(llmConfig *settings.LLMConfig) (LargeL } // GetJsonResponseByReceiptImageRecognitionModel returns the json response from the current large language model provider by receipt image recognition model -func (l *LargeLanguageModelProviderContainer) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) { +func (l *LargeLanguageModelProviderContainer) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *data.LargeLanguageModelRequest) (*data.LargeLanguageModelTextualResponse, error) { if currentConfig.ReceiptImageRecognitionLLMConfig == nil || Container.receiptImageRecognitionCurrentProvider == nil { return nil, errs.ErrInvalidLLMProvider } diff --git a/pkg/llm/ollama_large_language_model_adapter.go b/pkg/llm/ollama_large_language_model_adapter.go deleted file mode 100644 index 37b71ace..00000000 --- a/pkg/llm/ollama_large_language_model_adapter.go +++ /dev/null @@ -1,152 +0,0 @@ -package llm - -import ( - "bytes" - "encoding/base64" - "encoding/json" - "net/http" - "strings" - - "github.com/mayswind/ezbookkeeping/pkg/core" - "github.com/mayswind/ezbookkeeping/pkg/errs" - "github.com/mayswind/ezbookkeeping/pkg/log" - "github.com/mayswind/ezbookkeeping/pkg/settings" -) - -const ollamaChatCompletionsPath = "api/chat" - -// OllamaLargeLanguageModelAdapter defines the structure of Ollama large language model adapter -type OllamaLargeLanguageModelAdapter struct { - HttpLargeLanguageModelAdapter - OllamaServerURL string - OllamaModelID string -} - -// BuildTextualRequest returns the http request by Ollama large language model adapter -func (p *OllamaLargeLanguageModelAdapter) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) (*http.Request, error) { - requestBody, err := p.buildJsonRequestBody(c, uid, request, responseType) - - if err != nil { - return nil, err - } - - httpRequest, err := http.NewRequest("POST", p.getOllamaRequestUrl(), bytes.NewReader(requestBody)) - - if err != nil { - return nil, err - } - - httpRequest.Header.Set("Content-Type", "application/json") - - return httpRequest, nil -} - -// ParseTextualResponse returns the textual response by Ollama large language model adapter -func (p *OllamaLargeLanguageModelAdapter) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) { - responseBody := make(map[string]any) - err := json.Unmarshal(body, &responseBody) - - if err != nil { - log.Errorf(c, "[ollama_large_language_model_adapter.ParseTextualResponse] failed to parse response for user \"uid:%d\", because %s", uid, err.Error()) - return nil, errs.ErrFailedToRequestRemoteApi - } - - message, ok := responseBody["message"].(map[string]any) - - if !ok { - log.Errorf(c, "[ollama_large_language_model_adapter.ParseTextualResponse] no message found in response for user \"uid:%d\"", uid) - return nil, errs.ErrFailedToRequestRemoteApi - } - - content, ok := message["content"].(string) - - if !ok { - log.Errorf(c, "[ollama_large_language_model_adapter.ParseTextualResponse] no content found in message for user \"uid:%d\"", uid) - return nil, errs.ErrFailedToRequestRemoteApi - } - - if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON { - if strings.HasPrefix(content, "```json") && strings.HasSuffix(content, "```") { - content = strings.TrimPrefix(content, "```json") - content = strings.TrimSuffix(content, "```") - } else if strings.HasPrefix(content, "```") && strings.HasSuffix(content, "```") { - content = strings.TrimPrefix(content, "```") - content = strings.TrimSuffix(content, "```") - } - } - - textualResponse := &LargeLanguageModelTextualResponse{ - Content: content, - } - - return textualResponse, nil -} - -func (p *OllamaLargeLanguageModelAdapter) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) ([]byte, error) { - if p.OllamaModelID == "" { - return nil, errs.ErrInvalidLLMModelId - } - - requestMessages := make([]any, 0) - - if request.SystemPrompt != "" { - requestMessages = append(requestMessages, map[string]string{ - "role": "system", - "content": request.SystemPrompt, - }) - } - - if len(request.UserPrompt) > 0 { - if request.UserPromptType == LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL { - imageBase64Data := base64.StdEncoding.EncodeToString(request.UserPrompt) - requestMessages = append(requestMessages, map[string]any{ - "role": "user", - "content": "", - "images": []string{imageBase64Data}, - }) - } else { - requestMessages = append(requestMessages, map[string]string{ - "role": "user", - "content": string(request.UserPrompt), - }) - } - } - - requestBody := make(map[string]any) - requestBody["model"] = p.OllamaModelID - requestBody["stream"] = request.Stream - requestBody["messages"] = requestMessages - - if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON { - requestBody["format"] = "json" - } - - requestBodyBytes, err := json.Marshal(requestBody) - - if err != nil { - log.Errorf(c, "[ollama_large_language_model_adapter.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error()) - return nil, errs.ErrOperationFailed - } - - log.Debugf(c, "[ollama_large_language_model_adapter.buildJsonRequestBody] request body is %s", requestBodyBytes) - return requestBodyBytes, nil -} - -func (p *OllamaLargeLanguageModelAdapter) getOllamaRequestUrl() string { - url := p.OllamaServerURL - - if url[len(url)-1] != '/' { - url += "/" - } - - url += ollamaChatCompletionsPath - return url -} - -// NewOllamaLargeLanguageModelProvider creates a new Ollama large language model provider instance -func NewOllamaLargeLanguageModelProvider(llmConfig *settings.LLMConfig) LargeLanguageModelProvider { - return newCommonHttpLargeLanguageModelProvider(&OllamaLargeLanguageModelAdapter{ - OllamaServerURL: llmConfig.OllamaServerURL, - OllamaModelID: llmConfig.OllamaModelID, - }) -} diff --git a/pkg/llm/openai_common_compatible_large_language_model_adapter.go b/pkg/llm/openai_common_compatible_large_language_model_adapter.go deleted file mode 100644 index e7586d0c..00000000 --- a/pkg/llm/openai_common_compatible_large_language_model_adapter.go +++ /dev/null @@ -1,186 +0,0 @@ -package llm - -import ( - "bytes" - "encoding/base64" - "encoding/json" - "io" - "net/http" - "strings" - - "github.com/invopop/jsonschema" - - "github.com/mayswind/ezbookkeeping/pkg/core" - "github.com/mayswind/ezbookkeeping/pkg/errs" - "github.com/mayswind/ezbookkeeping/pkg/log" -) - -// OpenAIChatCompletionsAPIProvider defines the structure of OpenAI chat completions API provider -type OpenAIChatCompletionsAPIProvider interface { - // BuildChatCompletionsHttpRequest returns the chat completions http request - BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) - - // GetModelID returns the model id if supported, otherwise returns empty string - GetModelID() string -} - -// CommonOpenAIChatCompletionsAPILargeLanguageModelAdapter defines the structure of OpenAI common compatible large language model adapter based on chat completions api -type CommonOpenAIChatCompletionsAPILargeLanguageModelAdapter struct { - HttpLargeLanguageModelAdapter - apiProvider OpenAIChatCompletionsAPIProvider -} - -// BuildTextualRequest returns the http request by OpenAI common compatible adapter -func (p *CommonOpenAIChatCompletionsAPILargeLanguageModelAdapter) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) (*http.Request, error) { - requestBody, err := p.buildJsonRequestBody(c, uid, request, responseType) - - if err != nil { - return nil, err - } - - httpRequest, err := p.apiProvider.BuildChatCompletionsHttpRequest(c, uid) - - if err != nil { - return nil, err - } - - httpRequest.Body = io.NopCloser(bytes.NewReader(requestBody)) - httpRequest.Header.Set("Content-Type", "application/json") - - return httpRequest, nil -} - -// ParseTextualResponse returns the textual response by OpenAI common compatible adapter -func (p *CommonOpenAIChatCompletionsAPILargeLanguageModelAdapter) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) { - responseBody := make(map[string]any) - err := json.Unmarshal(body, &responseBody) - - if err != nil { - log.Errorf(c, "[openai_common_compatible_large_language_model_adapter.ParseTextualResponse] failed to parse response for user \"uid:%d\", because %s", uid, err.Error()) - return nil, errs.ErrFailedToRequestRemoteApi - } - - choices, ok := responseBody["choices"].([]any) - - if !ok || len(choices) < 1 { - log.Errorf(c, "[openai_common_compatible_large_language_model_adapter.ParseTextualResponse] no choices found in response for user \"uid:%d\"", uid) - return nil, errs.ErrFailedToRequestRemoteApi - } - - firstChoice, ok := choices[0].(map[string]any) - - if !ok { - log.Errorf(c, "[openai_common_compatible_large_language_model_adapter.ParseTextualResponse] invalid choice format in response for user \"uid:%d\"", uid) - return nil, errs.ErrFailedToRequestRemoteApi - } - - message, ok := firstChoice["message"].(map[string]any) - - if !ok { - log.Errorf(c, "[openai_common_compatible_large_language_model_adapter.ParseTextualResponse] no message found in choice for user \"uid:%d\"", uid) - return nil, errs.ErrFailedToRequestRemoteApi - } - - content, ok := message["content"].(string) - - if !ok { - log.Errorf(c, "[openai_common_compatible_large_language_model_adapter.ParseTextualResponse] no content found in message for user \"uid:%d\"", uid) - return nil, errs.ErrFailedToRequestRemoteApi - } - - if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON { - if strings.HasPrefix(content, "```json") && strings.HasSuffix(content, "```") { - content = strings.TrimPrefix(content, "```json") - content = strings.TrimSuffix(content, "```") - } else if strings.HasPrefix(content, "```") && strings.HasSuffix(content, "```") { - content = strings.TrimPrefix(content, "```") - content = strings.TrimSuffix(content, "```") - } - } - - textualResponse := &LargeLanguageModelTextualResponse{ - Content: content, - } - - return textualResponse, nil -} - -func (p *CommonOpenAIChatCompletionsAPILargeLanguageModelAdapter) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) ([]byte, error) { - if p.apiProvider.GetModelID() == "" { - return nil, errs.ErrInvalidLLMModelId - } - - requestMessages := make([]any, 0) - - if request.SystemPrompt != "" { - requestMessages = append(requestMessages, map[string]string{ - "role": "system", - "content": request.SystemPrompt, - }) - } - - if len(request.UserPrompt) > 0 { - if request.UserPromptType == LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL { - imageBase64Data := "data:image/png;base64," + base64.StdEncoding.EncodeToString(request.UserPrompt) - requestMessages = append(requestMessages, map[string]any{ - "role": "user", - "content": []any{ - core.O{ - "type": "image_url", - "image_url": core.O{ - "url": imageBase64Data, - }, - }, - }, - }) - } else { - requestMessages = append(requestMessages, map[string]string{ - "role": "user", - "content": string(request.UserPrompt), - }) - } - } - - requestBody := make(map[string]any) - requestBody["model"] = p.apiProvider.GetModelID() - requestBody["stream"] = request.Stream - requestBody["messages"] = requestMessages - - if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON { - if request.ResponseJsonObjectType != nil { - schemeGenerator := jsonschema.Reflector{ - Anonymous: true, - DoNotReference: true, - ExpandedStruct: true, - } - - schema := schemeGenerator.ReflectFromType(request.ResponseJsonObjectType) - schema.Version = "" - - requestBody["response_format"] = core.O{ - "type": "json_schema", - "json_schema": schema, - } - } else { - requestBody["response_format"] = core.O{ - "type": "json_object", - } - } - } - - requestBodyBytes, err := json.Marshal(requestBody) - - if err != nil { - log.Errorf(c, "[openai_common_compatible_large_language_model_adapter.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error()) - return nil, errs.ErrOperationFailed - } - - log.Debugf(c, "[openai_common_compatible_large_language_model_adapter.buildJsonRequestBody] request body is %s", requestBodyBytes) - return requestBodyBytes, nil -} - -func newCommonOpenAIChatCompletionsAPILargeLanguageModelAdapter(apiProvider OpenAIChatCompletionsAPIProvider) LargeLanguageModelProvider { - return newCommonHttpLargeLanguageModelProvider(&CommonOpenAIChatCompletionsAPILargeLanguageModelAdapter{ - apiProvider: apiProvider, - }) -} diff --git a/pkg/llm/common_http_large_language_model_provider.go b/pkg/llm/provider/common/common_http_large_language_model_provider.go similarity index 66% rename from pkg/llm/common_http_large_language_model_provider.go rename to pkg/llm/provider/common/common_http_large_language_model_provider.go index 7d8766f3..53713cd9 100644 --- a/pkg/llm/common_http_large_language_model_provider.go +++ b/pkg/llm/provider/common/common_http_large_language_model_provider.go @@ -1,13 +1,16 @@ -package llm +package common import ( "crypto/tls" "io" "net/http" + "strings" "time" "github.com/mayswind/ezbookkeeping/pkg/core" "github.com/mayswind/ezbookkeeping/pkg/errs" + "github.com/mayswind/ezbookkeeping/pkg/llm/data" + "github.com/mayswind/ezbookkeeping/pkg/llm/provider" "github.com/mayswind/ezbookkeeping/pkg/log" "github.com/mayswind/ezbookkeeping/pkg/settings" "github.com/mayswind/ezbookkeeping/pkg/utils" @@ -16,24 +19,38 @@ import ( // HttpLargeLanguageModelAdapter defines the structure of http large language model adapter type HttpLargeLanguageModelAdapter interface { // BuildTextualRequest returns the http request by the provider api definition - BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) (*http.Request, error) + BuildTextualRequest(c core.Context, uid int64, request *data.LargeLanguageModelRequest, responseType data.LargeLanguageModelResponseFormat) (*http.Request, error) // ParseTextualResponse returns the textual response entity by the provider api definition - ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) + ParseTextualResponse(c core.Context, uid int64, body []byte, responseType data.LargeLanguageModelResponseFormat) (*data.LargeLanguageModelTextualResponse, error) } // CommonHttpLargeLanguageModelProvider defines the structure of common http large language model provider type CommonHttpLargeLanguageModelProvider struct { - LargeLanguageModelProvider + provider.LargeLanguageModelProvider adapter HttpLargeLanguageModelAdapter } // GetJsonResponse returns the json response from the OpenAI common compatible large language model provider -func (p *CommonHttpLargeLanguageModelProvider) GetJsonResponse(c core.Context, uid int64, currentLLMConfig *settings.LLMConfig, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) { - return p.getTextualResponse(c, uid, currentLLMConfig, request, LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) +func (p *CommonHttpLargeLanguageModelProvider) GetJsonResponse(c core.Context, uid int64, currentLLMConfig *settings.LLMConfig, request *data.LargeLanguageModelRequest) (*data.LargeLanguageModelTextualResponse, error) { + response, err := p.getTextualResponse(c, uid, currentLLMConfig, request, data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + + if err != nil { + return nil, err + } + + if strings.HasPrefix(response.Content, "```json") && strings.HasSuffix(response.Content, "```") { + response.Content = strings.TrimPrefix(response.Content, "```json") + response.Content = strings.TrimSuffix(response.Content, "```") + } else if strings.HasPrefix(response.Content, "```") && strings.HasSuffix(response.Content, "```") { + response.Content = strings.TrimPrefix(response.Content, "```") + response.Content = strings.TrimSuffix(response.Content, "```") + } + + return response, nil } -func (p *CommonHttpLargeLanguageModelProvider) getTextualResponse(c core.Context, uid int64, currentLLMConfig *settings.LLMConfig, request *LargeLanguageModelRequest, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) { +func (p *CommonHttpLargeLanguageModelProvider) getTextualResponse(c core.Context, uid int64, currentLLMConfig *settings.LLMConfig, request *data.LargeLanguageModelRequest, responseType data.LargeLanguageModelResponseFormat) (*data.LargeLanguageModelTextualResponse, error) { transport := http.DefaultTransport.(*http.Transport).Clone() utils.SetProxyUrl(transport, currentLLMConfig.LargeLanguageModelAPIProxy) @@ -77,7 +94,8 @@ func (p *CommonHttpLargeLanguageModelProvider) getTextualResponse(c core.Context return p.adapter.ParseTextualResponse(c, uid, body, responseType) } -func newCommonHttpLargeLanguageModelProvider(adapter HttpLargeLanguageModelAdapter) *CommonHttpLargeLanguageModelProvider { +// NewCommonHttpLargeLanguageModelProvider creates a http adapter based large language model provider instance +func NewCommonHttpLargeLanguageModelProvider(adapter HttpLargeLanguageModelAdapter) *CommonHttpLargeLanguageModelProvider { return &CommonHttpLargeLanguageModelProvider{ adapter: adapter, } diff --git a/pkg/llm/large_language_model_provider.go b/pkg/llm/provider/large_language_model_provider.go similarity index 68% rename from pkg/llm/large_language_model_provider.go rename to pkg/llm/provider/large_language_model_provider.go index b0c559f8..01e4cbd8 100644 --- a/pkg/llm/large_language_model_provider.go +++ b/pkg/llm/provider/large_language_model_provider.go @@ -1,12 +1,13 @@ -package llm +package provider import ( "github.com/mayswind/ezbookkeeping/pkg/core" + "github.com/mayswind/ezbookkeeping/pkg/llm/data" "github.com/mayswind/ezbookkeeping/pkg/settings" ) // LargeLanguageModelProvider defines the structure of large language model provider type LargeLanguageModelProvider interface { // GetJsonResponse returns the json response from the large language model provider - GetJsonResponse(c core.Context, uid int64, currentLLMConfig *settings.LLMConfig, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) + GetJsonResponse(c core.Context, uid int64, currentLLMConfig *settings.LLMConfig, request *data.LargeLanguageModelRequest) (*data.LargeLanguageModelTextualResponse, error) } diff --git a/pkg/llm/provider/ollama/ollama_large_language_model_adapter.go b/pkg/llm/provider/ollama/ollama_large_language_model_adapter.go new file mode 100644 index 00000000..b1770bc7 --- /dev/null +++ b/pkg/llm/provider/ollama/ollama_large_language_model_adapter.go @@ -0,0 +1,166 @@ +package ollama + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "net/http" + + "github.com/mayswind/ezbookkeeping/pkg/core" + "github.com/mayswind/ezbookkeeping/pkg/errs" + "github.com/mayswind/ezbookkeeping/pkg/llm/data" + "github.com/mayswind/ezbookkeeping/pkg/llm/provider" + "github.com/mayswind/ezbookkeeping/pkg/llm/provider/common" + "github.com/mayswind/ezbookkeeping/pkg/log" + "github.com/mayswind/ezbookkeeping/pkg/settings" +) + +const ollamaChatCompletionsPath = "api/chat" + +// OllamaLargeLanguageModelAdapter defines the structure of Ollama large language model adapter +type OllamaLargeLanguageModelAdapter struct { + common.HttpLargeLanguageModelAdapter + OllamaServerURL string + OllamaModelID string +} + +// OllamaMessageRole defines the role of Ollama chat message +type OllamaMessageRole string + +const ( + OllamaMessageRoleSystem OllamaMessageRole = "system" + OllamaMessageRoleUser OllamaMessageRole = "user" +) + +// OllamaChatRequest defines the structure of Ollama chat request +type OllamaChatRequest struct { + Model string `json:"model"` + Stream bool `json:"stream"` + Messages []*OllamaChatRequestMessage `json:"messages"` + Format string `json:"format,omitempty"` +} + +// OllamaChatRequestMessage defines the structure of Ollama chat request message +type OllamaChatRequestMessage struct { + Role OllamaMessageRole `json:"role"` + Content string `json:"content"` + Images []string `json:"images,omitempty"` +} + +// OllamaChatResponse defines the structure of Ollama chat response +type OllamaChatResponse struct { + Message *OllamaChatResponseMessage `json:"message"` +} + +// OllamaChatResponseMessage defines the structure of Ollama chat response message +type OllamaChatResponseMessage struct { + Content *string `json:"content"` +} + +// BuildTextualRequest returns the http request by Ollama large language model adapter +func (p *OllamaLargeLanguageModelAdapter) BuildTextualRequest(c core.Context, uid int64, request *data.LargeLanguageModelRequest, responseType data.LargeLanguageModelResponseFormat) (*http.Request, error) { + requestBody, err := p.buildJsonRequestBody(c, uid, request, responseType) + + if err != nil { + return nil, err + } + + httpRequest, err := http.NewRequest("POST", p.getOllamaRequestUrl(), bytes.NewReader(requestBody)) + + if err != nil { + return nil, err + } + + httpRequest.Header.Set("Content-Type", "application/json") + + return httpRequest, nil +} + +// ParseTextualResponse returns the textual response by Ollama large language model adapter +func (p *OllamaLargeLanguageModelAdapter) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType data.LargeLanguageModelResponseFormat) (*data.LargeLanguageModelTextualResponse, error) { + chatResponse := &OllamaChatResponse{} + err := json.Unmarshal(body, &chatResponse) + + if err != nil { + log.Errorf(c, "[ollama_large_language_model_adapter.ParseTextualResponse] failed to parse chat response for user \"uid:%d\", because %s", uid, err.Error()) + return nil, errs.ErrFailedToRequestRemoteApi + } + + if chatResponse == nil || chatResponse.Message == nil || chatResponse.Message.Content == nil { + log.Errorf(c, "[ollama_large_language_model_adapter.ParseTextualResponse] chat response is invalid for user \"uid:%d\"", uid) + return nil, errs.ErrFailedToRequestRemoteApi + } + + textualResponse := &data.LargeLanguageModelTextualResponse{ + Content: *chatResponse.Message.Content, + } + + return textualResponse, nil +} + +func (p *OllamaLargeLanguageModelAdapter) buildJsonRequestBody(c core.Context, uid int64, request *data.LargeLanguageModelRequest, responseType data.LargeLanguageModelResponseFormat) ([]byte, error) { + if p.OllamaModelID == "" { + return nil, errs.ErrInvalidLLMModelId + } + + chatRequest := &OllamaChatRequest{ + Model: p.OllamaModelID, + Stream: request.Stream, + Messages: make([]*OllamaChatRequestMessage, 0, 2), + } + + if request.SystemPrompt != "" { + chatRequest.Messages = append(chatRequest.Messages, &OllamaChatRequestMessage{ + Role: OllamaMessageRoleSystem, + Content: request.SystemPrompt, + }) + } + + if len(request.UserPrompt) > 0 { + if request.UserPromptType == data.LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL { + imageBase64Data := base64.StdEncoding.EncodeToString(request.UserPrompt) + chatRequest.Messages = append(chatRequest.Messages, &OllamaChatRequestMessage{ + Role: OllamaMessageRoleUser, + Images: []string{imageBase64Data}, + }) + } else { + chatRequest.Messages = append(chatRequest.Messages, &OllamaChatRequestMessage{ + Role: OllamaMessageRoleUser, + Content: string(request.UserPrompt), + }) + } + } + + if responseType == data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON { + chatRequest.Format = "json" + } + + requestBodyBytes, err := json.Marshal(chatRequest) + + if err != nil { + log.Errorf(c, "[ollama_large_language_model_adapter.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error()) + return nil, errs.ErrOperationFailed + } + + log.Debugf(c, "[ollama_large_language_model_adapter.buildJsonRequestBody] request body is %s", requestBodyBytes) + return requestBodyBytes, nil +} + +func (p *OllamaLargeLanguageModelAdapter) getOllamaRequestUrl() string { + url := p.OllamaServerURL + + if url[len(url)-1] != '/' { + url += "/" + } + + url += ollamaChatCompletionsPath + return url +} + +// NewOllamaLargeLanguageModelProvider creates a new Ollama large language model provider instance +func NewOllamaLargeLanguageModelProvider(llmConfig *settings.LLMConfig) provider.LargeLanguageModelProvider { + return common.NewCommonHttpLargeLanguageModelProvider(&OllamaLargeLanguageModelAdapter{ + OllamaServerURL: llmConfig.OllamaServerURL, + OllamaModelID: llmConfig.OllamaModelID, + }) +} diff --git a/pkg/llm/ollama_large_language_model_adapter_test.go b/pkg/llm/provider/ollama/ollama_large_language_model_adapter_test.go similarity index 74% rename from pkg/llm/ollama_large_language_model_adapter_test.go rename to pkg/llm/provider/ollama/ollama_large_language_model_adapter_test.go index 2c82eceb..a81eaf7b 100644 --- a/pkg/llm/ollama_large_language_model_adapter_test.go +++ b/pkg/llm/provider/ollama/ollama_large_language_model_adapter_test.go @@ -1,9 +1,10 @@ -package llm +package ollama import ( "encoding/json" "testing" + "github.com/mayswind/ezbookkeeping/pkg/llm/data" "github.com/stretchr/testify/assert" "github.com/mayswind/ezbookkeeping/pkg/core" @@ -14,19 +15,19 @@ func TestOllamaLargeLanguageModelAdapter_buildJsonRequestBody_TextualUserPrompt( OllamaModelID: "test", } - request := &LargeLanguageModelRequest{ + request := &data.LargeLanguageModelRequest{ SystemPrompt: "You are a helpful assistant.", UserPrompt: []byte("Hello, how are you?"), } - bodyBytes, err := adapter.buildJsonRequestBody(core.NewNullContext(), 0, request, LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + bodyBytes, err := adapter.buildJsonRequestBody(core.NewNullContext(), 0, request, data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.Nil(t, err) var body map[string]interface{} err = json.Unmarshal(bodyBytes, &body) assert.Nil(t, err) - assert.Equal(t, "{\"format\":\"json\",\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\"model\":\"test\",\"stream\":false}", string(bodyBytes)) + assert.Equal(t, "{\"model\":\"test\",\"stream\":false,\"messages\":[{\"role\":\"system\",\"content\":\"You are a helpful assistant.\"},{\"role\":\"user\",\"content\":\"Hello, how are you?\"}],\"format\":\"json\"}", string(bodyBytes)) } func TestOllamaLargeLanguageModelAdapter_buildJsonRequestBody_ImageUserPrompt(t *testing.T) { @@ -34,20 +35,20 @@ func TestOllamaLargeLanguageModelAdapter_buildJsonRequestBody_ImageUserPrompt(t OllamaModelID: "test", } - request := &LargeLanguageModelRequest{ + request := &data.LargeLanguageModelRequest{ SystemPrompt: "What's in this image?", UserPrompt: []byte("fakedata"), - UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL, + UserPromptType: data.LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL, } - bodyBytes, err := adapter.buildJsonRequestBody(core.NewNullContext(), 0, request, LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + bodyBytes, err := adapter.buildJsonRequestBody(core.NewNullContext(), 0, request, data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.Nil(t, err) var body map[string]interface{} err = json.Unmarshal(bodyBytes, &body) assert.Nil(t, err) - assert.Equal(t, "{\"format\":\"json\",\"messages\":[{\"content\":\"What's in this image?\",\"role\":\"system\"},{\"content\":\"\",\"images\":[\"ZmFrZWRhdGE=\"],\"role\":\"user\"}],\"model\":\"test\",\"stream\":false}", string(bodyBytes)) + assert.Equal(t, "{\"model\":\"test\",\"stream\":false,\"messages\":[{\"role\":\"system\",\"content\":\"What's in this image?\"},{\"role\":\"user\",\"content\":\"\",\"images\":[\"ZmFrZWRhdGE=\"]}],\"format\":\"json\"}", string(bodyBytes)) } func TestOllamaLargeLanguageModelAdapter_ParseTextualResponse_ValidJsonResponse(t *testing.T) { @@ -62,7 +63,7 @@ func TestOllamaLargeLanguageModelAdapter_ParseTextualResponse_ValidJsonResponse( } }` - result, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + result, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.Nil(t, err) assert.Equal(t, "This is a test response", result.Content) } @@ -79,12 +80,12 @@ func TestOllamaLargeLanguageModelAdapter_ParseTextualResponse_EmptyResponse(t *t } }` - result, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + result, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.Nil(t, err) assert.Equal(t, "", result.Content) } -func TestOllamaLargeLanguageModelAdapter_ParseTextualResponse_EmptyChoices(t *testing.T) { +func TestOllamaLargeLanguageModelAdapter_ParseTextualResponse_EmptyMessage(t *testing.T) { adapter := &OllamaLargeLanguageModelAdapter{} response := `{ @@ -93,11 +94,11 @@ func TestOllamaLargeLanguageModelAdapter_ParseTextualResponse_EmptyChoices(t *te "message": {} }` - _, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + _, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.EqualError(t, err, "failed to request third party api") } -func TestOllamaLargeLanguageModelAdapter_ParseTextualResponse_NoChoiceContent(t *testing.T) { +func TestOllamaLargeLanguageModelAdapter_ParseTextualResponse_NoContentFieldInMessage(t *testing.T) { adapter := &OllamaLargeLanguageModelAdapter{} response := `{ @@ -108,7 +109,7 @@ func TestOllamaLargeLanguageModelAdapter_ParseTextualResponse_NoChoiceContent(t } }` - _, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + _, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.EqualError(t, err, "failed to request third party api") } @@ -117,7 +118,7 @@ func TestOllamaLargeLanguageModelAdapter_ParseTextualResponse_InvalidJson(t *tes response := "error" - _, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + _, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.EqualError(t, err, "failed to request third party api") } diff --git a/pkg/llm/openai_chat_completions_api_provider.go b/pkg/llm/provider/openai/openai_chat_completions_api_provider.go similarity index 92% rename from pkg/llm/openai_chat_completions_api_provider.go rename to pkg/llm/provider/openai/openai_chat_completions_api_provider.go index be14f9d6..8bf60c54 100644 --- a/pkg/llm/openai_chat_completions_api_provider.go +++ b/pkg/llm/provider/openai/openai_chat_completions_api_provider.go @@ -1,9 +1,10 @@ -package llm +package openai import ( "net/http" "github.com/mayswind/ezbookkeeping/pkg/core" + "github.com/mayswind/ezbookkeeping/pkg/llm/provider" "github.com/mayswind/ezbookkeeping/pkg/settings" ) @@ -35,7 +36,7 @@ func (p *OpenAIOfficialChatCompletionsAPIProvider) GetModelID() string { } // NewOpenAILargeLanguageModelProvider creates a new OpenAI large language model provider instance -func NewOpenAILargeLanguageModelProvider(llmConfig *settings.LLMConfig) LargeLanguageModelProvider { +func NewOpenAILargeLanguageModelProvider(llmConfig *settings.LLMConfig) provider.LargeLanguageModelProvider { return newCommonOpenAIChatCompletionsAPILargeLanguageModelAdapter(&OpenAIOfficialChatCompletionsAPIProvider{ OpenAIAPIKey: llmConfig.OpenAIAPIKey, OpenAIModelID: llmConfig.OpenAIModelID, diff --git a/pkg/llm/provider/openai/openai_common_compatible_large_language_model_adapter.go b/pkg/llm/provider/openai/openai_common_compatible_large_language_model_adapter.go new file mode 100644 index 00000000..fb9a1c92 --- /dev/null +++ b/pkg/llm/provider/openai/openai_common_compatible_large_language_model_adapter.go @@ -0,0 +1,219 @@ +package openai + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "io" + "net/http" + + "github.com/invopop/jsonschema" + "github.com/mayswind/ezbookkeeping/pkg/core" + "github.com/mayswind/ezbookkeeping/pkg/errs" + "github.com/mayswind/ezbookkeeping/pkg/llm/data" + "github.com/mayswind/ezbookkeeping/pkg/llm/provider" + "github.com/mayswind/ezbookkeeping/pkg/llm/provider/common" + "github.com/mayswind/ezbookkeeping/pkg/log" +) + +// OpenAIChatCompletionsAPIProvider defines the structure of OpenAI chat completions API provider +type OpenAIChatCompletionsAPIProvider interface { + // BuildChatCompletionsHttpRequest returns the chat completions http request + BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) + + // GetModelID returns the model id if supported, otherwise returns empty string + GetModelID() string +} + +// CommonOpenAIChatCompletionsAPILargeLanguageModelAdapter defines the structure of OpenAI common compatible large language model adapter based on chat completions api +type CommonOpenAIChatCompletionsAPILargeLanguageModelAdapter struct { + common.HttpLargeLanguageModelAdapter + apiProvider OpenAIChatCompletionsAPIProvider +} + +// OpenAIMessageRole defines the role of OpenAI chat completions message +type OpenAIMessageRole string + +// OpenAI Message Roles +const ( + OpenAIMessageRoleSystem OpenAIMessageRole = "system" + OpenAIMessageRoleUser OpenAIMessageRole = "user" +) + +// OpenAIChatCompletionsRequestResponseFormatType defines the type of OpenAI chat completions request response format +type OpenAIChatCompletionsRequestResponseFormatType string + +// OpenAI Chat Completions Request Response Format Types +const ( + OpenAIChatCompletionsRequestResponseFormatTypeJsonObject OpenAIChatCompletionsRequestResponseFormatType = "json_object" + OpenAIChatCompletionsRequestResponseFormatTypeJsonSchema OpenAIChatCompletionsRequestResponseFormatType = "json_schema" +) + +// OpenAIChatCompletionsRequest defines the structure of OpenAI chat completions request +type OpenAIChatCompletionsRequest struct { + Model string `json:"model"` + Stream bool `json:"stream"` + Messages []any `json:"messages"` + ResponseFormat *OpenAIChatCompletionsRequestResponseFormat `json:"response_format,omitempty"` +} + +// OpenAIChatCompletionsRequestMessage defines the structure of OpenAI chat completions request message +type OpenAIChatCompletionsRequestMessage[T string | []*OpenAIChatCompletionsRequestImageContent] struct { + Role OpenAIMessageRole `json:"role"` + Content T `json:"content"` +} + +// OpenAIChatCompletionsRequestImageContent defines the structure of OpenAI chat completions request image content +type OpenAIChatCompletionsRequestImageContent struct { + Type string `json:"type"` + ImageURL *OpenAIChatCompletionsRequestImageUrl `json:"image_url"` +} + +// OpenAIChatCompletionsRequestResponseFormat defines the structure of OpenAI chat completions request response format +type OpenAIChatCompletionsRequestResponseFormat struct { + Type OpenAIChatCompletionsRequestResponseFormatType `json:"type"` + JsonSchema *jsonschema.Schema `json:"json_schema,omitempty"` +} + +// OpenAIChatCompletionsRequestImageUrl defines the structure of OpenAI image url +type OpenAIChatCompletionsRequestImageUrl struct { + Url string `json:"url"` +} + +// OpenAIChatCompletionsResponse defines the structure of OpenAI chat completions response +type OpenAIChatCompletionsResponse struct { + Choices []*OpenAIChatCompletionsResponseChoice `json:"choices"` +} + +// OpenAIChatCompletionsResponseChoice defines the structure of OpenAI chat completions response choice +type OpenAIChatCompletionsResponseChoice struct { + Message *OpenAIChatCompletionsResponseMessage `json:"message"` +} + +// OpenAIChatCompletionsResponseMessage defines the structure of OpenAI chat completions response message +type OpenAIChatCompletionsResponseMessage struct { + Content *string `json:"content"` +} + +// BuildTextualRequest returns the http request by OpenAI common compatible adapter +func (p *CommonOpenAIChatCompletionsAPILargeLanguageModelAdapter) BuildTextualRequest(c core.Context, uid int64, request *data.LargeLanguageModelRequest, responseType data.LargeLanguageModelResponseFormat) (*http.Request, error) { + requestBody, err := p.buildJsonRequestBody(c, uid, request, responseType) + + if err != nil { + return nil, err + } + + httpRequest, err := p.apiProvider.BuildChatCompletionsHttpRequest(c, uid) + + if err != nil { + return nil, err + } + + httpRequest.Body = io.NopCloser(bytes.NewReader(requestBody)) + httpRequest.Header.Set("Content-Type", "application/json") + + return httpRequest, nil +} + +// ParseTextualResponse returns the textual response by OpenAI common compatible adapter +func (p *CommonOpenAIChatCompletionsAPILargeLanguageModelAdapter) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType data.LargeLanguageModelResponseFormat) (*data.LargeLanguageModelTextualResponse, error) { + chatCompletionsResponse := &OpenAIChatCompletionsResponse{} + err := json.Unmarshal(body, &chatCompletionsResponse) + + if err != nil { + log.Errorf(c, "[openai_common_compatible_large_language_model_adapter.ParseTextualResponse] failed to parse chat completions response for user \"uid:%d\", because %s", uid, err.Error()) + return nil, errs.ErrFailedToRequestRemoteApi + } + + if chatCompletionsResponse == nil || chatCompletionsResponse.Choices == nil || len(chatCompletionsResponse.Choices) < 1 || + chatCompletionsResponse.Choices[0].Message == nil || + chatCompletionsResponse.Choices[0].Message.Content == nil { + log.Errorf(c, "[openai_common_compatible_large_language_model_adapter.ParseTextualResponse] chat completions response is invalid for user \"uid:%d\"", uid) + return nil, errs.ErrFailedToRequestRemoteApi + } + + textualResponse := &data.LargeLanguageModelTextualResponse{ + Content: *chatCompletionsResponse.Choices[0].Message.Content, + } + + return textualResponse, nil +} + +func (p *CommonOpenAIChatCompletionsAPILargeLanguageModelAdapter) buildJsonRequestBody(c core.Context, uid int64, request *data.LargeLanguageModelRequest, responseType data.LargeLanguageModelResponseFormat) ([]byte, error) { + if p.apiProvider.GetModelID() == "" { + return nil, errs.ErrInvalidLLMModelId + } + + chatCompletionsRequest := &OpenAIChatCompletionsRequest{ + Model: p.apiProvider.GetModelID(), + Stream: request.Stream, + Messages: make([]any, 0, 2), + } + + if request.SystemPrompt != "" { + chatCompletionsRequest.Messages = append(chatCompletionsRequest.Messages, &OpenAIChatCompletionsRequestMessage[string]{ + Role: OpenAIMessageRoleSystem, + Content: request.SystemPrompt, + }) + } + + if len(request.UserPrompt) > 0 { + if request.UserPromptType == data.LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL { + imageBase64Data := "data:" + request.UserPromptContentType + ";base64," + base64.StdEncoding.EncodeToString(request.UserPrompt) + chatCompletionsRequest.Messages = append(chatCompletionsRequest.Messages, &OpenAIChatCompletionsRequestMessage[[]*OpenAIChatCompletionsRequestImageContent]{ + Role: OpenAIMessageRoleUser, + Content: []*OpenAIChatCompletionsRequestImageContent{ + { + Type: "image_url", + ImageURL: &OpenAIChatCompletionsRequestImageUrl{ + Url: imageBase64Data, + }, + }, + }, + }) + } else { + chatCompletionsRequest.Messages = append(chatCompletionsRequest.Messages, &OpenAIChatCompletionsRequestMessage[string]{ + Role: OpenAIMessageRoleUser, + Content: string(request.UserPrompt), + }) + } + } + + if responseType == data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON { + if request.ResponseJsonObjectType != nil { + schemeGenerator := jsonschema.Reflector{ + Anonymous: true, + DoNotReference: true, + ExpandedStruct: true, + } + + schema := schemeGenerator.ReflectFromType(request.ResponseJsonObjectType) + schema.Version = "" + + chatCompletionsRequest.ResponseFormat = &OpenAIChatCompletionsRequestResponseFormat{ + Type: OpenAIChatCompletionsRequestResponseFormatTypeJsonSchema, + JsonSchema: schema, + } + } else { + chatCompletionsRequest.ResponseFormat = &OpenAIChatCompletionsRequestResponseFormat{ + Type: OpenAIChatCompletionsRequestResponseFormatTypeJsonObject, + } + } + } + + requestBodyBytes, err := json.Marshal(chatCompletionsRequest) + + if err != nil { + log.Errorf(c, "[openai_common_compatible_large_language_model_adapter.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error()) + return nil, errs.ErrOperationFailed + } + + log.Debugf(c, "[openai_common_compatible_large_language_model_adapter.buildJsonRequestBody] request body is %s", requestBodyBytes) + return requestBodyBytes, nil +} + +func newCommonOpenAIChatCompletionsAPILargeLanguageModelAdapter(apiProvider OpenAIChatCompletionsAPIProvider) provider.LargeLanguageModelProvider { + return common.NewCommonHttpLargeLanguageModelProvider(&CommonOpenAIChatCompletionsAPILargeLanguageModelAdapter{ + apiProvider: apiProvider, + }) +} diff --git a/pkg/llm/openai_common_compatible_large_language_model_adapter_test.go b/pkg/llm/provider/openai/openai_common_compatible_large_language_model_adapter_test.go similarity index 73% rename from pkg/llm/openai_common_compatible_large_language_model_adapter_test.go rename to pkg/llm/provider/openai/openai_common_compatible_large_language_model_adapter_test.go index 4259490a..07ad9cba 100644 --- a/pkg/llm/openai_common_compatible_large_language_model_adapter_test.go +++ b/pkg/llm/provider/openai/openai_common_compatible_large_language_model_adapter_test.go @@ -1,4 +1,4 @@ -package llm +package openai import ( "encoding/json" @@ -7,6 +7,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/mayswind/ezbookkeeping/pkg/core" + "github.com/mayswind/ezbookkeeping/pkg/llm/data" ) func TestCommonOpenAIChatCompletionsAPILargeLanguageModelAdapter_buildJsonRequestBody_TextualUserPrompt(t *testing.T) { @@ -16,19 +17,19 @@ func TestCommonOpenAIChatCompletionsAPILargeLanguageModelAdapter_buildJsonReques }, } - request := &LargeLanguageModelRequest{ + request := &data.LargeLanguageModelRequest{ SystemPrompt: "You are a helpful assistant.", UserPrompt: []byte("Hello, how are you?"), } - bodyBytes, err := adapter.buildJsonRequestBody(core.NewNullContext(), 0, request, LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + bodyBytes, err := adapter.buildJsonRequestBody(core.NewNullContext(), 0, request, data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.Nil(t, err) var body map[string]interface{} err = json.Unmarshal(bodyBytes, &body) assert.Nil(t, err) - assert.Equal(t, "{\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\"model\":\"test\",\"response_format\":{\"type\":\"json_object\"},\"stream\":false}", string(bodyBytes)) + assert.Equal(t, "{\"model\":\"test\",\"stream\":false,\"messages\":[{\"role\":\"system\",\"content\":\"You are a helpful assistant.\"},{\"role\":\"user\",\"content\":\"Hello, how are you?\"}],\"response_format\":{\"type\":\"json_object\"}}", string(bodyBytes)) } func TestCommonOpenAIChatCompletionsAPILargeLanguageModelAdapter_buildJsonRequestBody_ImageUserPrompt(t *testing.T) { @@ -38,20 +39,21 @@ func TestCommonOpenAIChatCompletionsAPILargeLanguageModelAdapter_buildJsonReques }, } - request := &LargeLanguageModelRequest{ - SystemPrompt: "What's in this image?", - UserPrompt: []byte("fakedata"), - UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL, + request := &data.LargeLanguageModelRequest{ + SystemPrompt: "What's in this image?", + UserPrompt: []byte("fakedata"), + UserPromptType: data.LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL, + UserPromptContentType: "image/png", } - bodyBytes, err := adapter.buildJsonRequestBody(core.NewNullContext(), 0, request, LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + bodyBytes, err := adapter.buildJsonRequestBody(core.NewNullContext(), 0, request, data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.Nil(t, err) var body map[string]interface{} err = json.Unmarshal(bodyBytes, &body) assert.Nil(t, err) - assert.Equal(t, "{\"messages\":[{\"content\":\"What's in this image?\",\"role\":\"system\"},{\"content\":[{\"image_url\":{\"url\":\"data:image/png;base64,ZmFrZWRhdGE=\"},\"type\":\"image_url\"}],\"role\":\"user\"}],\"model\":\"test\",\"response_format\":{\"type\":\"json_object\"},\"stream\":false}", string(bodyBytes)) + assert.Equal(t, "{\"model\":\"test\",\"stream\":false,\"messages\":[{\"role\":\"system\",\"content\":\"What's in this image?\"},{\"role\":\"user\",\"content\":[{\"type\":\"image_url\",\"image_url\":{\"url\":\"data:image/png;base64,ZmFrZWRhdGE=\"}}]}],\"response_format\":{\"type\":\"json_object\"}}", string(bodyBytes)) } func TestCommonOpenAIChatCompletionsAPILargeLanguageModelAdapter_ParseTextualResponse_ValidJsonResponse(t *testing.T) { @@ -81,7 +83,7 @@ func TestCommonOpenAIChatCompletionsAPILargeLanguageModelAdapter_ParseTextualRes ] }` - result, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + result, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.Nil(t, err) assert.Equal(t, "This is a test response", result.Content) } @@ -106,7 +108,7 @@ func TestCommonOpenAIChatCompletionsAPILargeLanguageModelAdapter_ParseTextualRes ] }` - result, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + result, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.Nil(t, err) assert.Equal(t, "", result.Content) } @@ -122,7 +124,7 @@ func TestCommonOpenAIChatCompletionsAPILargeLanguageModelAdapter_ParseTextualRes "choices": [] }` - _, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + _, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.EqualError(t, err, "failed to request third party api") } @@ -145,7 +147,7 @@ func TestCommonOpenAIChatCompletionsAPILargeLanguageModelAdapter_ParseTextualRes ] }` - _, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + _, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.EqualError(t, err, "failed to request third party api") } @@ -156,6 +158,6 @@ func TestCommonOpenAIChatCompletionsAPILargeLanguageModelAdapter_ParseTextualRes response := "error" - _, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + _, err := adapter.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), data.LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) assert.EqualError(t, err, "failed to request third party api") } diff --git a/pkg/llm/openai_compatible_chat_completions_api_provider.go b/pkg/llm/provider/openai/openai_compatible_chat_completions_api_provider.go similarity index 93% rename from pkg/llm/openai_compatible_chat_completions_api_provider.go rename to pkg/llm/provider/openai/openai_compatible_chat_completions_api_provider.go index bc456747..09f32021 100644 --- a/pkg/llm/openai_compatible_chat_completions_api_provider.go +++ b/pkg/llm/provider/openai/openai_compatible_chat_completions_api_provider.go @@ -1,9 +1,10 @@ -package llm +package openai import ( "net/http" "github.com/mayswind/ezbookkeeping/pkg/core" + "github.com/mayswind/ezbookkeeping/pkg/llm/provider" "github.com/mayswind/ezbookkeeping/pkg/settings" ) @@ -49,7 +50,7 @@ func (p *OpenAICompatibleChatCompletionsAPIProvider) getFinalChatCompletionsRequ } // NewOpenAICompatibleLargeLanguageModelProvider creates a new OpenAI compatible large language model provider instance -func NewOpenAICompatibleLargeLanguageModelProvider(llmConfig *settings.LLMConfig) LargeLanguageModelProvider { +func NewOpenAICompatibleLargeLanguageModelProvider(llmConfig *settings.LLMConfig) provider.LargeLanguageModelProvider { return newCommonOpenAIChatCompletionsAPILargeLanguageModelAdapter(&OpenAICompatibleChatCompletionsAPIProvider{ OpenAICompatibleBaseURL: llmConfig.OpenAICompatibleBaseURL, OpenAICompatibleAPIKey: llmConfig.OpenAICompatibleAPIKey, diff --git a/pkg/llm/openai_compatible_chat_completions_api_provider_test.go b/pkg/llm/provider/openai/openai_compatible_chat_completions_api_provider_test.go similarity index 98% rename from pkg/llm/openai_compatible_chat_completions_api_provider_test.go rename to pkg/llm/provider/openai/openai_compatible_chat_completions_api_provider_test.go index 8040811e..e4aab3e4 100644 --- a/pkg/llm/openai_compatible_chat_completions_api_provider_test.go +++ b/pkg/llm/provider/openai/openai_compatible_chat_completions_api_provider_test.go @@ -1,4 +1,4 @@ -package llm +package openai import ( "testing" diff --git a/pkg/llm/openrouter_chat_completions_api_provider.go b/pkg/llm/provider/openai/openrouter_chat_completions_api_provider.go similarity index 92% rename from pkg/llm/openrouter_chat_completions_api_provider.go rename to pkg/llm/provider/openai/openrouter_chat_completions_api_provider.go index 1e924bdd..2c1c7962 100644 --- a/pkg/llm/openrouter_chat_completions_api_provider.go +++ b/pkg/llm/provider/openai/openrouter_chat_completions_api_provider.go @@ -1,9 +1,10 @@ -package llm +package openai import ( "net/http" "github.com/mayswind/ezbookkeeping/pkg/core" + "github.com/mayswind/ezbookkeeping/pkg/llm/provider" "github.com/mayswind/ezbookkeeping/pkg/settings" ) @@ -37,7 +38,7 @@ func (p *OpenRouterChatCompletionsAPIProvider) GetModelID() string { } // NewOpenRouterLargeLanguageModelProvider creates a new OpenRouter large language model provider instance -func NewOpenRouterLargeLanguageModelProvider(llmConfig *settings.LLMConfig) LargeLanguageModelProvider { +func NewOpenRouterLargeLanguageModelProvider(llmConfig *settings.LLMConfig) provider.LargeLanguageModelProvider { return newCommonOpenAIChatCompletionsAPILargeLanguageModelAdapter(&OpenRouterChatCompletionsAPIProvider{ OpenRouterAPIKey: llmConfig.OpenRouterAPIKey, OpenRouterModelID: llmConfig.OpenRouterModelID,