create transactions from AI receipt image recognition results

This commit is contained in:
MaysWind
2025-09-21 04:00:56 +08:00
parent 00f1d0418f
commit 5d88287ae2
50 changed files with 2356 additions and 22 deletions
@@ -0,0 +1,91 @@
package llm
import (
"crypto/tls"
"io"
"net/http"
"time"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/errs"
"github.com/mayswind/ezbookkeeping/pkg/log"
"github.com/mayswind/ezbookkeeping/pkg/settings"
"github.com/mayswind/ezbookkeeping/pkg/utils"
)
// HttpLargeLanguageModelProvider defines the structure of http large language model provider
type HttpLargeLanguageModelProvider interface {
// BuildTextualRequest returns the http request by the provider api definition
BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error)
// ParseTextualResponse returns the textual response entity by the provider api definition
ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error)
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id if supported, otherwise returns empty string
GetReceiptImageRecognitionModelID() string
}
// CommonHttpLargeLanguageModelProvider defines the structure of common http large language model provider
type CommonHttpLargeLanguageModelProvider struct {
LargeLanguageModelProvider
provider HttpLargeLanguageModelProvider
}
// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the OpenAI common compatible large language model provider
func (p *CommonHttpLargeLanguageModelProvider) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) {
return p.getTextualResponse(c, uid, currentConfig, request, p.provider.GetReceiptImageRecognitionModelID(), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
}
func (p *CommonHttpLargeLanguageModelProvider) getTextualResponse(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
if modelId == "" {
return nil, errs.ErrInvalidLLMModelId
}
transport := http.DefaultTransport.(*http.Transport).Clone()
utils.SetProxyUrl(transport, currentConfig.LargeLanguageModelAPIProxy)
if currentConfig.LargeLanguageModelAPISkipTLSVerify {
transport.TLSClientConfig = &tls.Config{
InsecureSkipVerify: true,
}
}
client := &http.Client{
Transport: transport,
Timeout: time.Duration(currentConfig.LargeLanguageModelAPIRequestTimeout) * time.Millisecond,
}
httpRequest, err := p.provider.BuildTextualRequest(c, uid, request, modelId, responseType)
if err != nil {
log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to build requests for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.ErrFailedToRequestRemoteApi
}
httpRequest.Header.Set("User-Agent", settings.GetUserAgent())
resp, err := client.Do(httpRequest)
if err != nil {
log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to request large language model api for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.ErrFailedToRequestRemoteApi
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
log.Debugf(c, "[http_large_language_model_provider.getTextualResponse] response is %s", body)
if resp.StatusCode != 200 {
log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to get large language model api response for user \"uid:%d\", because response code is %d", uid, resp.StatusCode)
return nil, errs.ErrFailedToRequestRemoteApi
}
return p.provider.ParseTextualResponse(c, uid, body, responseType)
}
func newCommonHttpLargeLanguageModelProvider(provider HttpLargeLanguageModelProvider) *CommonHttpLargeLanguageModelProvider {
return &CommonHttpLargeLanguageModelProvider{
provider: provider,
}
}
+33
View File
@@ -0,0 +1,33 @@
package llm
import "reflect"
type LargeLanguageModelRequestPromptType byte
// Large Language Model Request Prompt Type
const (
LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_TEXT LargeLanguageModelRequestPromptType = 0
LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL LargeLanguageModelRequestPromptType = 1
)
type LargeLanguageModelResponseFormat byte
// Large Language Model Response Format
const (
LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_TEXT LargeLanguageModelResponseFormat = 0
LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON LargeLanguageModelResponseFormat = 1
)
// LargeLanguageModelRequest represents a request to a large language model
type LargeLanguageModelRequest struct {
Stream bool
SystemPrompt string
UserPrompt []byte
UserPromptType LargeLanguageModelRequestPromptType
ResponseJsonObjectType reflect.Type
}
// LargeLanguageModelTextualResponse represents a textual response from a large language model
type LargeLanguageModelTextualResponse struct {
Content string
}
+12
View File
@@ -0,0 +1,12 @@
package llm
import (
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
// LargeLanguageModelProvider defines the structure of large language model provider
type LargeLanguageModelProvider interface {
// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the large language model provider by receipt image recognition model
GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error)
}
@@ -0,0 +1,45 @@
package llm
import (
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/errs"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
// LargeLanguageModelProviderContainer contains the current large language model provider
type LargeLanguageModelProviderContainer struct {
current LargeLanguageModelProvider
}
// Initialize a large language model provider container singleton instance
var (
Container = &LargeLanguageModelProviderContainer{}
)
// InitializeLargeLanguageModelProvider initializes the current large language model provider according to the config
func InitializeLargeLanguageModelProvider(config *settings.Config) error {
if config.LLMProvider == settings.OpenAILLMProvider {
Container.current = NewOpenAILargeLanguageModelProvider(config)
return nil
} else if config.LLMProvider == settings.OpenAICompatibleLLMProvider {
Container.current = NewOpenAICompatibleLargeLanguageModelProvider(config)
return nil
} else if config.LLMProvider == settings.OpenRouterLLMProvider {
Container.current = NewOpenRouterLargeLanguageModelProvider(config)
return nil
} else if config.LLMProvider == settings.OllamaLLMProvider {
Container.current = NewOllamaLargeLanguageModelProvider(config)
return nil
}
return errs.ErrInvalidLLMProvider
}
// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the current large language model provider by receipt image recognition model
func (l *LargeLanguageModelProviderContainer) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) {
if Container.current == nil {
return nil, errs.ErrInvalidLLMProvider
}
return l.current.GetJsonResponseByReceiptImageRecognitionModel(c, uid, currentConfig, request)
}
@@ -0,0 +1,153 @@
package llm
import (
"bytes"
"encoding/base64"
"encoding/json"
"net/http"
"strings"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/errs"
"github.com/mayswind/ezbookkeeping/pkg/log"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
const ollamaChatCompletionsPath = "api/chat"
// OllamaLargeLanguageModelProvider defines the structure of Ollama large language model provider
type OllamaLargeLanguageModelProvider struct {
CommonHttpLargeLanguageModelProvider
OllamaServerURL string
ReceiptImageRecognitionModelID string
}
// BuildTextualRequest returns the http request by Ollama provider
func (p *OllamaLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) {
requestBody, err := p.buildJsonRequestBody(c, uid, request, modelId, responseType)
if err != nil {
return nil, err
}
httpRequest, err := http.NewRequest("POST", p.getOllamaRequestUrl(), bytes.NewReader(requestBody))
if err != nil {
return nil, err
}
httpRequest.Header.Set("Content-Type", "application/json")
return httpRequest, nil
}
// ParseTextualResponse returns the textual response by Ollama provider
func (p *OllamaLargeLanguageModelProvider) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
responseBody := make(map[string]any)
err := json.Unmarshal(body, &responseBody)
if err != nil {
log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] failed to parse response for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.ErrFailedToRequestRemoteApi
}
message, ok := responseBody["message"].(map[string]any)
if !ok {
log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] no message found in response for user \"uid:%d\"", uid)
return nil, errs.ErrFailedToRequestRemoteApi
}
content, ok := message["content"].(string)
if !ok {
log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] no content found in message for user \"uid:%d\"", uid)
return nil, errs.ErrFailedToRequestRemoteApi
}
if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
if strings.HasPrefix(content, "```json") && strings.HasSuffix(content, "```") {
content = strings.TrimPrefix(content, "```json")
content = strings.TrimSuffix(content, "```")
} else if strings.HasPrefix(content, "```") && strings.HasSuffix(content, "```") {
content = strings.TrimPrefix(content, "```")
content = strings.TrimSuffix(content, "```")
}
}
textualResponse := &LargeLanguageModelTextualResponse{
Content: content,
}
return textualResponse, nil
}
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of Ollama provider
func (p *OllamaLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
return p.ReceiptImageRecognitionModelID
}
func (p *OllamaLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) ([]byte, error) {
requestMessages := make([]any, 0)
if request.SystemPrompt != "" {
requestMessages = append(requestMessages, map[string]string{
"role": "system",
"content": request.SystemPrompt,
})
}
if len(request.UserPrompt) > 0 {
imageBase64Data := base64.StdEncoding.EncodeToString(request.UserPrompt)
if request.UserPromptType == LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL {
requestMessages = append(requestMessages, map[string]any{
"role": "user",
"content": "",
"images": []string{imageBase64Data},
})
} else {
requestMessages = append(requestMessages, map[string]string{
"role": "user",
"content": string(request.UserPrompt),
})
}
}
requestBody := make(map[string]any)
requestBody["model"] = modelId
requestBody["stream"] = request.Stream
requestBody["messages"] = requestMessages
if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
requestBody["format"] = "json"
}
requestBodyBytes, err := json.Marshal(requestBody)
if err != nil {
log.Errorf(c, "[ollama_large_language_model_provider.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.ErrOperationFailed
}
log.Debugf(c, "[ollama_large_language_model_provider.buildJsonRequestBody] request body is %s", requestBodyBytes)
return requestBodyBytes, nil
}
func (p *OllamaLargeLanguageModelProvider) getOllamaRequestUrl() string {
url := p.OllamaServerURL
if url[len(url)-1] != '/' {
url += "/"
}
url += ollamaChatCompletionsPath
return url
}
// NewOllamaLargeLanguageModelProvider creates a new Ollama large language model provider instance
func NewOllamaLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
return newCommonHttpLargeLanguageModelProvider(&OllamaLargeLanguageModelProvider{
OllamaServerURL: config.OllamaServerURL,
ReceiptImageRecognitionModelID: config.OllamaReceiptImageRecognitionModelID,
})
}
@@ -0,0 +1,138 @@
package llm
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
"github.com/mayswind/ezbookkeeping/pkg/core"
)
func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{}
request := &LargeLanguageModelRequest{
SystemPrompt: "You are a helpful assistant.",
UserPrompt: []byte("Hello, how are you?"),
}
bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err)
var body map[string]interface{}
err = json.Unmarshal(bodyBytes, &body)
assert.Nil(t, err)
assert.Equal(t, "{\"format\":\"json\",\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\"model\":\"test\",\"stream\":false}", string(bodyBytes))
}
func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{}
request := &LargeLanguageModelRequest{
SystemPrompt: "What's in this image?",
UserPrompt: []byte("fakedata"),
UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
}
bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err)
var body map[string]interface{}
err = json.Unmarshal(bodyBytes, &body)
assert.Nil(t, err)
assert.Equal(t, "{\"format\":\"json\",\"messages\":[{\"content\":\"What's in this image?\",\"role\":\"system\"},{\"content\":\"\",\"images\":[\"ZmFrZWRhdGE=\"],\"role\":\"user\"}],\"model\":\"test\",\"stream\":false}", string(bodyBytes))
}
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_ValidJsonResponse(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{}
response := `{
"model": "test",
"created_at": "2025-09-01T01:02:03.456789Z",
"message": {
"role": "assistant",
"content": "This is a test response"
}
}`
result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err)
assert.Equal(t, "This is a test response", result.Content)
}
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_EmptyResponse(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{}
response := `{
"model": "test",
"created_at": "2025-09-01T01:02:03.456789Z",
"message": {
"role": "assistant",
"content": ""
}
}`
result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err)
assert.Equal(t, "", result.Content)
}
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_EmptyChoices(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{}
response := `{
"model": "test",
"created_at": "2025-09-01T01:02:03.456789Z",
"message": {}
}`
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.EqualError(t, err, "failed to request third party api")
}
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_NoChoiceContent(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{}
response := `{
"model": "test",
"created_at": "2025-09-01T01:02:03.456789Z",
"message": {
"role": "assistant"
}
}`
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.EqualError(t, err, "failed to request third party api")
}
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_InvalidJson(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{}
response := "error"
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.EqualError(t, err, "failed to request third party api")
}
func TestOllamaLargeLanguageModelProvider_GetOllamaRequestUrl(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{
OllamaServerURL: "http://localhost:11434/",
}
url := provider.getOllamaRequestUrl()
assert.Equal(t, "http://localhost:11434/api/chat", url)
provider = &OllamaLargeLanguageModelProvider{
OllamaServerURL: "http://localhost:11434",
}
url = provider.getOllamaRequestUrl()
assert.Equal(t, "http://localhost:11434/api/chat", url)
provider = &OllamaLargeLanguageModelProvider{
OllamaServerURL: "http://example.com/ollama/",
}
url = provider.getOllamaRequestUrl()
assert.Equal(t, "http://example.com/ollama/api/chat", url)
}
@@ -0,0 +1,187 @@
package llm
import (
"bytes"
"encoding/base64"
"encoding/json"
"io"
"net/http"
"strings"
"github.com/invopop/jsonschema"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/errs"
"github.com/mayswind/ezbookkeeping/pkg/log"
)
// OpenAIChatCompletionsLargeLanguageModelProvider defines the structure of OpenAI chat completions compatible large language model provider
type OpenAIChatCompletionsLargeLanguageModelProvider interface {
// BuildChatCompletionsHttpRequest returns the chat completions http request
BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error)
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id if supported, otherwise returns empty string
GetReceiptImageRecognitionModelID() string
}
// OpenAICommonChatCompletionsHttpLargeLanguageModelProvider defines the structure of OpenAI common compatible large language model provider based on chat completions api
type OpenAICommonChatCompletionsHttpLargeLanguageModelProvider struct {
CommonHttpLargeLanguageModelProvider
provider OpenAIChatCompletionsLargeLanguageModelProvider
}
// BuildTextualRequest returns the http request by OpenAI common compatible provider
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) {
requestBody, err := p.buildJsonRequestBody(c, uid, request, modelId, responseType)
if err != nil {
return nil, err
}
httpRequest, err := p.provider.BuildChatCompletionsHttpRequest(c, uid)
if err != nil {
return nil, err
}
httpRequest.Body = io.NopCloser(bytes.NewReader(requestBody))
httpRequest.Header.Set("Content-Type", "application/json")
return httpRequest, nil
}
// ParseTextualResponse returns the textual response by OpenAI common compatible provider
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
responseBody := make(map[string]any)
err := json.Unmarshal(body, &responseBody)
if err != nil {
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] failed to parse response for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.ErrFailedToRequestRemoteApi
}
choices, ok := responseBody["choices"].([]any)
if !ok || len(choices) < 1 {
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no choices found in response for user \"uid:%d\"", uid)
return nil, errs.ErrFailedToRequestRemoteApi
}
firstChoice, ok := choices[0].(map[string]any)
if !ok {
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] invalid choice format in response for user \"uid:%d\"", uid)
return nil, errs.ErrFailedToRequestRemoteApi
}
message, ok := firstChoice["message"].(map[string]any)
if !ok {
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no message found in choice for user \"uid:%d\"", uid)
return nil, errs.ErrFailedToRequestRemoteApi
}
content, ok := message["content"].(string)
if !ok {
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no content found in message for user \"uid:%d\"", uid)
return nil, errs.ErrFailedToRequestRemoteApi
}
if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
if strings.HasPrefix(content, "```json") && strings.HasSuffix(content, "```") {
content = strings.TrimPrefix(content, "```json")
content = strings.TrimSuffix(content, "```")
} else if strings.HasPrefix(content, "```") && strings.HasSuffix(content, "```") {
content = strings.TrimPrefix(content, "```")
content = strings.TrimSuffix(content, "```")
}
}
textualResponse := &LargeLanguageModelTextualResponse{
Content: content,
}
return textualResponse, nil
}
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI common compatible provider
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
return p.provider.GetReceiptImageRecognitionModelID()
}
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) ([]byte, error) {
requestMessages := make([]any, 0)
if request.SystemPrompt != "" {
requestMessages = append(requestMessages, map[string]string{
"role": "system",
"content": request.SystemPrompt,
})
}
if len(request.UserPrompt) > 0 {
if request.UserPromptType == LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL {
imageBase64Data := "data:image/png;base64," + base64.StdEncoding.EncodeToString(request.UserPrompt)
requestMessages = append(requestMessages, map[string]any{
"role": "user",
"content": []any{
core.O{
"type": "image_url",
"image_url": core.O{
"url": imageBase64Data,
},
},
},
})
} else {
requestMessages = append(requestMessages, map[string]string{
"role": "user",
"content": string(request.UserPrompt),
})
}
}
requestBody := make(map[string]any)
requestBody["model"] = modelId
requestBody["stream"] = request.Stream
requestBody["messages"] = requestMessages
if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
if request.ResponseJsonObjectType != nil {
schemeGenerator := jsonschema.Reflector{
Anonymous: true,
DoNotReference: true,
ExpandedStruct: true,
}
schema := schemeGenerator.ReflectFromType(request.ResponseJsonObjectType)
schema.Version = ""
requestBody["response_format"] = core.O{
"type": "json_schema",
"json_schema": schema,
}
} else {
requestBody["response_format"] = core.O{
"type": "json_object",
}
}
}
requestBodyBytes, err := json.Marshal(requestBody)
if err != nil {
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.ErrOperationFailed
}
log.Debugf(c, "[openai_common_compatible_large_language_model_provider.buildJsonRequestBody] request body is %s", requestBodyBytes)
return requestBodyBytes, nil
}
func newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(provider OpenAIChatCompletionsLargeLanguageModelProvider) LargeLanguageModelProvider {
return newCommonHttpLargeLanguageModelProvider(&OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: provider,
})
}
@@ -0,0 +1,157 @@
package llm
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
"github.com/mayswind/ezbookkeeping/pkg/core"
)
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) {
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: &OpenAILargeLanguageModelProvider{},
}
request := &LargeLanguageModelRequest{
SystemPrompt: "You are a helpful assistant.",
UserPrompt: []byte("Hello, how are you?"),
}
bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err)
var body map[string]interface{}
err = json.Unmarshal(bodyBytes, &body)
assert.Nil(t, err)
assert.Equal(t, "{\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\"model\":\"test\",\"response_format\":{\"type\":\"json_object\"},\"stream\":false}", string(bodyBytes))
}
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) {
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: &OpenAILargeLanguageModelProvider{},
}
request := &LargeLanguageModelRequest{
SystemPrompt: "What's in this image?",
UserPrompt: []byte("fakedata"),
UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
}
bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err)
var body map[string]interface{}
err = json.Unmarshal(bodyBytes, &body)
assert.Nil(t, err)
assert.Equal(t, "{\"messages\":[{\"content\":\"What's in this image?\",\"role\":\"system\"},{\"content\":[{\"image_url\":{\"url\":\"data:image/png;base64,ZmFrZWRhdGE=\"},\"type\":\"image_url\"}],\"role\":\"user\"}],\"model\":\"test\",\"response_format\":{\"type\":\"json_object\"},\"stream\":false}", string(bodyBytes))
}
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_ValidJsonResponse(t *testing.T) {
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: &OpenAILargeLanguageModelProvider{},
}
response := `{
"id": "test-123",
"object": "chat.completion",
"created": 1234567890,
"model": "test",
"usage": {
"prompt_tokens": 13,
"completion_tokens": 7,
"total_tokens": 20
},
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"role": "assistant",
"content": "This is a test response"
}
}
]
}`
result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err)
assert.Equal(t, "This is a test response", result.Content)
}
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_EmptyResponse(t *testing.T) {
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: &OpenAILargeLanguageModelProvider{},
}
response := `{
"id": "test-123",
"object": "chat.completion",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"role": "assistant",
"content": ""
}
}
]
}`
result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err)
assert.Equal(t, "", result.Content)
}
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_EmptyChoices(t *testing.T) {
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: &OpenAILargeLanguageModelProvider{},
}
response := `{
"id": "test-123",
"object": "chat.completion",
"choices": []
}`
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.EqualError(t, err, "failed to request third party api")
}
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_NoChoiceContent(t *testing.T) {
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: &OpenAILargeLanguageModelProvider{},
}
response := `{
"id": "chatcmpl-123",
"object": "chat.completion",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"role": "assistant"
}
}
]
}`
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.EqualError(t, err, "failed to request third party api")
}
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_InvalidJson(t *testing.T) {
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: &OpenAILargeLanguageModelProvider{},
}
response := "error"
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.EqualError(t, err, "failed to request third party api")
}
@@ -0,0 +1,58 @@
package llm
import (
"net/http"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
const openAICompatibleChatCompletionsPath = "chat/completions"
// OpenAICompatibleLargeLanguageModelProvider defines the structure of OpenAI compatible large language model provider
type OpenAICompatibleLargeLanguageModelProvider struct {
OpenAIChatCompletionsLargeLanguageModelProvider
OpenAICompatibleBaseURL string
OpenAICompatibleAPIKey string
ReceiptImageRecognitionModelID string
}
// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenAI compatible provider
func (p *OpenAICompatibleLargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) {
req, err := http.NewRequest("POST", p.getFinalChatCompletionsRequestUrl(), nil)
if err != nil {
return nil, err
}
if p.OpenAICompatibleAPIKey != "" {
req.Header.Set("Authorization", "Bearer "+p.OpenAICompatibleAPIKey)
}
return req, nil
}
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI compatible provider
func (p *OpenAICompatibleLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
return p.ReceiptImageRecognitionModelID
}
func (p *OpenAICompatibleLargeLanguageModelProvider) getFinalChatCompletionsRequestUrl() string {
url := p.OpenAICompatibleBaseURL
if url[len(url)-1] != '/' {
url += "/"
}
url += openAICompatibleChatCompletionsPath
return url
}
// NewOpenAICompatibleLargeLanguageModelProvider creates a new OpenAI compatible large language model provider instance
func NewOpenAICompatibleLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAICompatibleLargeLanguageModelProvider{
OpenAICompatibleBaseURL: config.OpenAICompatibleBaseURL,
OpenAICompatibleAPIKey: config.OpenAICompatibleAPIKey,
ReceiptImageRecognitionModelID: config.OpenAICompatibleReceiptImageRecognitionModelID,
})
}
@@ -0,0 +1,27 @@
package llm
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestOpenAICompatibleLargeLanguageModelProvider_GetFinalRequestUrl(t *testing.T) {
provider := &OpenAICompatibleLargeLanguageModelProvider{
OpenAICompatibleBaseURL: "https://api.example.com/v1/",
}
url := provider.getFinalChatCompletionsRequestUrl()
assert.Equal(t, "https://api.example.com/v1/chat/completions", url)
provider = &OpenAICompatibleLargeLanguageModelProvider{
OpenAICompatibleBaseURL: "https://api.example.com/v1",
}
url = provider.getFinalChatCompletionsRequestUrl()
assert.Equal(t, "https://api.example.com/v1/chat/completions", url)
provider = &OpenAICompatibleLargeLanguageModelProvider{
OpenAICompatibleBaseURL: "https://example.com/api",
}
url = provider.getFinalChatCompletionsRequestUrl()
assert.Equal(t, "https://example.com/api/chat/completions", url)
}
@@ -0,0 +1,43 @@
package llm
import (
"net/http"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
// OpenAILargeLanguageModelProvider defines the structure of OpenAI large language model provider
type OpenAILargeLanguageModelProvider struct {
OpenAIChatCompletionsLargeLanguageModelProvider
OpenAIAPIKey string
ReceiptImageRecognitionModelID string
}
const openAIChatCompletionsUrl = "https://api.openai.com/v1/chat/completions"
// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenAI provider
func (p *OpenAILargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) {
req, err := http.NewRequest("POST", openAIChatCompletionsUrl, nil)
if err != nil {
return nil, err
}
req.Header.Set("Authorization", "Bearer "+p.OpenAIAPIKey)
return req, nil
}
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI provider
func (p *OpenAILargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
return p.ReceiptImageRecognitionModelID
}
// NewOpenAILargeLanguageModelProvider creates a new OpenAI large language model provider instance
func NewOpenAILargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAILargeLanguageModelProvider{
OpenAIAPIKey: config.OpenAIAPIKey,
ReceiptImageRecognitionModelID: config.OpenAIReceiptImageRecognitionModelID,
})
}
@@ -0,0 +1,45 @@
package llm
import (
"net/http"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
// OpenRouterLargeLanguageModelProvider defines the structure of OpenRouter large language model provider
type OpenRouterLargeLanguageModelProvider struct {
OpenAIChatCompletionsLargeLanguageModelProvider
OpenRouterAPIKey string
ReceiptImageRecognitionModelID string
}
const openRouterChatCompletionsUrl = "https://openrouter.ai/api/v1/chat/completions"
// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenRouter provider
func (p *OpenRouterLargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) {
req, err := http.NewRequest("POST", openRouterChatCompletionsUrl, nil)
if err != nil {
return nil, err
}
req.Header.Set("Authorization", "Bearer "+p.OpenRouterAPIKey)
req.Header.Set("HTTP-Referer", "https://ezbookkeeping.mayswind.net/")
req.Header.Set("X-Title", "ezBookkeeping")
return req, nil
}
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenRouter provider
func (p *OpenRouterLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
return p.ReceiptImageRecognitionModelID
}
// NewOpenRouterLargeLanguageModelProvider creates a new OpenRouter large language model provider instance
func NewOpenRouterLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenRouterLargeLanguageModelProvider{
OpenRouterAPIKey: config.OpenRouterAPIKey,
ReceiptImageRecognitionModelID: config.OpenRouterReceiptImageRecognitionModelID,
})
}