create transactions from AI receipt image recognition results

This commit is contained in:
MaysWind
2025-09-21 04:00:56 +08:00
parent 00f1d0418f
commit 5d88287ae2
50 changed files with 2356 additions and 22 deletions
+345
View File
@@ -0,0 +1,345 @@
package api
import (
"bytes"
"encoding/json"
"io"
"strings"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/errs"
"github.com/mayswind/ezbookkeeping/pkg/llm"
"github.com/mayswind/ezbookkeeping/pkg/log"
"github.com/mayswind/ezbookkeeping/pkg/models"
"github.com/mayswind/ezbookkeeping/pkg/services"
"github.com/mayswind/ezbookkeeping/pkg/settings"
"github.com/mayswind/ezbookkeeping/pkg/templates"
"github.com/mayswind/ezbookkeeping/pkg/utils"
)
// LargeLanguageModelsApi represents large language models api
type LargeLanguageModelsApi struct {
ApiUsingConfig
transactionCategories *services.TransactionCategoryService
transactionTags *services.TransactionTagService
accounts *services.AccountService
users *services.UserService
}
// Initialize a large language models api singleton instance
var (
LargeLanguageModels = &LargeLanguageModelsApi{
ApiUsingConfig: ApiUsingConfig{
container: settings.Container,
},
transactionCategories: services.TransactionCategories,
transactionTags: services.TransactionTags,
accounts: services.Accounts,
users: services.Users,
}
)
// RecognizeReceiptImageHandler returns the recognized receipt image result
func (a *LargeLanguageModelsApi) RecognizeReceiptImageHandler(c *core.WebContext) (any, *errs.Error) {
if a.CurrentConfig().LLMProvider == "" || !a.CurrentConfig().TransactionFromAIImageRecognition {
return nil, errs.ErrLargeLanguageModelProviderNotEnabled
}
utcOffset, err := c.GetClientTimezoneOffset()
if err != nil {
log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] cannot get client timezone offset, because %s", err.Error())
return nil, errs.ErrClientTimezoneOffsetInvalid
}
uid := c.GetCurrentUid()
user, err := a.users.GetUserById(c, uid)
if err != nil {
if !errs.IsCustomError(err) {
log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get user for user \"uid:%d\", because %s", uid, err.Error())
}
return false, errs.ErrUserNotFound
}
if user.FeatureRestriction.Contains(core.USER_FEATURE_RESTRICTION_TYPE_CREATE_TRANSACTION_FROM_AI_IMAGE_RECOGNITION) {
return false, errs.ErrNotPermittedToPerformThisAction
}
form, err := c.MultipartForm()
if err != nil {
log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get multi-part form data for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.ErrParameterInvalid
}
imageFiles := form.File["image"]
if len(imageFiles) < 1 {
log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] there is no image in request for user \"uid:%d\"", uid)
return nil, errs.ErrNoAIRecognitionImage
}
if imageFiles[0].Size < 1 {
log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] the size of image in request is zero for user \"uid:%d\"", uid)
return nil, errs.ErrAIRecognitionImageIsEmpty
}
if imageFiles[0].Size > int64(a.CurrentConfig().MaxAIRecognitionPictureFileSize) {
log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] the upload file size \"%d\" exceeds the maximum size \"%d\" of image for user \"uid:%d\"", imageFiles[0].Size, a.CurrentConfig().MaxAIRecognitionPictureFileSize, uid)
return nil, errs.ErrExceedMaxAIRecognitionImageFileSize
}
fileExtension := utils.GetFileNameExtension(imageFiles[0].Filename)
if utils.GetImageContentType(fileExtension) == "" {
log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] the file extension \"%s\" of image in request is not supported for user \"uid:%d\"", fileExtension, uid)
return nil, errs.ErrImageTypeNotSupported
}
imageFile, err := imageFiles[0].Open()
if err != nil {
log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get image file from request for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.ErrOperationFailed
}
defer imageFile.Close()
imageData, err := io.ReadAll(imageFile)
if err != nil {
log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to read image file from request for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.ErrOperationFailed
}
accounts, err := a.accounts.GetAllAccountsByUid(c, uid)
if err != nil {
log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get all accounts for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.Or(err, errs.ErrOperationFailed)
}
accountMap := a.accounts.GetVisibleAccountNameMapByList(accounts)
accountNames := make([]string, 0, len(accounts))
for i := 0; i < len(accounts); i++ {
if accounts[i].Hidden || accounts[i].Type == models.ACCOUNT_TYPE_MULTI_SUB_ACCOUNTS {
continue
}
accountNames = append(accountNames, accounts[i].Name)
}
categories, err := a.transactionCategories.GetAllCategoriesByUid(c, uid, 0, -1)
if err != nil {
log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get categories for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.Or(err, errs.ErrOperationFailed)
}
incomeCategoryMap := make(map[string]*models.TransactionCategory)
incomeCategoryNames := make([]string, 0)
expenseCategoryMap := make(map[string]*models.TransactionCategory)
expenseCategoryNames := make([]string, 0)
transferCategoryMap := make(map[string]*models.TransactionCategory)
transferCategoryNames := make([]string, 0)
for i := 0; i < len(categories); i++ {
category := categories[i]
if category.Hidden || category.ParentCategoryId == models.LevelOneTransactionCategoryParentId {
continue
}
if category.Type == models.CATEGORY_TYPE_INCOME {
incomeCategoryMap[category.Name] = category
incomeCategoryNames = append(incomeCategoryNames, category.Name)
} else if category.Type == models.CATEGORY_TYPE_EXPENSE {
expenseCategoryMap[category.Name] = category
expenseCategoryNames = append(expenseCategoryNames, category.Name)
} else if category.Type == models.CATEGORY_TYPE_TRANSFER {
transferCategoryMap[category.Name] = category
transferCategoryNames = append(transferCategoryNames, category.Name)
}
}
tags, err := a.transactionTags.GetAllTagsByUid(c, uid)
if err != nil {
log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get tags for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.Or(err, errs.ErrOperationFailed)
}
tagMap := a.transactionTags.GetVisibleTagNameMapByList(tags)
tagNames := make([]string, 0, len(tags))
for i := 0; i < len(tags); i++ {
if tags[i].Hidden {
continue
}
tagNames = append(tagNames, tags[i].Name)
}
systemPrompt, err := templates.GetTemplate(templates.SYSTEM_PROMPT_RECEIPT_IMAGE_RECOGNITION)
if err != nil {
return nil, errs.Or(err, errs.ErrOperationFailed)
}
systemPromptParams := map[string]any{
"AllExpenseCategoryNames": strings.Join(expenseCategoryNames, "\n"),
"AllIncomeCategoryNames": strings.Join(incomeCategoryNames, "\n"),
"AllTransferCategoryNames": strings.Join(transferCategoryNames, "\n"),
"AllAccountNames": strings.Join(accountNames, "\n"),
"AllTagNames": strings.Join(tagNames, "\n"),
}
var bodyBuffer bytes.Buffer
err = systemPrompt.Execute(&bodyBuffer, systemPromptParams)
if err != nil {
return nil, errs.Or(err, errs.ErrOperationFailed)
}
llmRequest := &llm.LargeLanguageModelRequest{
Stream: false,
SystemPrompt: strings.ReplaceAll(bodyBuffer.String(), "\r\n", "\n"),
UserPrompt: imageData,
UserPromptType: llm.LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
}
llmResponse, err := llm.Container.GetJsonResponseByReceiptImageRecognitionModel(c, c.GetCurrentUid(), a.CurrentConfig(), llmRequest)
if err != nil {
return nil, errs.Or(err, errs.ErrOperationFailed)
}
var result *models.RecognizedReceiptImageResult
if err := json.Unmarshal([]byte(llmResponse.Content), &result); err != nil {
log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to unmarshal recognized receipt image result from llm response \"%s\" for user \"uid:%d\", because %s", llmResponse.Content, uid, err.Error())
return nil, errs.Or(err, errs.ErrOperationFailed)
}
return a.parseRecognizedReceiptImageResponse(c, uid, utcOffset, result, accountMap, expenseCategoryMap, incomeCategoryMap, transferCategoryMap, tagMap)
}
func (a *LargeLanguageModelsApi) parseRecognizedReceiptImageResponse(c *core.WebContext, uid int64, utcOffset int16, recognizedResult *models.RecognizedReceiptImageResult, accountMap map[string]*models.Account, expenseCategoryMap map[string]*models.TransactionCategory, incomeCategoryMap map[string]*models.TransactionCategory, transferCategoryMap map[string]*models.TransactionCategory, tagMap map[string]*models.TransactionTag) (*models.RecognizedReceiptImageResponse, *errs.Error) {
recognizedReceiptImageResponse := &models.RecognizedReceiptImageResponse{
Type: models.TRANSACTION_TYPE_EXPENSE,
}
if recognizedResult == nil {
log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recoginzed result is null")
return nil, errs.ErrOperationFailed
}
if recognizedResult.Type == "income" {
recognizedReceiptImageResponse.Type = models.TRANSACTION_TYPE_INCOME
if len(recognizedResult.CategoryName) > 0 {
category, exists := incomeCategoryMap[recognizedResult.CategoryName]
if exists {
recognizedReceiptImageResponse.CategoryId = category.CategoryId
}
}
} else if recognizedResult.Type == "expense" {
recognizedReceiptImageResponse.Type = models.TRANSACTION_TYPE_EXPENSE
if len(recognizedResult.CategoryName) > 0 {
category, exists := expenseCategoryMap[recognizedResult.CategoryName]
if exists {
recognizedReceiptImageResponse.CategoryId = category.CategoryId
}
}
} else if recognizedResult.Type == "transfer" {
recognizedReceiptImageResponse.Type = models.TRANSACTION_TYPE_TRANSFER
if len(recognizedResult.CategoryName) > 0 {
category, exists := transferCategoryMap[recognizedResult.CategoryName]
if exists {
recognizedReceiptImageResponse.CategoryId = category.CategoryId
}
}
} else {
log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recoginzed transaction type \"%s\" is invalid", recognizedResult.Type)
return nil, errs.ErrOperationFailed
}
if len(recognizedResult.Time) > 0 {
timestamp, err := utils.ParseFromLongDateTime(recognizedResult.Time, utcOffset)
if err != nil {
log.Warnf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recoginzed time \"%s\" is invalid", recognizedResult.Time)
} else {
recognizedReceiptImageResponse.Time = timestamp.Unix()
}
}
if len(recognizedResult.Amount) > 0 {
amount, err := utils.ParseAmount(recognizedResult.Amount)
if err != nil {
log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recoginzed amount \"%s\" is invalid", recognizedResult.Amount)
return nil, errs.ErrOperationFailed
}
recognizedReceiptImageResponse.SourceAmount = amount
if recognizedReceiptImageResponse.Type == models.TRANSACTION_TYPE_TRANSFER && len(recognizedResult.DestinationAmount) > 0 {
destinationAmount, err := utils.ParseAmount(recognizedResult.DestinationAmount)
if err != nil {
log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recoginzed destination amount \"%s\" is invalid", recognizedResult.DestinationAmount)
return nil, errs.ErrOperationFailed
}
recognizedReceiptImageResponse.DestinationAmount = destinationAmount
}
}
if len(recognizedResult.AccountName) > 0 {
account, exists := accountMap[recognizedResult.AccountName]
if exists {
recognizedReceiptImageResponse.SourceAccountId = account.AccountId
}
}
if len(recognizedResult.DestinationAccountName) > 0 {
account, exists := accountMap[recognizedResult.DestinationAccountName]
if exists {
recognizedReceiptImageResponse.DestinationAccountId = account.AccountId
}
}
if len(recognizedResult.TagNames) > 0 {
tagIds := make([]string, 0, len(recognizedResult.TagNames))
for i := 0; i < len(recognizedResult.TagNames); i++ {
tagName := recognizedResult.TagNames[i]
tag, exists := tagMap[tagName]
if exists {
tagIds = append(tagIds, utils.Int64ToString(tag.TagId))
}
}
recognizedReceiptImageResponse.TagIds = tagIds
}
if len(recognizedResult.Description) > 0 {
recognizedReceiptImageResponse.Comment = recognizedResult.Description
}
return recognizedReceiptImageResponse, nil
}
+6
View File
@@ -47,6 +47,12 @@ func (a *ServerSettingsApi) ServerSettingsJavascriptHandler(c *core.WebContext)
a.appendBooleanSetting(builder, "mcp", config.EnableMCPServer)
}
if config.LLMProvider != "" {
if config.TransactionFromAIImageRecognition {
a.appendBooleanSetting(builder, "llmt", config.TransactionFromAIImageRecognition)
}
}
if config.LoginPageTips.Enabled {
a.appendMultiLanguageTipSetting(builder, "lpt", config.LoginPageTips)
}
+14 -13
View File
@@ -76,19 +76,20 @@ type UserFeatureRestrictionType uint64
// User Feature Restriction Type
const (
USER_FEATURE_RESTRICTION_TYPE_UPDATE_PASSWORD UserFeatureRestrictionType = 1
USER_FEATURE_RESTRICTION_TYPE_UPDATE_EMAIL UserFeatureRestrictionType = 2
USER_FEATURE_RESTRICTION_TYPE_UPDATE_PROFILE_BASIC_INFO UserFeatureRestrictionType = 3
USER_FEATURE_RESTRICTION_TYPE_UPDATE_AVATAR UserFeatureRestrictionType = 4
USER_FEATURE_RESTRICTION_TYPE_REVOKE_OTHER_SESSION UserFeatureRestrictionType = 5
USER_FEATURE_RESTRICTION_TYPE_ENABLE_2FA UserFeatureRestrictionType = 6
USER_FEATURE_RESTRICTION_TYPE_DISABLE_2FA UserFeatureRestrictionType = 7
USER_FEATURE_RESTRICTION_TYPE_FORGET_PASSWORD UserFeatureRestrictionType = 8
USER_FEATURE_RESTRICTION_TYPE_IMPORT_TRANSACTION UserFeatureRestrictionType = 9
USER_FEATURE_RESTRICTION_TYPE_EXPORT_TRANSACTION UserFeatureRestrictionType = 10
USER_FEATURE_RESTRICTION_TYPE_CLEAR_ALL_DATA UserFeatureRestrictionType = 11
USER_FEATURE_RESTRICTION_TYPE_SYNC_APPLICATION_SETTINGS UserFeatureRestrictionType = 12
USER_FEATURE_RESTRICTION_TYPE_MCP_ACCESS UserFeatureRestrictionType = 13
USER_FEATURE_RESTRICTION_TYPE_UPDATE_PASSWORD UserFeatureRestrictionType = 1
USER_FEATURE_RESTRICTION_TYPE_UPDATE_EMAIL UserFeatureRestrictionType = 2
USER_FEATURE_RESTRICTION_TYPE_UPDATE_PROFILE_BASIC_INFO UserFeatureRestrictionType = 3
USER_FEATURE_RESTRICTION_TYPE_UPDATE_AVATAR UserFeatureRestrictionType = 4
USER_FEATURE_RESTRICTION_TYPE_REVOKE_OTHER_SESSION UserFeatureRestrictionType = 5
USER_FEATURE_RESTRICTION_TYPE_ENABLE_2FA UserFeatureRestrictionType = 6
USER_FEATURE_RESTRICTION_TYPE_DISABLE_2FA UserFeatureRestrictionType = 7
USER_FEATURE_RESTRICTION_TYPE_FORGET_PASSWORD UserFeatureRestrictionType = 8
USER_FEATURE_RESTRICTION_TYPE_IMPORT_TRANSACTION UserFeatureRestrictionType = 9
USER_FEATURE_RESTRICTION_TYPE_EXPORT_TRANSACTION UserFeatureRestrictionType = 10
USER_FEATURE_RESTRICTION_TYPE_CLEAR_ALL_DATA UserFeatureRestrictionType = 11
USER_FEATURE_RESTRICTION_TYPE_SYNC_APPLICATION_SETTINGS UserFeatureRestrictionType = 12
USER_FEATURE_RESTRICTION_TYPE_MCP_ACCESS UserFeatureRestrictionType = 13
USER_FEATURE_RESTRICTION_TYPE_CREATE_TRANSACTION_FROM_AI_IMAGE_RECOGNITION UserFeatureRestrictionType = 14
)
const userFeatureRestrictionTypeMinValue UserFeatureRestrictionType = USER_FEATURE_RESTRICTION_TYPE_UPDATE_PASSWORD
+1
View File
@@ -40,6 +40,7 @@ const (
NormalSubcategoryConverter = 12
NormalSubcategoryUserCustomExchangeRate = 13
NormalSubcategoryModelContextProtocol = 14
NormalSubcategoryLargeLanguageModel = 15
)
// Error represents the specific error returned to user
+11
View File
@@ -0,0 +1,11 @@
package errs
import "net/http"
// Error codes related to large language model features
var (
ErrLargeLanguageModelProviderNotEnabled = NewNormalError(NormalSubcategoryLargeLanguageModel, 0, http.StatusBadRequest, "llm provider is not enabled")
ErrNoAIRecognitionImage = NewNormalError(NormalSubcategoryLargeLanguageModel, 1, http.StatusBadRequest, "no image for AI recognition")
ErrAIRecognitionImageIsEmpty = NewNormalError(NormalSubcategoryLargeLanguageModel, 2, http.StatusBadRequest, "image for AI recognition is empty")
ErrExceedMaxAIRecognitionImageFileSize = NewNormalError(NormalSubcategoryLargeLanguageModel, 3, http.StatusBadRequest, "exceed the maximum size of image file for AI recognition")
)
+2
View File
@@ -24,4 +24,6 @@ var (
ErrInvalidPasswordResetTokenExpiredTime = NewSystemError(SystemSubcategorySetting, 17, http.StatusInternalServerError, "invalid password reset token expired time")
ErrInvalidExchangeRatesDataSource = NewSystemError(SystemSubcategorySetting, 18, http.StatusInternalServerError, "invalid exchange rates data source")
ErrInvalidIpAddressPattern = NewSystemError(SystemSubcategorySetting, 19, http.StatusInternalServerError, "invalid ip address pattern")
ErrInvalidLLMProvider = NewSystemError(SystemSubcategorySetting, 20, http.StatusInternalServerError, "invalid llm provider")
ErrInvalidLLMModelId = NewSystemError(SystemSubcategorySetting, 21, http.StatusInternalServerError, "invalid llm model id")
)
@@ -0,0 +1,91 @@
package llm
import (
"crypto/tls"
"io"
"net/http"
"time"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/errs"
"github.com/mayswind/ezbookkeeping/pkg/log"
"github.com/mayswind/ezbookkeeping/pkg/settings"
"github.com/mayswind/ezbookkeeping/pkg/utils"
)
// HttpLargeLanguageModelProvider defines the structure of http large language model provider
type HttpLargeLanguageModelProvider interface {
// BuildTextualRequest returns the http request by the provider api definition
BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error)
// ParseTextualResponse returns the textual response entity by the provider api definition
ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error)
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id if supported, otherwise returns empty string
GetReceiptImageRecognitionModelID() string
}
// CommonHttpLargeLanguageModelProvider defines the structure of common http large language model provider
type CommonHttpLargeLanguageModelProvider struct {
LargeLanguageModelProvider
provider HttpLargeLanguageModelProvider
}
// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the OpenAI common compatible large language model provider
func (p *CommonHttpLargeLanguageModelProvider) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) {
return p.getTextualResponse(c, uid, currentConfig, request, p.provider.GetReceiptImageRecognitionModelID(), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
}
func (p *CommonHttpLargeLanguageModelProvider) getTextualResponse(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
if modelId == "" {
return nil, errs.ErrInvalidLLMModelId
}
transport := http.DefaultTransport.(*http.Transport).Clone()
utils.SetProxyUrl(transport, currentConfig.LargeLanguageModelAPIProxy)
if currentConfig.LargeLanguageModelAPISkipTLSVerify {
transport.TLSClientConfig = &tls.Config{
InsecureSkipVerify: true,
}
}
client := &http.Client{
Transport: transport,
Timeout: time.Duration(currentConfig.LargeLanguageModelAPIRequestTimeout) * time.Millisecond,
}
httpRequest, err := p.provider.BuildTextualRequest(c, uid, request, modelId, responseType)
if err != nil {
log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to build requests for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.ErrFailedToRequestRemoteApi
}
httpRequest.Header.Set("User-Agent", settings.GetUserAgent())
resp, err := client.Do(httpRequest)
if err != nil {
log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to request large language model api for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.ErrFailedToRequestRemoteApi
}
defer resp.Body.Close()
body, err := io.ReadAll(resp.Body)
log.Debugf(c, "[http_large_language_model_provider.getTextualResponse] response is %s", body)
if resp.StatusCode != 200 {
log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to get large language model api response for user \"uid:%d\", because response code is %d", uid, resp.StatusCode)
return nil, errs.ErrFailedToRequestRemoteApi
}
return p.provider.ParseTextualResponse(c, uid, body, responseType)
}
func newCommonHttpLargeLanguageModelProvider(provider HttpLargeLanguageModelProvider) *CommonHttpLargeLanguageModelProvider {
return &CommonHttpLargeLanguageModelProvider{
provider: provider,
}
}
+33
View File
@@ -0,0 +1,33 @@
package llm
import "reflect"
type LargeLanguageModelRequestPromptType byte
// Large Language Model Request Prompt Type
const (
LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_TEXT LargeLanguageModelRequestPromptType = 0
LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL LargeLanguageModelRequestPromptType = 1
)
type LargeLanguageModelResponseFormat byte
// Large Language Model Response Format
const (
LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_TEXT LargeLanguageModelResponseFormat = 0
LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON LargeLanguageModelResponseFormat = 1
)
// LargeLanguageModelRequest represents a request to a large language model
type LargeLanguageModelRequest struct {
Stream bool
SystemPrompt string
UserPrompt []byte
UserPromptType LargeLanguageModelRequestPromptType
ResponseJsonObjectType reflect.Type
}
// LargeLanguageModelTextualResponse represents a textual response from a large language model
type LargeLanguageModelTextualResponse struct {
Content string
}
+12
View File
@@ -0,0 +1,12 @@
package llm
import (
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
// LargeLanguageModelProvider defines the structure of large language model provider
type LargeLanguageModelProvider interface {
// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the large language model provider by receipt image recognition model
GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error)
}
@@ -0,0 +1,45 @@
package llm
import (
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/errs"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
// LargeLanguageModelProviderContainer contains the current large language model provider
type LargeLanguageModelProviderContainer struct {
current LargeLanguageModelProvider
}
// Initialize a large language model provider container singleton instance
var (
Container = &LargeLanguageModelProviderContainer{}
)
// InitializeLargeLanguageModelProvider initializes the current large language model provider according to the config
func InitializeLargeLanguageModelProvider(config *settings.Config) error {
if config.LLMProvider == settings.OpenAILLMProvider {
Container.current = NewOpenAILargeLanguageModelProvider(config)
return nil
} else if config.LLMProvider == settings.OpenAICompatibleLLMProvider {
Container.current = NewOpenAICompatibleLargeLanguageModelProvider(config)
return nil
} else if config.LLMProvider == settings.OpenRouterLLMProvider {
Container.current = NewOpenRouterLargeLanguageModelProvider(config)
return nil
} else if config.LLMProvider == settings.OllamaLLMProvider {
Container.current = NewOllamaLargeLanguageModelProvider(config)
return nil
}
return errs.ErrInvalidLLMProvider
}
// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the current large language model provider by receipt image recognition model
func (l *LargeLanguageModelProviderContainer) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) {
if Container.current == nil {
return nil, errs.ErrInvalidLLMProvider
}
return l.current.GetJsonResponseByReceiptImageRecognitionModel(c, uid, currentConfig, request)
}
@@ -0,0 +1,153 @@
package llm
import (
"bytes"
"encoding/base64"
"encoding/json"
"net/http"
"strings"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/errs"
"github.com/mayswind/ezbookkeeping/pkg/log"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
const ollamaChatCompletionsPath = "api/chat"
// OllamaLargeLanguageModelProvider defines the structure of Ollama large language model provider
type OllamaLargeLanguageModelProvider struct {
CommonHttpLargeLanguageModelProvider
OllamaServerURL string
ReceiptImageRecognitionModelID string
}
// BuildTextualRequest returns the http request by Ollama provider
func (p *OllamaLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) {
requestBody, err := p.buildJsonRequestBody(c, uid, request, modelId, responseType)
if err != nil {
return nil, err
}
httpRequest, err := http.NewRequest("POST", p.getOllamaRequestUrl(), bytes.NewReader(requestBody))
if err != nil {
return nil, err
}
httpRequest.Header.Set("Content-Type", "application/json")
return httpRequest, nil
}
// ParseTextualResponse returns the textual response by Ollama provider
func (p *OllamaLargeLanguageModelProvider) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
responseBody := make(map[string]any)
err := json.Unmarshal(body, &responseBody)
if err != nil {
log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] failed to parse response for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.ErrFailedToRequestRemoteApi
}
message, ok := responseBody["message"].(map[string]any)
if !ok {
log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] no message found in response for user \"uid:%d\"", uid)
return nil, errs.ErrFailedToRequestRemoteApi
}
content, ok := message["content"].(string)
if !ok {
log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] no content found in message for user \"uid:%d\"", uid)
return nil, errs.ErrFailedToRequestRemoteApi
}
if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
if strings.HasPrefix(content, "```json") && strings.HasSuffix(content, "```") {
content = strings.TrimPrefix(content, "```json")
content = strings.TrimSuffix(content, "```")
} else if strings.HasPrefix(content, "```") && strings.HasSuffix(content, "```") {
content = strings.TrimPrefix(content, "```")
content = strings.TrimSuffix(content, "```")
}
}
textualResponse := &LargeLanguageModelTextualResponse{
Content: content,
}
return textualResponse, nil
}
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of Ollama provider
func (p *OllamaLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
return p.ReceiptImageRecognitionModelID
}
func (p *OllamaLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) ([]byte, error) {
requestMessages := make([]any, 0)
if request.SystemPrompt != "" {
requestMessages = append(requestMessages, map[string]string{
"role": "system",
"content": request.SystemPrompt,
})
}
if len(request.UserPrompt) > 0 {
imageBase64Data := base64.StdEncoding.EncodeToString(request.UserPrompt)
if request.UserPromptType == LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL {
requestMessages = append(requestMessages, map[string]any{
"role": "user",
"content": "",
"images": []string{imageBase64Data},
})
} else {
requestMessages = append(requestMessages, map[string]string{
"role": "user",
"content": string(request.UserPrompt),
})
}
}
requestBody := make(map[string]any)
requestBody["model"] = modelId
requestBody["stream"] = request.Stream
requestBody["messages"] = requestMessages
if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
requestBody["format"] = "json"
}
requestBodyBytes, err := json.Marshal(requestBody)
if err != nil {
log.Errorf(c, "[ollama_large_language_model_provider.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.ErrOperationFailed
}
log.Debugf(c, "[ollama_large_language_model_provider.buildJsonRequestBody] request body is %s", requestBodyBytes)
return requestBodyBytes, nil
}
func (p *OllamaLargeLanguageModelProvider) getOllamaRequestUrl() string {
url := p.OllamaServerURL
if url[len(url)-1] != '/' {
url += "/"
}
url += ollamaChatCompletionsPath
return url
}
// NewOllamaLargeLanguageModelProvider creates a new Ollama large language model provider instance
func NewOllamaLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
return newCommonHttpLargeLanguageModelProvider(&OllamaLargeLanguageModelProvider{
OllamaServerURL: config.OllamaServerURL,
ReceiptImageRecognitionModelID: config.OllamaReceiptImageRecognitionModelID,
})
}
@@ -0,0 +1,138 @@
package llm
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
"github.com/mayswind/ezbookkeeping/pkg/core"
)
func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{}
request := &LargeLanguageModelRequest{
SystemPrompt: "You are a helpful assistant.",
UserPrompt: []byte("Hello, how are you?"),
}
bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err)
var body map[string]interface{}
err = json.Unmarshal(bodyBytes, &body)
assert.Nil(t, err)
assert.Equal(t, "{\"format\":\"json\",\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\"model\":\"test\",\"stream\":false}", string(bodyBytes))
}
func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{}
request := &LargeLanguageModelRequest{
SystemPrompt: "What's in this image?",
UserPrompt: []byte("fakedata"),
UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
}
bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err)
var body map[string]interface{}
err = json.Unmarshal(bodyBytes, &body)
assert.Nil(t, err)
assert.Equal(t, "{\"format\":\"json\",\"messages\":[{\"content\":\"What's in this image?\",\"role\":\"system\"},{\"content\":\"\",\"images\":[\"ZmFrZWRhdGE=\"],\"role\":\"user\"}],\"model\":\"test\",\"stream\":false}", string(bodyBytes))
}
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_ValidJsonResponse(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{}
response := `{
"model": "test",
"created_at": "2025-09-01T01:02:03.456789Z",
"message": {
"role": "assistant",
"content": "This is a test response"
}
}`
result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err)
assert.Equal(t, "This is a test response", result.Content)
}
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_EmptyResponse(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{}
response := `{
"model": "test",
"created_at": "2025-09-01T01:02:03.456789Z",
"message": {
"role": "assistant",
"content": ""
}
}`
result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err)
assert.Equal(t, "", result.Content)
}
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_EmptyChoices(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{}
response := `{
"model": "test",
"created_at": "2025-09-01T01:02:03.456789Z",
"message": {}
}`
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.EqualError(t, err, "failed to request third party api")
}
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_NoChoiceContent(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{}
response := `{
"model": "test",
"created_at": "2025-09-01T01:02:03.456789Z",
"message": {
"role": "assistant"
}
}`
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.EqualError(t, err, "failed to request third party api")
}
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_InvalidJson(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{}
response := "error"
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.EqualError(t, err, "failed to request third party api")
}
func TestOllamaLargeLanguageModelProvider_GetOllamaRequestUrl(t *testing.T) {
provider := &OllamaLargeLanguageModelProvider{
OllamaServerURL: "http://localhost:11434/",
}
url := provider.getOllamaRequestUrl()
assert.Equal(t, "http://localhost:11434/api/chat", url)
provider = &OllamaLargeLanguageModelProvider{
OllamaServerURL: "http://localhost:11434",
}
url = provider.getOllamaRequestUrl()
assert.Equal(t, "http://localhost:11434/api/chat", url)
provider = &OllamaLargeLanguageModelProvider{
OllamaServerURL: "http://example.com/ollama/",
}
url = provider.getOllamaRequestUrl()
assert.Equal(t, "http://example.com/ollama/api/chat", url)
}
@@ -0,0 +1,187 @@
package llm
import (
"bytes"
"encoding/base64"
"encoding/json"
"io"
"net/http"
"strings"
"github.com/invopop/jsonschema"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/errs"
"github.com/mayswind/ezbookkeeping/pkg/log"
)
// OpenAIChatCompletionsLargeLanguageModelProvider defines the structure of OpenAI chat completions compatible large language model provider
type OpenAIChatCompletionsLargeLanguageModelProvider interface {
// BuildChatCompletionsHttpRequest returns the chat completions http request
BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error)
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id if supported, otherwise returns empty string
GetReceiptImageRecognitionModelID() string
}
// OpenAICommonChatCompletionsHttpLargeLanguageModelProvider defines the structure of OpenAI common compatible large language model provider based on chat completions api
type OpenAICommonChatCompletionsHttpLargeLanguageModelProvider struct {
CommonHttpLargeLanguageModelProvider
provider OpenAIChatCompletionsLargeLanguageModelProvider
}
// BuildTextualRequest returns the http request by OpenAI common compatible provider
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) {
requestBody, err := p.buildJsonRequestBody(c, uid, request, modelId, responseType)
if err != nil {
return nil, err
}
httpRequest, err := p.provider.BuildChatCompletionsHttpRequest(c, uid)
if err != nil {
return nil, err
}
httpRequest.Body = io.NopCloser(bytes.NewReader(requestBody))
httpRequest.Header.Set("Content-Type", "application/json")
return httpRequest, nil
}
// ParseTextualResponse returns the textual response by OpenAI common compatible provider
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
responseBody := make(map[string]any)
err := json.Unmarshal(body, &responseBody)
if err != nil {
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] failed to parse response for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.ErrFailedToRequestRemoteApi
}
choices, ok := responseBody["choices"].([]any)
if !ok || len(choices) < 1 {
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no choices found in response for user \"uid:%d\"", uid)
return nil, errs.ErrFailedToRequestRemoteApi
}
firstChoice, ok := choices[0].(map[string]any)
if !ok {
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] invalid choice format in response for user \"uid:%d\"", uid)
return nil, errs.ErrFailedToRequestRemoteApi
}
message, ok := firstChoice["message"].(map[string]any)
if !ok {
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no message found in choice for user \"uid:%d\"", uid)
return nil, errs.ErrFailedToRequestRemoteApi
}
content, ok := message["content"].(string)
if !ok {
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no content found in message for user \"uid:%d\"", uid)
return nil, errs.ErrFailedToRequestRemoteApi
}
if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
if strings.HasPrefix(content, "```json") && strings.HasSuffix(content, "```") {
content = strings.TrimPrefix(content, "```json")
content = strings.TrimSuffix(content, "```")
} else if strings.HasPrefix(content, "```") && strings.HasSuffix(content, "```") {
content = strings.TrimPrefix(content, "```")
content = strings.TrimSuffix(content, "```")
}
}
textualResponse := &LargeLanguageModelTextualResponse{
Content: content,
}
return textualResponse, nil
}
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI common compatible provider
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
return p.provider.GetReceiptImageRecognitionModelID()
}
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) ([]byte, error) {
requestMessages := make([]any, 0)
if request.SystemPrompt != "" {
requestMessages = append(requestMessages, map[string]string{
"role": "system",
"content": request.SystemPrompt,
})
}
if len(request.UserPrompt) > 0 {
if request.UserPromptType == LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL {
imageBase64Data := "data:image/png;base64," + base64.StdEncoding.EncodeToString(request.UserPrompt)
requestMessages = append(requestMessages, map[string]any{
"role": "user",
"content": []any{
core.O{
"type": "image_url",
"image_url": core.O{
"url": imageBase64Data,
},
},
},
})
} else {
requestMessages = append(requestMessages, map[string]string{
"role": "user",
"content": string(request.UserPrompt),
})
}
}
requestBody := make(map[string]any)
requestBody["model"] = modelId
requestBody["stream"] = request.Stream
requestBody["messages"] = requestMessages
if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
if request.ResponseJsonObjectType != nil {
schemeGenerator := jsonschema.Reflector{
Anonymous: true,
DoNotReference: true,
ExpandedStruct: true,
}
schema := schemeGenerator.ReflectFromType(request.ResponseJsonObjectType)
schema.Version = ""
requestBody["response_format"] = core.O{
"type": "json_schema",
"json_schema": schema,
}
} else {
requestBody["response_format"] = core.O{
"type": "json_object",
}
}
}
requestBodyBytes, err := json.Marshal(requestBody)
if err != nil {
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error())
return nil, errs.ErrOperationFailed
}
log.Debugf(c, "[openai_common_compatible_large_language_model_provider.buildJsonRequestBody] request body is %s", requestBodyBytes)
return requestBodyBytes, nil
}
func newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(provider OpenAIChatCompletionsLargeLanguageModelProvider) LargeLanguageModelProvider {
return newCommonHttpLargeLanguageModelProvider(&OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: provider,
})
}
@@ -0,0 +1,157 @@
package llm
import (
"encoding/json"
"testing"
"github.com/stretchr/testify/assert"
"github.com/mayswind/ezbookkeeping/pkg/core"
)
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) {
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: &OpenAILargeLanguageModelProvider{},
}
request := &LargeLanguageModelRequest{
SystemPrompt: "You are a helpful assistant.",
UserPrompt: []byte("Hello, how are you?"),
}
bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err)
var body map[string]interface{}
err = json.Unmarshal(bodyBytes, &body)
assert.Nil(t, err)
assert.Equal(t, "{\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\"model\":\"test\",\"response_format\":{\"type\":\"json_object\"},\"stream\":false}", string(bodyBytes))
}
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) {
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: &OpenAILargeLanguageModelProvider{},
}
request := &LargeLanguageModelRequest{
SystemPrompt: "What's in this image?",
UserPrompt: []byte("fakedata"),
UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
}
bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err)
var body map[string]interface{}
err = json.Unmarshal(bodyBytes, &body)
assert.Nil(t, err)
assert.Equal(t, "{\"messages\":[{\"content\":\"What's in this image?\",\"role\":\"system\"},{\"content\":[{\"image_url\":{\"url\":\"data:image/png;base64,ZmFrZWRhdGE=\"},\"type\":\"image_url\"}],\"role\":\"user\"}],\"model\":\"test\",\"response_format\":{\"type\":\"json_object\"},\"stream\":false}", string(bodyBytes))
}
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_ValidJsonResponse(t *testing.T) {
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: &OpenAILargeLanguageModelProvider{},
}
response := `{
"id": "test-123",
"object": "chat.completion",
"created": 1234567890,
"model": "test",
"usage": {
"prompt_tokens": 13,
"completion_tokens": 7,
"total_tokens": 20
},
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"role": "assistant",
"content": "This is a test response"
}
}
]
}`
result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err)
assert.Equal(t, "This is a test response", result.Content)
}
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_EmptyResponse(t *testing.T) {
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: &OpenAILargeLanguageModelProvider{},
}
response := `{
"id": "test-123",
"object": "chat.completion",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"role": "assistant",
"content": ""
}
}
]
}`
result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.Nil(t, err)
assert.Equal(t, "", result.Content)
}
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_EmptyChoices(t *testing.T) {
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: &OpenAILargeLanguageModelProvider{},
}
response := `{
"id": "test-123",
"object": "chat.completion",
"choices": []
}`
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.EqualError(t, err, "failed to request third party api")
}
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_NoChoiceContent(t *testing.T) {
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: &OpenAILargeLanguageModelProvider{},
}
response := `{
"id": "chatcmpl-123",
"object": "chat.completion",
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"role": "assistant"
}
}
]
}`
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.EqualError(t, err, "failed to request third party api")
}
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_InvalidJson(t *testing.T) {
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
provider: &OpenAILargeLanguageModelProvider{},
}
response := "error"
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
assert.EqualError(t, err, "failed to request third party api")
}
@@ -0,0 +1,58 @@
package llm
import (
"net/http"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
const openAICompatibleChatCompletionsPath = "chat/completions"
// OpenAICompatibleLargeLanguageModelProvider defines the structure of OpenAI compatible large language model provider
type OpenAICompatibleLargeLanguageModelProvider struct {
OpenAIChatCompletionsLargeLanguageModelProvider
OpenAICompatibleBaseURL string
OpenAICompatibleAPIKey string
ReceiptImageRecognitionModelID string
}
// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenAI compatible provider
func (p *OpenAICompatibleLargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) {
req, err := http.NewRequest("POST", p.getFinalChatCompletionsRequestUrl(), nil)
if err != nil {
return nil, err
}
if p.OpenAICompatibleAPIKey != "" {
req.Header.Set("Authorization", "Bearer "+p.OpenAICompatibleAPIKey)
}
return req, nil
}
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI compatible provider
func (p *OpenAICompatibleLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
return p.ReceiptImageRecognitionModelID
}
func (p *OpenAICompatibleLargeLanguageModelProvider) getFinalChatCompletionsRequestUrl() string {
url := p.OpenAICompatibleBaseURL
if url[len(url)-1] != '/' {
url += "/"
}
url += openAICompatibleChatCompletionsPath
return url
}
// NewOpenAICompatibleLargeLanguageModelProvider creates a new OpenAI compatible large language model provider instance
func NewOpenAICompatibleLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAICompatibleLargeLanguageModelProvider{
OpenAICompatibleBaseURL: config.OpenAICompatibleBaseURL,
OpenAICompatibleAPIKey: config.OpenAICompatibleAPIKey,
ReceiptImageRecognitionModelID: config.OpenAICompatibleReceiptImageRecognitionModelID,
})
}
@@ -0,0 +1,27 @@
package llm
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestOpenAICompatibleLargeLanguageModelProvider_GetFinalRequestUrl(t *testing.T) {
provider := &OpenAICompatibleLargeLanguageModelProvider{
OpenAICompatibleBaseURL: "https://api.example.com/v1/",
}
url := provider.getFinalChatCompletionsRequestUrl()
assert.Equal(t, "https://api.example.com/v1/chat/completions", url)
provider = &OpenAICompatibleLargeLanguageModelProvider{
OpenAICompatibleBaseURL: "https://api.example.com/v1",
}
url = provider.getFinalChatCompletionsRequestUrl()
assert.Equal(t, "https://api.example.com/v1/chat/completions", url)
provider = &OpenAICompatibleLargeLanguageModelProvider{
OpenAICompatibleBaseURL: "https://example.com/api",
}
url = provider.getFinalChatCompletionsRequestUrl()
assert.Equal(t, "https://example.com/api/chat/completions", url)
}
@@ -0,0 +1,43 @@
package llm
import (
"net/http"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
// OpenAILargeLanguageModelProvider defines the structure of OpenAI large language model provider
type OpenAILargeLanguageModelProvider struct {
OpenAIChatCompletionsLargeLanguageModelProvider
OpenAIAPIKey string
ReceiptImageRecognitionModelID string
}
const openAIChatCompletionsUrl = "https://api.openai.com/v1/chat/completions"
// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenAI provider
func (p *OpenAILargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) {
req, err := http.NewRequest("POST", openAIChatCompletionsUrl, nil)
if err != nil {
return nil, err
}
req.Header.Set("Authorization", "Bearer "+p.OpenAIAPIKey)
return req, nil
}
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI provider
func (p *OpenAILargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
return p.ReceiptImageRecognitionModelID
}
// NewOpenAILargeLanguageModelProvider creates a new OpenAI large language model provider instance
func NewOpenAILargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAILargeLanguageModelProvider{
OpenAIAPIKey: config.OpenAIAPIKey,
ReceiptImageRecognitionModelID: config.OpenAIReceiptImageRecognitionModelID,
})
}
@@ -0,0 +1,45 @@
package llm
import (
"net/http"
"github.com/mayswind/ezbookkeeping/pkg/core"
"github.com/mayswind/ezbookkeeping/pkg/settings"
)
// OpenRouterLargeLanguageModelProvider defines the structure of OpenRouter large language model provider
type OpenRouterLargeLanguageModelProvider struct {
OpenAIChatCompletionsLargeLanguageModelProvider
OpenRouterAPIKey string
ReceiptImageRecognitionModelID string
}
const openRouterChatCompletionsUrl = "https://openrouter.ai/api/v1/chat/completions"
// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenRouter provider
func (p *OpenRouterLargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) {
req, err := http.NewRequest("POST", openRouterChatCompletionsUrl, nil)
if err != nil {
return nil, err
}
req.Header.Set("Authorization", "Bearer "+p.OpenRouterAPIKey)
req.Header.Set("HTTP-Referer", "https://ezbookkeeping.mayswind.net/")
req.Header.Set("X-Title", "ezBookkeeping")
return req, nil
}
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenRouter provider
func (p *OpenRouterLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
return p.ReceiptImageRecognitionModelID
}
// NewOpenRouterLargeLanguageModelProvider creates a new OpenRouter large language model provider instance
func NewOpenRouterLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenRouterLargeLanguageModelProvider{
OpenRouterAPIKey: config.OpenRouterAPIKey,
ReceiptImageRecognitionModelID: config.OpenRouterReceiptImageRecognitionModelID,
})
}
+27
View File
@@ -0,0 +1,27 @@
package models
// RecognizedReceiptImageResponse represents a view-object of recognized receipt image response
type RecognizedReceiptImageResponse struct {
Type TransactionType `json:"type"`
Time int64 `json:"time,omitempty"`
CategoryId int64 `json:"categoryId,string,omitempty"`
SourceAccountId int64 `json:"sourceAccountId,string,omitempty"`
DestinationAccountId int64 `json:"destinationAccountId,string,omitempty"`
SourceAmount int64 `json:"sourceAmount,omitempty"`
DestinationAmount int64 `json:"destinationAmount,omitempty"`
TagIds []string `json:"tagIds,omitempty"`
Comment string `json:"comment,omitempty"`
}
// RecognizedReceiptImageResult represents the result of recognized receipt image
type RecognizedReceiptImageResult struct {
Type string `json:"type,omitempty" jsonschema:"enum=income,enum=expense,enum=transfer" jsonschema_description:"Transaction type (income, expense, transfer)"`
Time string `json:"time" jsonschema:"format=date-time" jsonschema_description:"Transaction time in long date time format (YYYY-MM-DD HH:mm:ss, e.g. 2023-01-01 12:00:00)"`
Amount string `json:"amount,omitempty" jsonschema_description:"Transaction amount"`
AccountName string `json:"account,omitempty" jsonschema_description:"Account name for the transaction"`
CategoryName string `json:"category,omitempty" jsonschema_description:"Category name for the transaction"`
TagNames []string `json:"tags,omitempty" jsonschema_description:"List of tags associated with the transaction (maximum 10 tags allowed)"`
Description string `json:"description,omitempty" jsonschema_description:"Transaction description"`
DestinationAmount string `json:"destination_amount,omitempty" jsonschema_description:"Destination amount for transfer transactions"`
DestinationAccountName string `json:"destination_account,omitempty" jsonschema_description:"Destination account name for transfer transactions"`
}
+73
View File
@@ -66,6 +66,13 @@ const (
WebDAVStorageType string = "webdav"
)
const (
OpenAILLMProvider string = "openai"
OpenAICompatibleLLMProvider string = "openai_compatible"
OpenRouterLLMProvider string = "openrouter"
OllamaLLMProvider string = "ollama"
)
// Uuid generator types
const (
InternalUuidGeneratorType string = "internal"
@@ -140,6 +147,9 @@ const (
defaultWebDAVRequestTimeout uint32 = 10000 // 10 seconds
defaultAIRecognitionPictureMaxSize uint32 = 10485760 // 10MB
defaultLargeLanguageModelAPIRequestTimeout uint32 = 60000 // 60 seconds
defaultInMemoryDuplicateCheckerCleanupInterval uint32 = 60 // 1 minutes
defaultDuplicateSubmissionsInterval uint32 = 300 // 5 minutes
@@ -281,6 +291,23 @@ type Config struct {
MinIOConfig *MinIOConfig
WebDAVConfig *WebDAVConfig
// Large Language Model
LLMProvider string
OpenAIAPIKey string
OpenAIReceiptImageRecognitionModelID string
OpenAICompatibleBaseURL string
OpenAICompatibleAPIKey string
OpenAICompatibleReceiptImageRecognitionModelID string
OpenRouterAPIKey string
OpenRouterReceiptImageRecognitionModelID string
OllamaServerURL string
OllamaReceiptImageRecognitionModelID string
TransactionFromAIImageRecognition bool
MaxAIRecognitionPictureFileSize uint32
LargeLanguageModelAPIRequestTimeout uint32
LargeLanguageModelAPIProxy string
LargeLanguageModelAPISkipTLSVerify bool
// Uuid
UuidGeneratorType string
UuidServerId uint8
@@ -426,6 +453,12 @@ func LoadConfiguration(configFilePath string) (*Config, error) {
return nil, err
}
err = loadLLMConfiguration(config, cfgFile, "llm")
if err != nil {
return nil, err
}
err = loadUuidConfiguration(config, cfgFile, "uuid")
if err != nil {
@@ -751,6 +784,46 @@ func loadStorageConfiguration(config *Config, configFile *ini.File, sectionName
return nil
}
func loadLLMConfiguration(config *Config, configFile *ini.File, sectionName string) error {
llmProvider := getConfigItemStringValue(configFile, sectionName, "llm_provider")
if llmProvider == "" {
config.LLMProvider = ""
} else if llmProvider == OpenAILLMProvider {
config.LLMProvider = OpenAILLMProvider
} else if llmProvider == OpenAICompatibleLLMProvider {
config.LLMProvider = OpenAICompatibleLLMProvider
} else if llmProvider == OpenRouterLLMProvider {
config.LLMProvider = OpenRouterLLMProvider
} else if llmProvider == OllamaLLMProvider {
config.LLMProvider = OllamaLLMProvider
} else {
return errs.ErrInvalidLLMProvider
}
config.OpenAIAPIKey = getConfigItemStringValue(configFile, sectionName, "openai_api_key")
config.OpenAIReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openai_receipt_image_recognition_model_id")
config.OpenAICompatibleBaseURL = getConfigItemStringValue(configFile, sectionName, "openai_compatible_base_url")
config.OpenAICompatibleAPIKey = getConfigItemStringValue(configFile, sectionName, "openai_compatible_api_key")
config.OpenAICompatibleReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openai_compatible_receipt_image_recognition_model_id")
config.OpenRouterAPIKey = getConfigItemStringValue(configFile, sectionName, "openrouter_api_key")
config.OpenRouterReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openrouter_receipt_image_recognition_model_id")
config.OllamaServerURL = getConfigItemStringValue(configFile, sectionName, "ollama_server_url")
config.OllamaReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "ollama_receipt_image_recognition_model_id")
config.TransactionFromAIImageRecognition = getConfigItemBoolValue(configFile, sectionName, "transaction_from_ai_image_recognition", false)
config.MaxAIRecognitionPictureFileSize = getConfigItemUint32Value(configFile, sectionName, "max_ai_recognition_picture_size", defaultAIRecognitionPictureMaxSize)
config.LargeLanguageModelAPIProxy = getConfigItemStringValue(configFile, sectionName, "proxy", "system")
config.LargeLanguageModelAPIRequestTimeout = getConfigItemUint32Value(configFile, sectionName, "request_timeout", defaultLargeLanguageModelAPIRequestTimeout)
config.LargeLanguageModelAPISkipTLSVerify = getConfigItemBoolValue(configFile, sectionName, "skip_tls_verify", false)
return nil
}
func loadUuidConfiguration(config *Config, configFile *ini.File, sectionName string) error {
if getConfigItemStringValue(configFile, sectionName, "generator_type") == InternalUuidGeneratorType {
config.UuidGeneratorType = InternalUuidGeneratorType
+3 -2
View File
@@ -4,6 +4,7 @@ type KnownTemplate string
// Known templates
const (
TEMPLATE_VERIFY_EMAIL KnownTemplate = "email/verify_email"
TEMPLATE_PASSWORD_RESET KnownTemplate = "email/password_reset"
TEMPLATE_VERIFY_EMAIL KnownTemplate = "email/verify_email"
TEMPLATE_PASSWORD_RESET KnownTemplate = "email/password_reset"
SYSTEM_PROMPT_RECEIPT_IMAGE_RECOGNITION KnownTemplate = "prompt/receipt_image_recognition"
)