diff --git a/README.md b/README.md index 415bbf0e..1bffe1f7 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ Live Demo: [https://ezbookkeeping-demo.mayswind.net](https://ezbookkeeping-demo. - PWA support for native-like mobile experience - Dark mode - **AI-Powered Features** + - Receipt image recognition - Supports MCP (Model Context Protocol) for AI integration - **Powerful Bookkeeping** - Two-level accounts and categories diff --git a/cmd/initializer.go b/cmd/initializer.go index a26ec124..5389e003 100644 --- a/cmd/initializer.go +++ b/cmd/initializer.go @@ -9,6 +9,7 @@ import ( "github.com/mayswind/ezbookkeeping/pkg/datastore" "github.com/mayswind/ezbookkeeping/pkg/duplicatechecker" "github.com/mayswind/ezbookkeeping/pkg/exchangerates" + "github.com/mayswind/ezbookkeeping/pkg/llm" "github.com/mayswind/ezbookkeeping/pkg/log" "github.com/mayswind/ezbookkeeping/pkg/mail" "github.com/mayswind/ezbookkeeping/pkg/settings" @@ -90,6 +91,15 @@ func initializeSystem(c *core.CliContext) (*settings.Config, error) { return nil, err } + err = llm.InitializeLargeLanguageModelProvider(config) + + if err != nil { + if !isDisableBootLog { + log.BootErrorf(c, "[initializer.initializeSystem] initializes large language model provider failed, because %s", err.Error()) + } + return nil, err + } + err = uuid.InitializeUuidGenerator(config) if err != nil { @@ -155,6 +165,9 @@ func getConfigWithoutSensitiveData(config *settings.Config) *settings.Config { clonedConfig.DatabaseConfig.DatabasePassword = "****" clonedConfig.SMTPConfig.SMTPPasswd = "****" clonedConfig.MinIOConfig.SecretAccessKey = "****" + clonedConfig.OpenAIAPIKey = "****" + clonedConfig.OpenAICompatibleAPIKey = "****" + clonedConfig.OpenRouterAPIKey = "****" clonedConfig.SecretKey = "****" clonedConfig.AmapApplicationSecret = "****" diff --git a/cmd/webserver.go b/cmd/webserver.go index 424c77da..3342a237 100644 --- a/cmd/webserver.go +++ b/cmd/webserver.go @@ -396,6 +396,13 @@ func startWebServer(c 
*core.CliContext) error { apiV1Route.POST("/transaction/templates/move.json", bindApi(api.TransactionTemplates.TemplateMoveHandler)) apiV1Route.POST("/transaction/templates/delete.json", bindApi(api.TransactionTemplates.TemplateDeleteHandler)) + // Large Language Models + if config.LLMProvider != "" { + if config.TransactionFromAIImageRecognition { + apiV1Route.POST("/llm/transactions/recognize_receipt_image.json", bindApi(api.LargeLanguageModels.RecognizeReceiptImageHandler)) + } + } + // Exchange Rates apiV1Route.GET("/exchange_rates/latest.json", bindApi(api.ExchangeRates.LatestExchangeRateHandler)) apiV1Route.POST("/exchange_rates/user_custom/update.json", bindApi(api.ExchangeRates.UserCustomExchangeRateUpdateHandler)) diff --git a/conf/ezbookkeeping.ini b/conf/ezbookkeeping.ini index ab4f145a..979547f9 100644 --- a/conf/ezbookkeeping.ini +++ b/conf/ezbookkeeping.ini @@ -164,6 +164,53 @@ webdav_proxy = system # For "webdav" storage only, set to true to skip tls verification when connect webdav webdav_skip_tls_verify = false +[llm] +# Large Language Model (LLM) provider, supports the following types: "openai", "openai_compatible", "openrouter", "ollama" +llm_provider = + +# For "openai" llm provider only, OpenAI API secret key, please visit https://platform.openai.com/api-keys for more information +openai_api_key = + +# For "openai" llm provider only, receipt image recognition model for creating transactions from images +openai_receipt_image_recognition_model_id = + +# For "openai_compatible" llm provider only, OpenAI compatible API base url, e.g. 
"https://api.openai.com/v1/" +openai_compatible_base_url = + +# For "openai_compatible" llm provider only, OpenAI compatible API secret key +openai_compatible_api_key = + +# For "openai_compatible" llm provider only, receipt image recognition model for creating transactions from images +openai_compatible_receipt_image_recognition_model_id = + +# For "openrouter" llm provider only, OpenRouter API key, please visit https://openrouter.ai/settings/keys for more information +openrouter_api_key = + +# For "openrouter" llm provider only, receipt image recognition model for creating transactions from images +openrouter_receipt_image_recognition_model_id = + +# For "ollama" llm provider only, Ollama server url, e.g. "http://127.0.0.1:11434/" +ollama_server_url = + +# For "ollama" llm provider only, receipt image recognition model for creating transactions from images +ollama_receipt_image_recognition_model_id = + +# Set to true to enable creating transactions from AI image recognition results, requires llm_provider and its related receipt image recognition model to be configured properly +transaction_from_ai_image_recognition = false + +# Maximum allowed AI recognition picture file size (1 - 4294967295 bytes) +max_ai_recognition_picture_size = 10485760 + +# Requesting large language model api timeout (0 - 4294967295 milliseconds) +# Set to 0 to disable timeout for requesting large language model api, default is 60000 (60 seconds) +request_timeout = 60000 + +# Proxy for ezbookkeeping server requesting large language model api, supports "system" (use system proxy), "none" (do not use proxy), or proxy URL which starts with "http://", "https://" or "socks5://", default is "system" +proxy = system + +# Set to true to skip tls verification when request large language model api +skip_tls_verify = false + [uuid] # Uuid generator type, supports "internal" currently generator_type = internal diff --git a/pkg/api/large_language_models.go b/pkg/api/large_language_models.go new file 
mode 100644 index 00000000..69f8d0f9 --- /dev/null +++ b/pkg/api/large_language_models.go @@ -0,0 +1,345 @@ +package api + +import ( + "bytes" + "encoding/json" + "io" + "strings" + + "github.com/mayswind/ezbookkeeping/pkg/core" + "github.com/mayswind/ezbookkeeping/pkg/errs" + "github.com/mayswind/ezbookkeeping/pkg/llm" + "github.com/mayswind/ezbookkeeping/pkg/log" + "github.com/mayswind/ezbookkeeping/pkg/models" + "github.com/mayswind/ezbookkeeping/pkg/services" + "github.com/mayswind/ezbookkeeping/pkg/settings" + "github.com/mayswind/ezbookkeeping/pkg/templates" + "github.com/mayswind/ezbookkeeping/pkg/utils" +) + +// LargeLanguageModelsApi represents large language models api +type LargeLanguageModelsApi struct { + ApiUsingConfig + transactionCategories *services.TransactionCategoryService + transactionTags *services.TransactionTagService + accounts *services.AccountService + users *services.UserService +} + +// Initialize a large language models api singleton instance +var ( + LargeLanguageModels = &LargeLanguageModelsApi{ + ApiUsingConfig: ApiUsingConfig{ + container: settings.Container, + }, + transactionCategories: services.TransactionCategories, + transactionTags: services.TransactionTags, + accounts: services.Accounts, + users: services.Users, + } +) + +// RecognizeReceiptImageHandler returns the recognized receipt image result +func (a *LargeLanguageModelsApi) RecognizeReceiptImageHandler(c *core.WebContext) (any, *errs.Error) { + if a.CurrentConfig().LLMProvider == "" || !a.CurrentConfig().TransactionFromAIImageRecognition { + return nil, errs.ErrLargeLanguageModelProviderNotEnabled + } + + utcOffset, err := c.GetClientTimezoneOffset() + + if err != nil { + log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] cannot get client timezone offset, because %s", err.Error()) + return nil, errs.ErrClientTimezoneOffsetInvalid + } + + uid := c.GetCurrentUid() + user, err := a.users.GetUserById(c, uid) + + if err != nil { + if 
!errs.IsCustomError(err) { + log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get user for user \"uid:%d\", because %s", uid, err.Error()) + } + + return false, errs.ErrUserNotFound + } + + if user.FeatureRestriction.Contains(core.USER_FEATURE_RESTRICTION_TYPE_CREATE_TRANSACTION_FROM_AI_IMAGE_RECOGNITION) { + return false, errs.ErrNotPermittedToPerformThisAction + } + + form, err := c.MultipartForm() + + if err != nil { + log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get multi-part form data for user \"uid:%d\", because %s", uid, err.Error()) + return nil, errs.ErrParameterInvalid + } + + imageFiles := form.File["image"] + + if len(imageFiles) < 1 { + log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] there is no image in request for user \"uid:%d\"", uid) + return nil, errs.ErrNoAIRecognitionImage + } + + if imageFiles[0].Size < 1 { + log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] the size of image in request is zero for user \"uid:%d\"", uid) + return nil, errs.ErrAIRecognitionImageIsEmpty + } + + if imageFiles[0].Size > int64(a.CurrentConfig().MaxAIRecognitionPictureFileSize) { + log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] the upload file size \"%d\" exceeds the maximum size \"%d\" of image for user \"uid:%d\"", imageFiles[0].Size, a.CurrentConfig().MaxAIRecognitionPictureFileSize, uid) + return nil, errs.ErrExceedMaxAIRecognitionImageFileSize + } + + fileExtension := utils.GetFileNameExtension(imageFiles[0].Filename) + + if utils.GetImageContentType(fileExtension) == "" { + log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] the file extension \"%s\" of image in request is not supported for user \"uid:%d\"", fileExtension, uid) + return nil, errs.ErrImageTypeNotSupported + } + + imageFile, err := imageFiles[0].Open() + + if err != nil { + log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get image file 
from request for user \"uid:%d\", because %s", uid, err.Error()) + return nil, errs.ErrOperationFailed + } + + defer imageFile.Close() + + imageData, err := io.ReadAll(imageFile) + + if err != nil { + log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to read image file from request for user \"uid:%d\", because %s", uid, err.Error()) + return nil, errs.ErrOperationFailed + } + + accounts, err := a.accounts.GetAllAccountsByUid(c, uid) + + if err != nil { + log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get all accounts for user \"uid:%d\", because %s", uid, err.Error()) + return nil, errs.Or(err, errs.ErrOperationFailed) + } + + accountMap := a.accounts.GetVisibleAccountNameMapByList(accounts) + accountNames := make([]string, 0, len(accounts)) + + for i := 0; i < len(accounts); i++ { + if accounts[i].Hidden || accounts[i].Type == models.ACCOUNT_TYPE_MULTI_SUB_ACCOUNTS { + continue + } + + accountNames = append(accountNames, accounts[i].Name) + } + + categories, err := a.transactionCategories.GetAllCategoriesByUid(c, uid, 0, -1) + + if err != nil { + log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get categories for user \"uid:%d\", because %s", uid, err.Error()) + return nil, errs.Or(err, errs.ErrOperationFailed) + } + + incomeCategoryMap := make(map[string]*models.TransactionCategory) + incomeCategoryNames := make([]string, 0) + + expenseCategoryMap := make(map[string]*models.TransactionCategory) + expenseCategoryNames := make([]string, 0) + + transferCategoryMap := make(map[string]*models.TransactionCategory) + transferCategoryNames := make([]string, 0) + + for i := 0; i < len(categories); i++ { + category := categories[i] + + if category.Hidden || category.ParentCategoryId == models.LevelOneTransactionCategoryParentId { + continue + } + + if category.Type == models.CATEGORY_TYPE_INCOME { + incomeCategoryMap[category.Name] = category + incomeCategoryNames = 
append(incomeCategoryNames, category.Name) + } else if category.Type == models.CATEGORY_TYPE_EXPENSE { + expenseCategoryMap[category.Name] = category + expenseCategoryNames = append(expenseCategoryNames, category.Name) + } else if category.Type == models.CATEGORY_TYPE_TRANSFER { + transferCategoryMap[category.Name] = category + transferCategoryNames = append(transferCategoryNames, category.Name) + } + } + + tags, err := a.transactionTags.GetAllTagsByUid(c, uid) + + if err != nil { + log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get tags for user \"uid:%d\", because %s", uid, err.Error()) + return nil, errs.Or(err, errs.ErrOperationFailed) + } + + tagMap := a.transactionTags.GetVisibleTagNameMapByList(tags) + tagNames := make([]string, 0, len(tags)) + + for i := 0; i < len(tags); i++ { + if tags[i].Hidden { + continue + } + + tagNames = append(tagNames, tags[i].Name) + } + + systemPrompt, err := templates.GetTemplate(templates.SYSTEM_PROMPT_RECEIPT_IMAGE_RECOGNITION) + + if err != nil { + return nil, errs.Or(err, errs.ErrOperationFailed) + } + + systemPromptParams := map[string]any{ + "AllExpenseCategoryNames": strings.Join(expenseCategoryNames, "\n"), + "AllIncomeCategoryNames": strings.Join(incomeCategoryNames, "\n"), + "AllTransferCategoryNames": strings.Join(transferCategoryNames, "\n"), + "AllAccountNames": strings.Join(accountNames, "\n"), + "AllTagNames": strings.Join(tagNames, "\n"), + } + + var bodyBuffer bytes.Buffer + err = systemPrompt.Execute(&bodyBuffer, systemPromptParams) + + if err != nil { + return nil, errs.Or(err, errs.ErrOperationFailed) + } + + llmRequest := &llm.LargeLanguageModelRequest{ + Stream: false, + SystemPrompt: strings.ReplaceAll(bodyBuffer.String(), "\r\n", "\n"), + UserPrompt: imageData, + UserPromptType: llm.LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL, + } + + llmResponse, err := llm.Container.GetJsonResponseByReceiptImageRecognitionModel(c, c.GetCurrentUid(), a.CurrentConfig(), llmRequest) + 
+ if err != nil { + return nil, errs.Or(err, errs.ErrOperationFailed) + } + + var result *models.RecognizedReceiptImageResult + + if err := json.Unmarshal([]byte(llmResponse.Content), &result); err != nil { + log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to unmarshal recognized receipt image result from llm response \"%s\" for user \"uid:%d\", because %s", llmResponse.Content, uid, err.Error()) + return nil, errs.Or(err, errs.ErrOperationFailed) + } + + return a.parseRecognizedReceiptImageResponse(c, uid, utcOffset, result, accountMap, expenseCategoryMap, incomeCategoryMap, transferCategoryMap, tagMap) +} + +func (a *LargeLanguageModelsApi) parseRecognizedReceiptImageResponse(c *core.WebContext, uid int64, utcOffset int16, recognizedResult *models.RecognizedReceiptImageResult, accountMap map[string]*models.Account, expenseCategoryMap map[string]*models.TransactionCategory, incomeCategoryMap map[string]*models.TransactionCategory, transferCategoryMap map[string]*models.TransactionCategory, tagMap map[string]*models.TransactionTag) (*models.RecognizedReceiptImageResponse, *errs.Error) { + recognizedReceiptImageResponse := &models.RecognizedReceiptImageResponse{ + Type: models.TRANSACTION_TYPE_EXPENSE, + } + + if recognizedResult == nil { + log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recognized result is null") + return nil, errs.ErrOperationFailed + } + + if recognizedResult.Type == "income" { + recognizedReceiptImageResponse.Type = models.TRANSACTION_TYPE_INCOME + + if len(recognizedResult.CategoryName) > 0 { + category, exists := incomeCategoryMap[recognizedResult.CategoryName] + + if exists { + recognizedReceiptImageResponse.CategoryId = category.CategoryId + } + } + } else if recognizedResult.Type == "expense" { + recognizedReceiptImageResponse.Type = models.TRANSACTION_TYPE_EXPENSE + + if len(recognizedResult.CategoryName) > 0 { + category, exists := expenseCategoryMap[recognizedResult.CategoryName] + +
if exists { + recognizedReceiptImageResponse.CategoryId = category.CategoryId + } + } + } else if recognizedResult.Type == "transfer" { + recognizedReceiptImageResponse.Type = models.TRANSACTION_TYPE_TRANSFER + + if len(recognizedResult.CategoryName) > 0 { + category, exists := transferCategoryMap[recognizedResult.CategoryName] + + if exists { + recognizedReceiptImageResponse.CategoryId = category.CategoryId + } + } + } else { + log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recognized transaction type \"%s\" is invalid", recognizedResult.Type) + return nil, errs.ErrOperationFailed + } + + if len(recognizedResult.Time) > 0 { + timestamp, err := utils.ParseFromLongDateTime(recognizedResult.Time, utcOffset) + + if err != nil { + log.Warnf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recognized time \"%s\" is invalid", recognizedResult.Time) + } else { + recognizedReceiptImageResponse.Time = timestamp.Unix() + } + } + + if len(recognizedResult.Amount) > 0 { + amount, err := utils.ParseAmount(recognizedResult.Amount) + + if err != nil { + log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recognized amount \"%s\" is invalid", recognizedResult.Amount) + return nil, errs.ErrOperationFailed + } + + recognizedReceiptImageResponse.SourceAmount = amount + + if recognizedReceiptImageResponse.Type == models.TRANSACTION_TYPE_TRANSFER && len(recognizedResult.DestinationAmount) > 0 { + destinationAmount, err := utils.ParseAmount(recognizedResult.DestinationAmount) + + if err != nil { + log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recognized destination amount \"%s\" is invalid", recognizedResult.DestinationAmount) + return nil, errs.ErrOperationFailed + } + + recognizedReceiptImageResponse.DestinationAmount = destinationAmount + } + } + + if len(recognizedResult.AccountName) > 0 { + account, exists := accountMap[recognizedResult.AccountName] + + if exists { + 
recognizedReceiptImageResponse.SourceAccountId = account.AccountId + } + } + + if len(recognizedResult.DestinationAccountName) > 0 { + account, exists := accountMap[recognizedResult.DestinationAccountName] + + if exists { + recognizedReceiptImageResponse.DestinationAccountId = account.AccountId + } + } + + if len(recognizedResult.TagNames) > 0 { + tagIds := make([]string, 0, len(recognizedResult.TagNames)) + + for i := 0; i < len(recognizedResult.TagNames); i++ { + tagName := recognizedResult.TagNames[i] + tag, exists := tagMap[tagName] + + if exists { + tagIds = append(tagIds, utils.Int64ToString(tag.TagId)) + } + } + + recognizedReceiptImageResponse.TagIds = tagIds + } + + if len(recognizedResult.Description) > 0 { + recognizedReceiptImageResponse.Comment = recognizedResult.Description + } + + return recognizedReceiptImageResponse, nil +} diff --git a/pkg/api/server_settings.go b/pkg/api/server_settings.go index ffc5b372..80c65fee 100644 --- a/pkg/api/server_settings.go +++ b/pkg/api/server_settings.go @@ -47,6 +47,12 @@ func (a *ServerSettingsApi) ServerSettingsJavascriptHandler(c *core.WebContext) a.appendBooleanSetting(builder, "mcp", config.EnableMCPServer) } + if config.LLMProvider != "" { + if config.TransactionFromAIImageRecognition { + a.appendBooleanSetting(builder, "llmt", config.TransactionFromAIImageRecognition) + } + } + if config.LoginPageTips.Enabled { a.appendMultiLanguageTipSetting(builder, "lpt", config.LoginPageTips) } diff --git a/pkg/core/user_feature_restriction.go b/pkg/core/user_feature_restriction.go index cec37c2b..2e0fa6ed 100644 --- a/pkg/core/user_feature_restriction.go +++ b/pkg/core/user_feature_restriction.go @@ -76,19 +76,20 @@ type UserFeatureRestrictionType uint64 // User Feature Restriction Type const ( - USER_FEATURE_RESTRICTION_TYPE_UPDATE_PASSWORD UserFeatureRestrictionType = 1 - USER_FEATURE_RESTRICTION_TYPE_UPDATE_EMAIL UserFeatureRestrictionType = 2 - USER_FEATURE_RESTRICTION_TYPE_UPDATE_PROFILE_BASIC_INFO 
UserFeatureRestrictionType = 3 - USER_FEATURE_RESTRICTION_TYPE_UPDATE_AVATAR UserFeatureRestrictionType = 4 - USER_FEATURE_RESTRICTION_TYPE_REVOKE_OTHER_SESSION UserFeatureRestrictionType = 5 - USER_FEATURE_RESTRICTION_TYPE_ENABLE_2FA UserFeatureRestrictionType = 6 - USER_FEATURE_RESTRICTION_TYPE_DISABLE_2FA UserFeatureRestrictionType = 7 - USER_FEATURE_RESTRICTION_TYPE_FORGET_PASSWORD UserFeatureRestrictionType = 8 - USER_FEATURE_RESTRICTION_TYPE_IMPORT_TRANSACTION UserFeatureRestrictionType = 9 - USER_FEATURE_RESTRICTION_TYPE_EXPORT_TRANSACTION UserFeatureRestrictionType = 10 - USER_FEATURE_RESTRICTION_TYPE_CLEAR_ALL_DATA UserFeatureRestrictionType = 11 - USER_FEATURE_RESTRICTION_TYPE_SYNC_APPLICATION_SETTINGS UserFeatureRestrictionType = 12 - USER_FEATURE_RESTRICTION_TYPE_MCP_ACCESS UserFeatureRestrictionType = 13 + USER_FEATURE_RESTRICTION_TYPE_UPDATE_PASSWORD UserFeatureRestrictionType = 1 + USER_FEATURE_RESTRICTION_TYPE_UPDATE_EMAIL UserFeatureRestrictionType = 2 + USER_FEATURE_RESTRICTION_TYPE_UPDATE_PROFILE_BASIC_INFO UserFeatureRestrictionType = 3 + USER_FEATURE_RESTRICTION_TYPE_UPDATE_AVATAR UserFeatureRestrictionType = 4 + USER_FEATURE_RESTRICTION_TYPE_REVOKE_OTHER_SESSION UserFeatureRestrictionType = 5 + USER_FEATURE_RESTRICTION_TYPE_ENABLE_2FA UserFeatureRestrictionType = 6 + USER_FEATURE_RESTRICTION_TYPE_DISABLE_2FA UserFeatureRestrictionType = 7 + USER_FEATURE_RESTRICTION_TYPE_FORGET_PASSWORD UserFeatureRestrictionType = 8 + USER_FEATURE_RESTRICTION_TYPE_IMPORT_TRANSACTION UserFeatureRestrictionType = 9 + USER_FEATURE_RESTRICTION_TYPE_EXPORT_TRANSACTION UserFeatureRestrictionType = 10 + USER_FEATURE_RESTRICTION_TYPE_CLEAR_ALL_DATA UserFeatureRestrictionType = 11 + USER_FEATURE_RESTRICTION_TYPE_SYNC_APPLICATION_SETTINGS UserFeatureRestrictionType = 12 + USER_FEATURE_RESTRICTION_TYPE_MCP_ACCESS UserFeatureRestrictionType = 13 + USER_FEATURE_RESTRICTION_TYPE_CREATE_TRANSACTION_FROM_AI_IMAGE_RECOGNITION UserFeatureRestrictionType = 14 ) const 
userFeatureRestrictionTypeMinValue UserFeatureRestrictionType = USER_FEATURE_RESTRICTION_TYPE_UPDATE_PASSWORD diff --git a/pkg/errs/error.go b/pkg/errs/error.go index d0bb0f0e..41d463df 100644 --- a/pkg/errs/error.go +++ b/pkg/errs/error.go @@ -40,6 +40,7 @@ const ( NormalSubcategoryConverter = 12 NormalSubcategoryUserCustomExchangeRate = 13 NormalSubcategoryModelContextProtocol = 14 + NormalSubcategoryLargeLanguageModel = 15 ) // Error represents the specific error returned to user diff --git a/pkg/errs/large_language_model.go b/pkg/errs/large_language_model.go new file mode 100644 index 00000000..fff49341 --- /dev/null +++ b/pkg/errs/large_language_model.go @@ -0,0 +1,11 @@ +package errs + +import "net/http" + +// Error codes related to large language model features +var ( + ErrLargeLanguageModelProviderNotEnabled = NewNormalError(NormalSubcategoryLargeLanguageModel, 0, http.StatusBadRequest, "llm provider is not enabled") + ErrNoAIRecognitionImage = NewNormalError(NormalSubcategoryLargeLanguageModel, 1, http.StatusBadRequest, "no image for AI recognition") + ErrAIRecognitionImageIsEmpty = NewNormalError(NormalSubcategoryLargeLanguageModel, 2, http.StatusBadRequest, "image for AI recognition is empty") + ErrExceedMaxAIRecognitionImageFileSize = NewNormalError(NormalSubcategoryLargeLanguageModel, 3, http.StatusBadRequest, "exceed the maximum size of image file for AI recognition") +) diff --git a/pkg/errs/setting.go b/pkg/errs/setting.go index cfda4b12..8fc901b0 100644 --- a/pkg/errs/setting.go +++ b/pkg/errs/setting.go @@ -24,4 +24,6 @@ var ( ErrInvalidPasswordResetTokenExpiredTime = NewSystemError(SystemSubcategorySetting, 17, http.StatusInternalServerError, "invalid password reset token expired time") ErrInvalidExchangeRatesDataSource = NewSystemError(SystemSubcategorySetting, 18, http.StatusInternalServerError, "invalid exchange rates data source") ErrInvalidIpAddressPattern = NewSystemError(SystemSubcategorySetting, 19, http.StatusInternalServerError, 
"invalid ip address pattern") + ErrInvalidLLMProvider = NewSystemError(SystemSubcategorySetting, 20, http.StatusInternalServerError, "invalid llm provider") + ErrInvalidLLMModelId = NewSystemError(SystemSubcategorySetting, 21, http.StatusInternalServerError, "invalid llm model id") ) diff --git a/pkg/llm/http_large_language_model_provider.go b/pkg/llm/http_large_language_model_provider.go new file mode 100644 index 00000000..e7ea6bd9 --- /dev/null +++ b/pkg/llm/http_large_language_model_provider.go @@ -0,0 +1,91 @@ +package llm + +import ( + "crypto/tls" + "io" + "net/http" + "time" + + "github.com/mayswind/ezbookkeeping/pkg/core" + "github.com/mayswind/ezbookkeeping/pkg/errs" + "github.com/mayswind/ezbookkeeping/pkg/log" + "github.com/mayswind/ezbookkeeping/pkg/settings" + "github.com/mayswind/ezbookkeeping/pkg/utils" +) + +// HttpLargeLanguageModelProvider defines the structure of http large language model provider +type HttpLargeLanguageModelProvider interface { + // BuildTextualRequest returns the http request by the provider api definition + BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) + + // ParseTextualResponse returns the textual response entity by the provider api definition + ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) + + // GetReceiptImageRecognitionModelID returns the receipt image recognition model id if supported, otherwise returns empty string + GetReceiptImageRecognitionModelID() string +} + +// CommonHttpLargeLanguageModelProvider defines the structure of common http large language model provider +type CommonHttpLargeLanguageModelProvider struct { + LargeLanguageModelProvider + provider HttpLargeLanguageModelProvider +} + +// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the OpenAI common 
compatible large language model provider +func (p *CommonHttpLargeLanguageModelProvider) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) { + return p.getTextualResponse(c, uid, currentConfig, request, p.provider.GetReceiptImageRecognitionModelID(), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) +} + +func (p *CommonHttpLargeLanguageModelProvider) getTextualResponse(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) { + if modelId == "" { + return nil, errs.ErrInvalidLLMModelId + } + + transport := http.DefaultTransport.(*http.Transport).Clone() + utils.SetProxyUrl(transport, currentConfig.LargeLanguageModelAPIProxy) + + if currentConfig.LargeLanguageModelAPISkipTLSVerify { + transport.TLSClientConfig = &tls.Config{ + InsecureSkipVerify: true, + } + } + + client := &http.Client{ + Transport: transport, + Timeout: time.Duration(currentConfig.LargeLanguageModelAPIRequestTimeout) * time.Millisecond, + } + + httpRequest, err := p.provider.BuildTextualRequest(c, uid, request, modelId, responseType) + + if err != nil { + log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to build requests for user \"uid:%d\", because %s", uid, err.Error()) + return nil, errs.ErrFailedToRequestRemoteApi + } + + httpRequest.Header.Set("User-Agent", settings.GetUserAgent()) + + resp, err := client.Do(httpRequest) + + if err != nil { + log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to request large language model api for user \"uid:%d\", because %s", uid, err.Error()) + return nil, errs.ErrFailedToRequestRemoteApi + } + + defer resp.Body.Close() + body, err := io.ReadAll(resp.Body) + + log.Debugf(c, "[http_large_language_model_provider.getTextualResponse] 
response is %s", body) + + if resp.StatusCode != 200 { + log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to get large language model api response for user \"uid:%d\", because response code is %d", uid, resp.StatusCode) + return nil, errs.ErrFailedToRequestRemoteApi + } + + return p.provider.ParseTextualResponse(c, uid, body, responseType) +} + +func newCommonHttpLargeLanguageModelProvider(provider HttpLargeLanguageModelProvider) *CommonHttpLargeLanguageModelProvider { + return &CommonHttpLargeLanguageModelProvider{ + provider: provider, + } +} diff --git a/pkg/llm/large_language_model_data.go b/pkg/llm/large_language_model_data.go new file mode 100644 index 00000000..f0c2d5cb --- /dev/null +++ b/pkg/llm/large_language_model_data.go @@ -0,0 +1,33 @@ +package llm + +import "reflect" + +type LargeLanguageModelRequestPromptType byte + +// Large Language Model Request Prompt Type +const ( + LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_TEXT LargeLanguageModelRequestPromptType = 0 + LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL LargeLanguageModelRequestPromptType = 1 +) + +type LargeLanguageModelResponseFormat byte + +// Large Language Model Response Format +const ( + LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_TEXT LargeLanguageModelResponseFormat = 0 + LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON LargeLanguageModelResponseFormat = 1 +) + +// LargeLanguageModelRequest represents a request to a large language model +type LargeLanguageModelRequest struct { + Stream bool + SystemPrompt string + UserPrompt []byte + UserPromptType LargeLanguageModelRequestPromptType + ResponseJsonObjectType reflect.Type +} + +// LargeLanguageModelTextualResponse represents a textual response from a large language model +type LargeLanguageModelTextualResponse struct { + Content string +} diff --git a/pkg/llm/large_language_model_provider.go b/pkg/llm/large_language_model_provider.go new file mode 100644 index 00000000..8ae58804 --- /dev/null +++ 
b/pkg/llm/large_language_model_provider.go @@ -0,0 +1,12 @@ +package llm + +import ( + "github.com/mayswind/ezbookkeeping/pkg/core" + "github.com/mayswind/ezbookkeeping/pkg/settings" +) + +// LargeLanguageModelProvider defines the structure of large language model provider +type LargeLanguageModelProvider interface { + // GetJsonResponseByReceiptImageRecognitionModel returns the json response from the large language model provider by receipt image recognition model + GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) +} diff --git a/pkg/llm/large_language_model_provider_container.go b/pkg/llm/large_language_model_provider_container.go new file mode 100644 index 00000000..2bd1624c --- /dev/null +++ b/pkg/llm/large_language_model_provider_container.go @@ -0,0 +1,45 @@ +package llm + +import ( + "github.com/mayswind/ezbookkeeping/pkg/core" + "github.com/mayswind/ezbookkeeping/pkg/errs" + "github.com/mayswind/ezbookkeeping/pkg/settings" +) + +// LargeLanguageModelProviderContainer contains the current large language model provider +type LargeLanguageModelProviderContainer struct { + current LargeLanguageModelProvider +} + +// Initialize a large language model provider container singleton instance +var ( + Container = &LargeLanguageModelProviderContainer{} +) + +// InitializeLargeLanguageModelProvider initializes the current large language model provider according to the config +func InitializeLargeLanguageModelProvider(config *settings.Config) error { + if config.LLMProvider == settings.OpenAILLMProvider { + Container.current = NewOpenAILargeLanguageModelProvider(config) + return nil + } else if config.LLMProvider == settings.OpenAICompatibleLLMProvider { + Container.current = NewOpenAICompatibleLargeLanguageModelProvider(config) + return nil + } else if config.LLMProvider == settings.OpenRouterLLMProvider { + Container.current = 
NewOpenRouterLargeLanguageModelProvider(config) + return nil + } else if config.LLMProvider == settings.OllamaLLMProvider { + Container.current = NewOllamaLargeLanguageModelProvider(config) + return nil + } + + return errs.ErrInvalidLLMProvider +} + +// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the current large language model provider by receipt image recognition model +func (l *LargeLanguageModelProviderContainer) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) { + if Container.current == nil { + return nil, errs.ErrInvalidLLMProvider + } + + return l.current.GetJsonResponseByReceiptImageRecognitionModel(c, uid, currentConfig, request) +} diff --git a/pkg/llm/ollama_large_language_model_provider.go b/pkg/llm/ollama_large_language_model_provider.go new file mode 100644 index 00000000..5a69ddae --- /dev/null +++ b/pkg/llm/ollama_large_language_model_provider.go @@ -0,0 +1,153 @@ +package llm + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "net/http" + "strings" + + "github.com/mayswind/ezbookkeeping/pkg/core" + "github.com/mayswind/ezbookkeeping/pkg/errs" + "github.com/mayswind/ezbookkeeping/pkg/log" + "github.com/mayswind/ezbookkeeping/pkg/settings" +) + +const ollamaChatCompletionsPath = "api/chat" + +// OllamaLargeLanguageModelProvider defines the structure of Ollama large language model provider +type OllamaLargeLanguageModelProvider struct { + CommonHttpLargeLanguageModelProvider + OllamaServerURL string + ReceiptImageRecognitionModelID string +} + +// BuildTextualRequest returns the http request by Ollama provider +func (p *OllamaLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) { + requestBody, err := p.buildJsonRequestBody(c, uid, 
request, modelId, responseType) + + if err != nil { + return nil, err + } + + httpRequest, err := http.NewRequest("POST", p.getOllamaRequestUrl(), bytes.NewReader(requestBody)) + + if err != nil { + return nil, err + } + + httpRequest.Header.Set("Content-Type", "application/json") + + return httpRequest, nil +} + +// ParseTextualResponse returns the textual response by Ollama provider +func (p *OllamaLargeLanguageModelProvider) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) { + responseBody := make(map[string]any) + err := json.Unmarshal(body, &responseBody) + + if err != nil { + log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] failed to parse response for user \"uid:%d\", because %s", uid, err.Error()) + return nil, errs.ErrFailedToRequestRemoteApi + } + + message, ok := responseBody["message"].(map[string]any) + + if !ok { + log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] no message found in response for user \"uid:%d\"", uid) + return nil, errs.ErrFailedToRequestRemoteApi + } + + content, ok := message["content"].(string) + + if !ok { + log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] no content found in message for user \"uid:%d\"", uid) + return nil, errs.ErrFailedToRequestRemoteApi + } + + if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON { + if strings.HasPrefix(content, "```json") && strings.HasSuffix(content, "```") { + content = strings.TrimPrefix(content, "```json") + content = strings.TrimSuffix(content, "```") + } else if strings.HasPrefix(content, "```") && strings.HasSuffix(content, "```") { + content = strings.TrimPrefix(content, "```") + content = strings.TrimSuffix(content, "```") + } + } + + textualResponse := &LargeLanguageModelTextualResponse{ + Content: content, + } + + return textualResponse, nil +} + +// GetReceiptImageRecognitionModelID returns the 
receipt image recognition model id of Ollama provider
+func (p *OllamaLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
+	return p.ReceiptImageRecognitionModelID
+}
+
+func (p *OllamaLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) ([]byte, error) {
+	requestMessages := make([]any, 0)
+
+	if request.SystemPrompt != "" {
+		requestMessages = append(requestMessages, map[string]string{
+			"role":    "system",
+			"content": request.SystemPrompt,
+		})
+	}
+
+	if len(request.UserPrompt) > 0 {
+		if request.UserPromptType == LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL {
+			imageBase64Data := base64.StdEncoding.EncodeToString(request.UserPrompt)
+			requestMessages = append(requestMessages, map[string]any{
+				"role":    "user",
+				"content": "",
+				"images":  []string{imageBase64Data},
+			})
+		} else {
+			requestMessages = append(requestMessages, map[string]string{
+				"role":    "user",
+				"content": string(request.UserPrompt),
+			})
+		}
+	}
+
+	requestBody := make(map[string]any)
+	requestBody["model"] = modelId
+	requestBody["stream"] = request.Stream
+	requestBody["messages"] = requestMessages
+
+	if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
+		requestBody["format"] = "json"
+	}
+
+	requestBodyBytes, err := json.Marshal(requestBody)
+
+	if err != nil {
+		log.Errorf(c, "[ollama_large_language_model_provider.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error())
+		return nil, errs.ErrOperationFailed
+	}
+
+	log.Debugf(c, "[ollama_large_language_model_provider.buildJsonRequestBody] request body is %s", requestBodyBytes)
+	return requestBodyBytes, nil
+}
+
+func (p *OllamaLargeLanguageModelProvider) getOllamaRequestUrl() string {
+	url := p.OllamaServerURL
+
+	if len(url) < 1 || url[len(url)-1] != '/' {
+		url += "/"
+	}
+
+	url += ollamaChatCompletionsPath
+	return url
+}
+
+//
NewOllamaLargeLanguageModelProvider creates a new Ollama large language model provider instance +func NewOllamaLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider { + return newCommonHttpLargeLanguageModelProvider(&OllamaLargeLanguageModelProvider{ + OllamaServerURL: config.OllamaServerURL, + ReceiptImageRecognitionModelID: config.OllamaReceiptImageRecognitionModelID, + }) +} diff --git a/pkg/llm/ollama_large_language_model_provider_test.go b/pkg/llm/ollama_large_language_model_provider_test.go new file mode 100644 index 00000000..8f530528 --- /dev/null +++ b/pkg/llm/ollama_large_language_model_provider_test.go @@ -0,0 +1,138 @@ +package llm + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/mayswind/ezbookkeeping/pkg/core" +) + +func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) { + provider := &OllamaLargeLanguageModelProvider{} + + request := &LargeLanguageModelRequest{ + SystemPrompt: "You are a helpful assistant.", + UserPrompt: []byte("Hello, how are you?"), + } + + bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + assert.Nil(t, err) + + var body map[string]interface{} + err = json.Unmarshal(bodyBytes, &body) + assert.Nil(t, err) + + assert.Equal(t, "{\"format\":\"json\",\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\"model\":\"test\",\"stream\":false}", string(bodyBytes)) +} + +func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) { + provider := &OllamaLargeLanguageModelProvider{} + + request := &LargeLanguageModelRequest{ + SystemPrompt: "What's in this image?", + UserPrompt: []byte("fakedata"), + UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL, + } + + bodyBytes, err := 
provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + assert.Nil(t, err) + + var body map[string]interface{} + err = json.Unmarshal(bodyBytes, &body) + assert.Nil(t, err) + + assert.Equal(t, "{\"format\":\"json\",\"messages\":[{\"content\":\"What's in this image?\",\"role\":\"system\"},{\"content\":\"\",\"images\":[\"ZmFrZWRhdGE=\"],\"role\":\"user\"}],\"model\":\"test\",\"stream\":false}", string(bodyBytes)) +} + +func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_ValidJsonResponse(t *testing.T) { + provider := &OllamaLargeLanguageModelProvider{} + + response := `{ + "model": "test", + "created_at": "2025-09-01T01:02:03.456789Z", + "message": { + "role": "assistant", + "content": "This is a test response" + } + }` + + result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + assert.Nil(t, err) + assert.Equal(t, "This is a test response", result.Content) +} + +func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_EmptyResponse(t *testing.T) { + provider := &OllamaLargeLanguageModelProvider{} + + response := `{ + "model": "test", + "created_at": "2025-09-01T01:02:03.456789Z", + "message": { + "role": "assistant", + "content": "" + } + }` + + result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + assert.Nil(t, err) + assert.Equal(t, "", result.Content) +} + +func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_EmptyChoices(t *testing.T) { + provider := &OllamaLargeLanguageModelProvider{} + + response := `{ + "model": "test", + "created_at": "2025-09-01T01:02:03.456789Z", + "message": {} + }` + + _, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + assert.EqualError(t, err, "failed to request third party api") +} + +func 
TestOllamaLargeLanguageModelProvider_ParseTextualResponse_NoChoiceContent(t *testing.T) { + provider := &OllamaLargeLanguageModelProvider{} + + response := `{ + "model": "test", + "created_at": "2025-09-01T01:02:03.456789Z", + "message": { + "role": "assistant" + } + }` + + _, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + assert.EqualError(t, err, "failed to request third party api") +} + +func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_InvalidJson(t *testing.T) { + provider := &OllamaLargeLanguageModelProvider{} + + response := "error" + + _, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + assert.EqualError(t, err, "failed to request third party api") +} + +func TestOllamaLargeLanguageModelProvider_GetOllamaRequestUrl(t *testing.T) { + provider := &OllamaLargeLanguageModelProvider{ + OllamaServerURL: "http://localhost:11434/", + } + url := provider.getOllamaRequestUrl() + assert.Equal(t, "http://localhost:11434/api/chat", url) + + provider = &OllamaLargeLanguageModelProvider{ + OllamaServerURL: "http://localhost:11434", + } + url = provider.getOllamaRequestUrl() + assert.Equal(t, "http://localhost:11434/api/chat", url) + + provider = &OllamaLargeLanguageModelProvider{ + OllamaServerURL: "http://example.com/ollama/", + } + url = provider.getOllamaRequestUrl() + assert.Equal(t, "http://example.com/ollama/api/chat", url) +} diff --git a/pkg/llm/openai_common_compatible_large_language_model_provider.go b/pkg/llm/openai_common_compatible_large_language_model_provider.go new file mode 100644 index 00000000..60717b7b --- /dev/null +++ b/pkg/llm/openai_common_compatible_large_language_model_provider.go @@ -0,0 +1,187 @@ +package llm + +import ( + "bytes" + "encoding/base64" + "encoding/json" + "io" + "net/http" + "strings" + + "github.com/invopop/jsonschema" + + 
"github.com/mayswind/ezbookkeeping/pkg/core" + "github.com/mayswind/ezbookkeeping/pkg/errs" + "github.com/mayswind/ezbookkeeping/pkg/log" +) + +// OpenAIChatCompletionsLargeLanguageModelProvider defines the structure of OpenAI chat completions compatible large language model provider +type OpenAIChatCompletionsLargeLanguageModelProvider interface { + // BuildChatCompletionsHttpRequest returns the chat completions http request + BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) + + // GetReceiptImageRecognitionModelID returns the receipt image recognition model id if supported, otherwise returns empty string + GetReceiptImageRecognitionModelID() string +} + +// OpenAICommonChatCompletionsHttpLargeLanguageModelProvider defines the structure of OpenAI common compatible large language model provider based on chat completions api +type OpenAICommonChatCompletionsHttpLargeLanguageModelProvider struct { + CommonHttpLargeLanguageModelProvider + provider OpenAIChatCompletionsLargeLanguageModelProvider +} + +// BuildTextualRequest returns the http request by OpenAI common compatible provider +func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) { + requestBody, err := p.buildJsonRequestBody(c, uid, request, modelId, responseType) + + if err != nil { + return nil, err + } + + httpRequest, err := p.provider.BuildChatCompletionsHttpRequest(c, uid) + + if err != nil { + return nil, err + } + + httpRequest.Body = io.NopCloser(bytes.NewReader(requestBody)) + httpRequest.Header.Set("Content-Type", "application/json") + + return httpRequest, nil +} + +// ParseTextualResponse returns the textual response by OpenAI common compatible provider +func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) ParseTextualResponse(c core.Context, uid int64, body []byte, 
responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) { + responseBody := make(map[string]any) + err := json.Unmarshal(body, &responseBody) + + if err != nil { + log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] failed to parse response for user \"uid:%d\", because %s", uid, err.Error()) + return nil, errs.ErrFailedToRequestRemoteApi + } + + choices, ok := responseBody["choices"].([]any) + + if !ok || len(choices) < 1 { + log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no choices found in response for user \"uid:%d\"", uid) + return nil, errs.ErrFailedToRequestRemoteApi + } + + firstChoice, ok := choices[0].(map[string]any) + + if !ok { + log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] invalid choice format in response for user \"uid:%d\"", uid) + return nil, errs.ErrFailedToRequestRemoteApi + } + + message, ok := firstChoice["message"].(map[string]any) + + if !ok { + log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no message found in choice for user \"uid:%d\"", uid) + return nil, errs.ErrFailedToRequestRemoteApi + } + + content, ok := message["content"].(string) + + if !ok { + log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no content found in message for user \"uid:%d\"", uid) + return nil, errs.ErrFailedToRequestRemoteApi + } + + if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON { + if strings.HasPrefix(content, "```json") && strings.HasSuffix(content, "```") { + content = strings.TrimPrefix(content, "```json") + content = strings.TrimSuffix(content, "```") + } else if strings.HasPrefix(content, "```") && strings.HasSuffix(content, "```") { + content = strings.TrimPrefix(content, "```") + content = strings.TrimSuffix(content, "```") + } + } + + textualResponse := 
&LargeLanguageModelTextualResponse{ + Content: content, + } + + return textualResponse, nil +} + +// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI common compatible provider +func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string { + return p.provider.GetReceiptImageRecognitionModelID() +} + +func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) ([]byte, error) { + requestMessages := make([]any, 0) + + if request.SystemPrompt != "" { + requestMessages = append(requestMessages, map[string]string{ + "role": "system", + "content": request.SystemPrompt, + }) + } + + if len(request.UserPrompt) > 0 { + if request.UserPromptType == LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL { + imageBase64Data := "data:image/png;base64," + base64.StdEncoding.EncodeToString(request.UserPrompt) + requestMessages = append(requestMessages, map[string]any{ + "role": "user", + "content": []any{ + core.O{ + "type": "image_url", + "image_url": core.O{ + "url": imageBase64Data, + }, + }, + }, + }) + } else { + requestMessages = append(requestMessages, map[string]string{ + "role": "user", + "content": string(request.UserPrompt), + }) + } + } + + requestBody := make(map[string]any) + requestBody["model"] = modelId + requestBody["stream"] = request.Stream + requestBody["messages"] = requestMessages + + if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON { + if request.ResponseJsonObjectType != nil { + schemeGenerator := jsonschema.Reflector{ + Anonymous: true, + DoNotReference: true, + ExpandedStruct: true, + } + + schema := schemeGenerator.ReflectFromType(request.ResponseJsonObjectType) + schema.Version = "" + + requestBody["response_format"] = core.O{ + "type": "json_schema", + "json_schema": schema, + } + } else { + 
requestBody["response_format"] = core.O{ + "type": "json_object", + } + } + } + + requestBodyBytes, err := json.Marshal(requestBody) + + if err != nil { + log.Errorf(c, "[openai_common_compatible_large_language_model_provider.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error()) + return nil, errs.ErrOperationFailed + } + + log.Debugf(c, "[openai_common_compatible_large_language_model_provider.buildJsonRequestBody] request body is %s", requestBodyBytes) + return requestBodyBytes, nil +} + +func newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(provider OpenAIChatCompletionsLargeLanguageModelProvider) LargeLanguageModelProvider { + return newCommonHttpLargeLanguageModelProvider(&OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{ + provider: provider, + }) +} diff --git a/pkg/llm/openai_common_compatible_large_language_model_provider_test.go b/pkg/llm/openai_common_compatible_large_language_model_provider_test.go new file mode 100644 index 00000000..f7e96958 --- /dev/null +++ b/pkg/llm/openai_common_compatible_large_language_model_provider_test.go @@ -0,0 +1,157 @@ +package llm + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/mayswind/ezbookkeeping/pkg/core" +) + +func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) { + provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{ + provider: &OpenAILargeLanguageModelProvider{}, + } + + request := &LargeLanguageModelRequest{ + SystemPrompt: "You are a helpful assistant.", + UserPrompt: []byte("Hello, how are you?"), + } + + bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + assert.Nil(t, err) + + var body map[string]interface{} + err = json.Unmarshal(bodyBytes, &body) + assert.Nil(t, err) + + assert.Equal(t, 
"{\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\"model\":\"test\",\"response_format\":{\"type\":\"json_object\"},\"stream\":false}", string(bodyBytes)) +} + +func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) { + provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{ + provider: &OpenAILargeLanguageModelProvider{}, + } + + request := &LargeLanguageModelRequest{ + SystemPrompt: "What's in this image?", + UserPrompt: []byte("fakedata"), + UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL, + } + + bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + assert.Nil(t, err) + + var body map[string]interface{} + err = json.Unmarshal(bodyBytes, &body) + assert.Nil(t, err) + + assert.Equal(t, "{\"messages\":[{\"content\":\"What's in this image?\",\"role\":\"system\"},{\"content\":[{\"image_url\":{\"url\":\"data:image/png;base64,ZmFrZWRhdGE=\"},\"type\":\"image_url\"}],\"role\":\"user\"}],\"model\":\"test\",\"response_format\":{\"type\":\"json_object\"},\"stream\":false}", string(bodyBytes)) +} + +func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_ValidJsonResponse(t *testing.T) { + provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{ + provider: &OpenAILargeLanguageModelProvider{}, + } + + response := `{ + "id": "test-123", + "object": "chat.completion", + "created": 1234567890, + "model": "test", + "usage": { + "prompt_tokens": 13, + "completion_tokens": 7, + "total_tokens": 20 + }, + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "role": "assistant", + "content": "This is a test response" + } + } + ] + }` + + result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), 
LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + assert.Nil(t, err) + assert.Equal(t, "This is a test response", result.Content) +} + +func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_EmptyResponse(t *testing.T) { + provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{ + provider: &OpenAILargeLanguageModelProvider{}, + } + + response := `{ + "id": "test-123", + "object": "chat.completion", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "role": "assistant", + "content": "" + } + } + ] + }` + + result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + assert.Nil(t, err) + assert.Equal(t, "", result.Content) +} + +func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_EmptyChoices(t *testing.T) { + provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{ + provider: &OpenAILargeLanguageModelProvider{}, + } + + response := `{ + "id": "test-123", + "object": "chat.completion", + "choices": [] + }` + + _, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + assert.EqualError(t, err, "failed to request third party api") +} + +func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_NoChoiceContent(t *testing.T) { + provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{ + provider: &OpenAILargeLanguageModelProvider{}, + } + + response := `{ + "id": "chatcmpl-123", + "object": "chat.completion", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "message": { + "role": "assistant" + } + } + ] + }` + + _, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + assert.EqualError(t, err, "failed to request third party api") +} + +func 
TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_InvalidJson(t *testing.T) { + provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{ + provider: &OpenAILargeLanguageModelProvider{}, + } + + response := "error" + + _, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON) + assert.EqualError(t, err, "failed to request third party api") +} diff --git a/pkg/llm/openai_compatible_large_language_model_provider.go b/pkg/llm/openai_compatible_large_language_model_provider.go new file mode 100644 index 00000000..741cc874 --- /dev/null +++ b/pkg/llm/openai_compatible_large_language_model_provider.go @@ -0,0 +1,58 @@ +package llm + +import ( + "net/http" + + "github.com/mayswind/ezbookkeeping/pkg/core" + "github.com/mayswind/ezbookkeeping/pkg/settings" +) + +const openAICompatibleChatCompletionsPath = "chat/completions" + +// OpenAICompatibleLargeLanguageModelProvider defines the structure of OpenAI compatible large language model provider +type OpenAICompatibleLargeLanguageModelProvider struct { + OpenAIChatCompletionsLargeLanguageModelProvider + OpenAICompatibleBaseURL string + OpenAICompatibleAPIKey string + ReceiptImageRecognitionModelID string +} + +// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenAI compatible provider +func (p *OpenAICompatibleLargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) { + req, err := http.NewRequest("POST", p.getFinalChatCompletionsRequestUrl(), nil) + + if err != nil { + return nil, err + } + + if p.OpenAICompatibleAPIKey != "" { + req.Header.Set("Authorization", "Bearer "+p.OpenAICompatibleAPIKey) + } + + return req, nil +} + +// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI compatible provider +func (p *OpenAICompatibleLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() 
string {
+	return p.ReceiptImageRecognitionModelID
+}
+
+func (p *OpenAICompatibleLargeLanguageModelProvider) getFinalChatCompletionsRequestUrl() string {
+	url := p.OpenAICompatibleBaseURL
+
+	if len(url) < 1 || url[len(url)-1] != '/' {
+		url += "/"
+	}
+
+	url += openAICompatibleChatCompletionsPath
+	return url
+}
+
+// NewOpenAICompatibleLargeLanguageModelProvider creates a new OpenAI compatible large language model provider instance
+func NewOpenAICompatibleLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
+	return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAICompatibleLargeLanguageModelProvider{
+		OpenAICompatibleBaseURL:        config.OpenAICompatibleBaseURL,
+		OpenAICompatibleAPIKey:         config.OpenAICompatibleAPIKey,
+		ReceiptImageRecognitionModelID: config.OpenAICompatibleReceiptImageRecognitionModelID,
+	})
+}
diff --git a/pkg/llm/openai_compatible_large_language_model_provider_test.go b/pkg/llm/openai_compatible_large_language_model_provider_test.go
new file mode 100644
index 00000000..e288b233
--- /dev/null
+++ b/pkg/llm/openai_compatible_large_language_model_provider_test.go
@@ -0,0 +1,27 @@
+package llm
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestOpenAICompatibleLargeLanguageModelProvider_GetFinalRequestUrl(t *testing.T) {
+	provider := &OpenAICompatibleLargeLanguageModelProvider{
+		OpenAICompatibleBaseURL: "https://api.example.com/v1/",
+	}
+	url := provider.getFinalChatCompletionsRequestUrl()
+	assert.Equal(t, "https://api.example.com/v1/chat/completions", url)
+
+	provider = &OpenAICompatibleLargeLanguageModelProvider{
+		OpenAICompatibleBaseURL: "https://api.example.com/v1",
+	}
+	url = provider.getFinalChatCompletionsRequestUrl()
+	assert.Equal(t, "https://api.example.com/v1/chat/completions", url)
+
+	provider = &OpenAICompatibleLargeLanguageModelProvider{
+		OpenAICompatibleBaseURL: "https://example.com/api",
+	}
+	url = provider.getFinalChatCompletionsRequestUrl()
+	assert.Equal(t,
"https://example.com/api/chat/completions", url) +} diff --git a/pkg/llm/openai_large_language_model_provider.go b/pkg/llm/openai_large_language_model_provider.go new file mode 100644 index 00000000..2bb51352 --- /dev/null +++ b/pkg/llm/openai_large_language_model_provider.go @@ -0,0 +1,43 @@ +package llm + +import ( + "net/http" + + "github.com/mayswind/ezbookkeeping/pkg/core" + "github.com/mayswind/ezbookkeeping/pkg/settings" +) + +// OpenAILargeLanguageModelProvider defines the structure of OpenAI large language model provider +type OpenAILargeLanguageModelProvider struct { + OpenAIChatCompletionsLargeLanguageModelProvider + OpenAIAPIKey string + ReceiptImageRecognitionModelID string +} + +const openAIChatCompletionsUrl = "https://api.openai.com/v1/chat/completions" + +// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenAI provider +func (p *OpenAILargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) { + req, err := http.NewRequest("POST", openAIChatCompletionsUrl, nil) + + if err != nil { + return nil, err + } + + req.Header.Set("Authorization", "Bearer "+p.OpenAIAPIKey) + + return req, nil +} + +// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI provider +func (p *OpenAILargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string { + return p.ReceiptImageRecognitionModelID +} + +// NewOpenAILargeLanguageModelProvider creates a new OpenAI large language model provider instance +func NewOpenAILargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider { + return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAILargeLanguageModelProvider{ + OpenAIAPIKey: config.OpenAIAPIKey, + ReceiptImageRecognitionModelID: config.OpenAIReceiptImageRecognitionModelID, + }) +} diff --git a/pkg/llm/openrouter_large_language_model_provider.go b/pkg/llm/openrouter_large_language_model_provider.go new file 
mode 100644 index 00000000..16873bf0 --- /dev/null +++ b/pkg/llm/openrouter_large_language_model_provider.go @@ -0,0 +1,45 @@ +package llm + +import ( + "net/http" + + "github.com/mayswind/ezbookkeeping/pkg/core" + "github.com/mayswind/ezbookkeeping/pkg/settings" +) + +// OpenRouterLargeLanguageModelProvider defines the structure of OpenRouter large language model provider +type OpenRouterLargeLanguageModelProvider struct { + OpenAIChatCompletionsLargeLanguageModelProvider + OpenRouterAPIKey string + ReceiptImageRecognitionModelID string +} + +const openRouterChatCompletionsUrl = "https://openrouter.ai/api/v1/chat/completions" + +// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenRouter provider +func (p *OpenRouterLargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) { + req, err := http.NewRequest("POST", openRouterChatCompletionsUrl, nil) + + if err != nil { + return nil, err + } + + req.Header.Set("Authorization", "Bearer "+p.OpenRouterAPIKey) + req.Header.Set("HTTP-Referer", "https://ezbookkeeping.mayswind.net/") + req.Header.Set("X-Title", "ezBookkeeping") + + return req, nil +} + +// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenRouter provider +func (p *OpenRouterLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string { + return p.ReceiptImageRecognitionModelID +} + +// NewOpenRouterLargeLanguageModelProvider creates a new OpenRouter large language model provider instance +func NewOpenRouterLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider { + return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenRouterLargeLanguageModelProvider{ + OpenRouterAPIKey: config.OpenRouterAPIKey, + ReceiptImageRecognitionModelID: config.OpenRouterReceiptImageRecognitionModelID, + }) +} diff --git a/pkg/models/large_language_model.go b/pkg/models/large_language_model.go new file mode 100644 
index 00000000..2e5e0d7f --- /dev/null +++ b/pkg/models/large_language_model.go @@ -0,0 +1,27 @@ +package models + +// RecognizedReceiptImageResponse represents a view-object of recognized receipt image response +type RecognizedReceiptImageResponse struct { + Type TransactionType `json:"type"` + Time int64 `json:"time,omitempty"` + CategoryId int64 `json:"categoryId,string,omitempty"` + SourceAccountId int64 `json:"sourceAccountId,string,omitempty"` + DestinationAccountId int64 `json:"destinationAccountId,string,omitempty"` + SourceAmount int64 `json:"sourceAmount,omitempty"` + DestinationAmount int64 `json:"destinationAmount,omitempty"` + TagIds []string `json:"tagIds,omitempty"` + Comment string `json:"comment,omitempty"` +} + +// RecognizedReceiptImageResult represents the result of recognized receipt image +type RecognizedReceiptImageResult struct { + Type string `json:"type,omitempty" jsonschema:"enum=income,enum=expense,enum=transfer" jsonschema_description:"Transaction type (income, expense, transfer)"` + Time string `json:"time" jsonschema:"format=date-time" jsonschema_description:"Transaction time in long date time format (YYYY-MM-DD HH:mm:ss, e.g. 
2023-01-01 12:00:00)"` + Amount string `json:"amount,omitempty" jsonschema_description:"Transaction amount"` + AccountName string `json:"account,omitempty" jsonschema_description:"Account name for the transaction"` + CategoryName string `json:"category,omitempty" jsonschema_description:"Category name for the transaction"` + TagNames []string `json:"tags,omitempty" jsonschema_description:"List of tags associated with the transaction (maximum 10 tags allowed)"` + Description string `json:"description,omitempty" jsonschema_description:"Transaction description"` + DestinationAmount string `json:"destination_amount,omitempty" jsonschema_description:"Destination amount for transfer transactions"` + DestinationAccountName string `json:"destination_account,omitempty" jsonschema_description:"Destination account name for transfer transactions"` +} diff --git a/pkg/settings/setting.go b/pkg/settings/setting.go index 10e8fa0e..f3db6068 100644 --- a/pkg/settings/setting.go +++ b/pkg/settings/setting.go @@ -66,6 +66,13 @@ const ( WebDAVStorageType string = "webdav" ) +const ( + OpenAILLMProvider string = "openai" + OpenAICompatibleLLMProvider string = "openai_compatible" + OpenRouterLLMProvider string = "openrouter" + OllamaLLMProvider string = "ollama" +) + // Uuid generator types const ( InternalUuidGeneratorType string = "internal" @@ -140,6 +147,9 @@ const ( defaultWebDAVRequestTimeout uint32 = 10000 // 10 seconds + defaultAIRecognitionPictureMaxSize uint32 = 10485760 // 10MB + defaultLargeLanguageModelAPIRequestTimeout uint32 = 60000 // 60 seconds + defaultInMemoryDuplicateCheckerCleanupInterval uint32 = 60 // 1 minutes defaultDuplicateSubmissionsInterval uint32 = 300 // 5 minutes @@ -281,6 +291,23 @@ type Config struct { MinIOConfig *MinIOConfig WebDAVConfig *WebDAVConfig + // Large Language Model + LLMProvider string + OpenAIAPIKey string + OpenAIReceiptImageRecognitionModelID string + OpenAICompatibleBaseURL string + OpenAICompatibleAPIKey string + 
OpenAICompatibleReceiptImageRecognitionModelID string + OpenRouterAPIKey string + OpenRouterReceiptImageRecognitionModelID string + OllamaServerURL string + OllamaReceiptImageRecognitionModelID string + TransactionFromAIImageRecognition bool + MaxAIRecognitionPictureFileSize uint32 + LargeLanguageModelAPIRequestTimeout uint32 + LargeLanguageModelAPIProxy string + LargeLanguageModelAPISkipTLSVerify bool + // Uuid UuidGeneratorType string UuidServerId uint8 @@ -426,6 +453,12 @@ func LoadConfiguration(configFilePath string) (*Config, error) { return nil, err } + err = loadLLMConfiguration(config, cfgFile, "llm") + + if err != nil { + return nil, err + } + err = loadUuidConfiguration(config, cfgFile, "uuid") if err != nil { @@ -751,6 +784,46 @@ func loadStorageConfiguration(config *Config, configFile *ini.File, sectionName return nil } +func loadLLMConfiguration(config *Config, configFile *ini.File, sectionName string) error { + llmProvider := getConfigItemStringValue(configFile, sectionName, "llm_provider") + + if llmProvider == "" { + config.LLMProvider = "" + } else if llmProvider == OpenAILLMProvider { + config.LLMProvider = OpenAILLMProvider + } else if llmProvider == OpenAICompatibleLLMProvider { + config.LLMProvider = OpenAICompatibleLLMProvider + } else if llmProvider == OpenRouterLLMProvider { + config.LLMProvider = OpenRouterLLMProvider + } else if llmProvider == OllamaLLMProvider { + config.LLMProvider = OllamaLLMProvider + } else { + return errs.ErrInvalidLLMProvider + } + + config.OpenAIAPIKey = getConfigItemStringValue(configFile, sectionName, "openai_api_key") + config.OpenAIReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openai_receipt_image_recognition_model_id") + + config.OpenAICompatibleBaseURL = getConfigItemStringValue(configFile, sectionName, "openai_compatible_base_url") + config.OpenAICompatibleAPIKey = getConfigItemStringValue(configFile, sectionName, "openai_compatible_api_key") + 
config.OpenAICompatibleReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openai_compatible_receipt_image_recognition_model_id") + + config.OpenRouterAPIKey = getConfigItemStringValue(configFile, sectionName, "openrouter_api_key") + config.OpenRouterReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openrouter_receipt_image_recognition_model_id") + + config.OllamaServerURL = getConfigItemStringValue(configFile, sectionName, "ollama_server_url") + config.OllamaReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "ollama_receipt_image_recognition_model_id") + + config.TransactionFromAIImageRecognition = getConfigItemBoolValue(configFile, sectionName, "transaction_from_ai_image_recognition", false) + config.MaxAIRecognitionPictureFileSize = getConfigItemUint32Value(configFile, sectionName, "max_ai_recognition_picture_size", defaultAIRecognitionPictureMaxSize) + + config.LargeLanguageModelAPIProxy = getConfigItemStringValue(configFile, sectionName, "proxy", "system") + config.LargeLanguageModelAPIRequestTimeout = getConfigItemUint32Value(configFile, sectionName, "request_timeout", defaultLargeLanguageModelAPIRequestTimeout) + config.LargeLanguageModelAPISkipTLSVerify = getConfigItemBoolValue(configFile, sectionName, "skip_tls_verify", false) + + return nil +} + func loadUuidConfiguration(config *Config, configFile *ini.File, sectionName string) error { if getConfigItemStringValue(configFile, sectionName, "generator_type") == InternalUuidGeneratorType { config.UuidGeneratorType = InternalUuidGeneratorType diff --git a/pkg/templates/known_template.go b/pkg/templates/known_template.go index 07290ddb..e8550b8d 100644 --- a/pkg/templates/known_template.go +++ b/pkg/templates/known_template.go @@ -4,6 +4,7 @@ type KnownTemplate string // Known templates const ( - TEMPLATE_VERIFY_EMAIL KnownTemplate = "email/verify_email" - TEMPLATE_PASSWORD_RESET KnownTemplate = 
"email/password_reset" + TEMPLATE_VERIFY_EMAIL KnownTemplate = "email/verify_email" + TEMPLATE_PASSWORD_RESET KnownTemplate = "email/password_reset" + SYSTEM_PROMPT_RECEIPT_IMAGE_RECOGNITION KnownTemplate = "prompt/receipt_image_recognition" ) diff --git a/src/components/mobile/AIImageRecognitionSheet.vue b/src/components/mobile/AIImageRecognitionSheet.vue new file mode 100644 index 00000000..a150c37d --- /dev/null +++ b/src/components/mobile/AIImageRecognitionSheet.vue @@ -0,0 +1,181 @@ + + + + + + diff --git a/src/consts/api.ts b/src/consts/api.ts index e73c3266..58d7b4f0 100644 --- a/src/consts/api.ts +++ b/src/consts/api.ts @@ -7,6 +7,7 @@ export const DEFAULT_API_TIMEOUT: number = 10000; // 10s export const DEFAULT_UPLOAD_API_TIMEOUT: number = 30000; // 30s export const DEFAULT_EXPORT_API_TIMEOUT: number = 180000; // 180s export const DEFAULT_IMPORT_API_TIMEOUT: number = 1800000; // 1800s +export const DEFAULT_LLM_API_TIMEOUT: number = 600000; // 600s export const GOOGLE_MAP_JAVASCRIPT_URL: string = 'https://maps.googleapis.com/maps/api/js'; export const BAIDU_MAP_JAVASCRIPT_URL: string = 'https://api.map.baidu.com/api?v=3.0'; diff --git a/src/core/file.ts b/src/core/file.ts index be86c5f2..51d9fcc4 100644 --- a/src/core/file.ts +++ b/src/core/file.ts @@ -6,6 +6,7 @@ export class KnownFileType { public static readonly TSV = new KnownFileType('tsv', 'text/tab-separated-values'); public static readonly MARKDOWN = new KnownFileType('md', 'text/markdown'); public static readonly JS = new KnownFileType('js', 'application/javascript'); + public static readonly JPG = new KnownFileType('jpg', 'image/jpeg'); public readonly extension: string; public readonly contentType: string; @@ -37,6 +38,12 @@ export class KnownFileType { }); } + public createFileFromBlob(blob: Blob, fileName: string): File { + return new File([blob], this.formatFileName(fileName), { + type: this.contentType, + }); + } + public static parse(extension: string): KnownFileType | undefined { return 
KnownFileType.allInstancesByExtension[extension]; } diff --git a/src/lib/server_settings.ts b/src/lib/server_settings.ts index 011c8a38..41ea3ff6 100644 --- a/src/lib/server_settings.ts +++ b/src/lib/server_settings.ts @@ -35,6 +35,10 @@ export function isMCPServerEnabled(): boolean { return getServerSetting('mcp') === 1; } +export function isTransactionFromAIImageRecognitionEnabled(): boolean { + return getServerSetting('llmt') === 1; +} + export function getLoginPageTips(): Record{ return getServerSetting('lpt') as Record; } diff --git a/src/lib/services.ts b/src/lib/services.ts index 93b0a9fe..62654f35 100644 --- a/src/lib/services.ts +++ b/src/lib/services.ts @@ -21,6 +21,7 @@ import { DEFAULT_UPLOAD_API_TIMEOUT, DEFAULT_EXPORT_API_TIMEOUT, DEFAULT_IMPORT_API_TIMEOUT, + DEFAULT_LLM_API_TIMEOUT, GOOGLE_MAP_JAVASCRIPT_URL, BAIDU_MAP_JAVASCRIPT_URL, AMAP_JAVASCRIPT_URL @@ -134,6 +135,9 @@ import type { import type { UserApplicationCloudSettingsUpdateRequest } from '@/models/user_app_cloud_setting.ts'; +import type { + RecognizedReceiptImageResponse +} from '@/models/large_language_model.ts'; import { getCurrentToken, @@ -635,6 +639,13 @@ export default { deleteTransactionTemplate: (req: TransactionTemplateDeleteRequest): ApiResponsePromise => { return axios.post>('v1/transaction/templates/delete.json', req); }, + recognizeReceiptImage: ({ imageFile }: { imageFile: File }): ApiResponsePromise => { + return axios.postForm>('v1/llm/transactions/recognize_receipt_image.json', { + image: imageFile + }, { + timeout: DEFAULT_LLM_API_TIMEOUT + }); + }, getLatestExchangeRates: (param: { ignoreError?: boolean }): ApiResponsePromise => { return axios.get>('v1/exchange_rates/latest.json', { ignoreError: !!param.ignoreError, diff --git a/src/lib/ui/common.ts b/src/lib/ui/common.ts index 4f4d5841..bebc2d27 100644 --- a/src/lib/ui/common.ts +++ b/src/lib/ui/common.ts @@ -3,6 +3,7 @@ import Clipboard from 'clipboard'; import { ThemeType } from '@/core/theme.ts'; import { type 
AmountColor, PresetAmountColor } from '@/core/color.ts'; +import { KnownFileType } from '@/core/file.ts'; import logger from '../logger.ts'; @@ -134,6 +135,64 @@ export function startDownloadFile(fileName: string, fileData: Blob): void { dataLink.click(); } +export function compressJpgImage(file: File, maxWidth: number, maxHeight: number, quality: number): Promise { + return new Promise((resolve, reject) => { + const reader = new FileReader(); + + reader.onload = (event) => { + const img = new Image(); + + img.onload = () => { + let width = img.width; + let height = img.height; + + if (width > maxWidth || height > maxHeight) { + const scale = Math.min(maxWidth / width, maxHeight / height); + width = Math.floor(width * scale); + height = Math.floor(height * scale); + } + + const canvas = document.createElement('canvas'); + const ctx = canvas.getContext('2d'); + + if (!ctx) { + reject(new Error('failed to get canvas context')); + return; + } + + canvas.width = width; + canvas.height = height; + + ctx.drawImage(img, 0, 0, width, height); + + canvas.toBlob((blob) => { + if (blob) { + resolve(blob); + } else { + reject(new Error('failed to compress image')); + } + }, KnownFileType.JPG.contentType, quality); + }; + + img.onerror = (error) => { + reject(error); + }; + + if (event.target && event.target.result) { + img.src = event.target.result as string; + } else { + reject(new Error('failed to read file')); + } + }; + + reader.onerror = (error) => { + reject(error); + }; + + reader.readAsDataURL(file); + }); +} + export function clearBrowserCaches(): Promise { if (!window.caches) { logger.error('caches API is not supported in this browser'); diff --git a/src/locales/de.json b/src/locales/de.json index 4da60c7a..6b931d14 100644 --- a/src/locales/de.json +++ b/src/locales/de.json @@ -1224,6 +1224,10 @@ "cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency", "cannot delete exchange rate data for base currency": "Cannot 
delete exchange rate data for base currency", "mcp server is not enabled": "MCP Server is not enabled", + "llm provider is not enabled": "Large Language Model provider is not enabled", + "no image for AI recognition": "There is no image for AI recognition", + "image for AI recognition is empty": "Image for AI recognition file is empty", + "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size", "query items cannot be blank": "Abfrageelemente dürfen nicht leer sein", "query items too much": "Zu viele Abfrageelemente", "query items have invalid item": "Ungültiges Element in Abfrageelementen", @@ -1389,6 +1393,7 @@ "Refresh": "Aktualisieren", "Clear": "Löschen", "Generate": "Generate", + "Recognize": "Recognize", "None": "Keine", "Unspecified": "Nicht angegeben", "Not set": "Nicht festgelegt", @@ -1719,6 +1724,14 @@ "Duplicate (With Time)": "Duplicate (With Time)", "Duplicate (With Geographic Location)": "Duplicate (With Geographic Location)", "Duplicate (With Time and Geographic Location)": "Duplicate (With Time and Geographic Location)", + "AI Image Recognition": "AI Image Recognition", + "Choose from Library": "Choose from Library", + "Take Photo": "Take Photo", + "Unable to load image": "Unable to load image", + "Unable to recognize image": "Unable to recognize image", + "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one", + "Release to load image": "Release to load image", + "Please select a receipt or transaction image first": "Please select a receipt or transaction image first", "Category": "Kategorie", "Secondary Category": "Secondary Category", "Expense Category": "Expense Category", diff --git a/src/locales/en.json b/src/locales/en.json index e5598911..6cd318ab 100644 --- a/src/locales/en.json +++ b/src/locales/en.json @@ -1224,6 +1224,10 @@ "cannot update exchange rate data for 
base currency": "Cannot update exchange rate data for base currency", "cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency", "mcp server is not enabled": "MCP Server is not enabled", + "llm provider is not enabled": "Large Language Model provider is not enabled", + "no image for AI recognition": "There is no image for AI recognition", + "image for AI recognition is empty": "Image for AI recognition file is empty", + "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size", "query items cannot be blank": "There are no query items", "query items too much": "There are too many query items", "query items have invalid item": "There is invalid item in query items", @@ -1389,6 +1393,7 @@ "Refresh": "Refresh", "Clear": "Clear", "Generate": "Generate", + "Recognize": "Recognize", "None": "None", "Unspecified": "Unspecified", "Not set": "Not set", @@ -1719,6 +1724,14 @@ "Duplicate (With Time)": "Duplicate (With Time)", "Duplicate (With Geographic Location)": "Duplicate (With Geographic Location)", "Duplicate (With Time and Geographic Location)": "Duplicate (With Time and Geographic Location)", + "AI Image Recognition": "AI Image Recognition", + "Choose from Library": "Choose from Library", + "Take Photo": "Take Photo", + "Unable to load image": "Unable to load image", + "Unable to recognize image": "Unable to recognize image", + "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one", + "Release to load image": "Release to load image", + "Please select a receipt or transaction image first": "Please select a receipt or transaction image first", "Category": "Category", "Secondary Category": "Secondary Category", "Expense Category": "Expense Category", diff --git a/src/locales/es.json b/src/locales/es.json index 732ae0e4..ca940517 100644 --- 
a/src/locales/es.json +++ b/src/locales/es.json @@ -1224,6 +1224,10 @@ "cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency", "cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency", "mcp server is not enabled": "MCP Server is not enabled", + "llm provider is not enabled": "Large Language Model provider is not enabled", + "no image for AI recognition": "There is no image for AI recognition", + "image for AI recognition is empty": "Image for AI recognition file is empty", + "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size", "query items cannot be blank": "--", "query items too much": "--", "query items have invalid item": "Hay un elemento no válido en los elementos de consulta", @@ -1389,6 +1393,7 @@ "Refresh": "Refrescar", "Clear": "Claro", "Generate": "Generate", + "Recognize": "Recognize", "None": "Ninguno", "Unspecified": "No especificado", "Not set": "No establecido", @@ -1719,6 +1724,14 @@ "Duplicate (With Time)": "Duplicate (With Time)", "Duplicate (With Geographic Location)": "Duplicate (With Geographic Location)", "Duplicate (With Time and Geographic Location)": "Duplicate (With Time and Geographic Location)", + "AI Image Recognition": "AI Image Recognition", + "Choose from Library": "Choose from Library", + "Take Photo": "Take Photo", + "Unable to load image": "Unable to load image", + "Unable to recognize image": "Unable to recognize image", + "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one", + "Release to load image": "Release to load image", + "Please select a receipt or transaction image first": "Please select a receipt or transaction image first", "Category": "Categoría", "Secondary Category": "Secondary Category", "Expense Category": "Expense Category", diff 
--git a/src/locales/it.json b/src/locales/it.json index eee224d6..6ebfdee2 100644 --- a/src/locales/it.json +++ b/src/locales/it.json @@ -1224,6 +1224,10 @@ "cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency", "cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency", "mcp server is not enabled": "MCP Server is not enabled", + "llm provider is not enabled": "Large Language Model provider is not enabled", + "no image for AI recognition": "There is no image for AI recognition", + "image for AI recognition is empty": "Image for AI recognition file is empty", + "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size", "query items cannot be blank": "Non ci sono elementi di query", "query items too much": "Ci sono troppi elementi di query", "query items have invalid item": "C'è un elemento non valido negli elementi di query", @@ -1389,6 +1393,7 @@ "Refresh": "Aggiorna", "Clear": "Pulisci", "Generate": "Generate", + "Recognize": "Recognize", "None": "Nessuno", "Unspecified": "Non specificato", "Not set": "Non impostato", @@ -1719,6 +1724,14 @@ "Duplicate (With Time)": "Duplica (con ora)", "Duplicate (With Geographic Location)": "Duplica (con posizione geografica)", "Duplicate (With Time and Geographic Location)": "Duplica (con ora e posizione geografica)", + "AI Image Recognition": "AI Image Recognition", + "Choose from Library": "Choose from Library", + "Take Photo": "Take Photo", + "Unable to load image": "Unable to load image", + "Unable to recognize image": "Unable to recognize image", + "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one", + "Release to load image": "Release to load image", + "Please select a receipt or transaction image first": "Please select a receipt or transaction 
image first", "Category": "Categoria", "Secondary Category": "Categoria secondaria", "Expense Category": "Expense Category", diff --git a/src/locales/ja.json b/src/locales/ja.json index 2548ff65..4ed42e63 100644 --- a/src/locales/ja.json +++ b/src/locales/ja.json @@ -1224,6 +1224,10 @@ "cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency", "cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency", "mcp server is not enabled": "MCP Server is not enabled", + "llm provider is not enabled": "Large Language Model provider is not enabled", + "no image for AI recognition": "There is no image for AI recognition", + "image for AI recognition is empty": "Image for AI recognition file is empty", + "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size", "query items cannot be blank": "クエリ項目がありません", "query items too much": "クエリ項目が多すぎます", "query items have invalid item": "クエリ項目に無効な項目があります", @@ -1389,6 +1393,7 @@ "Refresh": "リフレッシュ", "Clear": "消去", "Generate": "Generate", + "Recognize": "Recognize", "None": "なし", "Unspecified": "不特定", "Not set": "セットしていない", @@ -1719,6 +1724,14 @@ "Duplicate (With Time)": "複製(時間含む)", "Duplicate (With Geographic Location)": "複製(地理座標を含む)", "Duplicate (With Time and Geographic Location)": "複製(時間と地理座標を含む)", + "AI Image Recognition": "AI Image Recognition", + "Choose from Library": "Choose from Library", + "Take Photo": "Take Photo", + "Unable to load image": "Unable to load image", + "Unable to recognize image": "Unable to recognize image", + "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one", + "Release to load image": "Release to load image", + "Please select a receipt or transaction image first": "Please select a receipt or transaction image first", "Category": 
"カテゴリ", "Secondary Category": "二次カテゴリ", "Expense Category": "Expense Category", diff --git a/src/locales/nl.json b/src/locales/nl.json index f303a99e..a0119750 100644 --- a/src/locales/nl.json +++ b/src/locales/nl.json @@ -1224,6 +1224,10 @@ "cannot update exchange rate data for base currency": "Wisselkoersgegevens voor basisvaluta kunnen niet worden bijgewerkt", "cannot delete exchange rate data for base currency": "Wisselkoersgegevens voor basisvaluta kunnen niet worden verwijderd", "mcp server is not enabled": "MCP-server is niet ingeschakeld", + "llm provider is not enabled": "Large Language Model provider is not enabled", + "no image for AI recognition": "There is no image for AI recognition", + "image for AI recognition is empty": "Image for AI recognition file is empty", + "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size", "query items cannot be blank": "Geen zoekitems opgegeven", "query items too much": "Te veel zoekitems", "query items have invalid item": "Ongeldig item in zoekitems", @@ -1389,6 +1393,7 @@ "Refresh": "Vernieuwen", "Clear": "Wissen", "Generate": "Genereren", + "Recognize": "Recognize", "None": "Geen", "Unspecified": "Niet gespecificeerd", "Not set": "Niet ingesteld", @@ -1719,6 +1724,14 @@ "Duplicate (With Time)": "Dupliceren (met tijd)", "Duplicate (With Geographic Location)": "Dupliceren (met geografische locatie)", "Duplicate (With Time and Geographic Location)": "Dupliceren (met tijd en locatie)", + "AI Image Recognition": "AI Image Recognition", + "Choose from Library": "Choose from Library", + "Take Photo": "Take Photo", + "Unable to load image": "Unable to load image", + "Unable to recognize image": "Unable to recognize image", + "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one", + "Release to load image": "Release to load image", + "Please 
select a receipt or transaction image first": "Please select a receipt or transaction image first", "Category": "Categorie", "Secondary Category": "Secundaire categorie", "Expense Category": "Uitgavecategorie", diff --git a/src/locales/pt_BR.json b/src/locales/pt_BR.json index 587fd5ac..7faee390 100644 --- a/src/locales/pt_BR.json +++ b/src/locales/pt_BR.json @@ -1224,6 +1224,10 @@ "cannot update exchange rate data for base currency": "Não é possível atualizar dados de taxa de câmbio para a moeda base", "cannot delete exchange rate data for base currency": "Não é possível excluir dados de taxa de câmbio para a moeda base", "mcp server is not enabled": "MCP Server is not enabled", + "llm provider is not enabled": "Large Language Model provider is not enabled", + "no image for AI recognition": "There is no image for AI recognition", + "image for AI recognition is empty": "Image for AI recognition file is empty", + "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size", "query items cannot be blank": "Não há itens de consulta", "query items too much": "Há muitos itens de consulta", "query items have invalid item": "Há item inválido nos itens de consulta", @@ -1389,6 +1393,7 @@ "Refresh": "Atualizar", "Clear": "Limpar", "Generate": "Generate", + "Recognize": "Recognize", "None": "Nenhum", "Unspecified": "Não especificado", "Not set": "Não definido", @@ -1719,6 +1724,14 @@ "Duplicate (With Time)": "Duplicar (Com Tempo)", "Duplicate (With Geographic Location)": "Duplicar (Com Localização Geográfica)", "Duplicate (With Time and Geographic Location)": "Duplicar (Com Tempo e Localização Geográfica)", + "AI Image Recognition": "AI Image Recognition", + "Choose from Library": "Choose from Library", + "Take Photo": "Take Photo", + "Unable to load image": "Unable to load image", + "Unable to recognize image": "Unable to recognize image", + "Drag and drop a receipt or transaction image here, or 
click to select one": "Drag and drop a receipt or transaction image here, or click to select one", + "Release to load image": "Release to load image", + "Please select a receipt or transaction image first": "Please select a receipt or transaction image first", "Category": "Categoria", "Secondary Category": "Categoria Secundária", "Expense Category": "Expense Category", diff --git a/src/locales/ru.json b/src/locales/ru.json index 0c6d6e34..d5e5df63 100644 --- a/src/locales/ru.json +++ b/src/locales/ru.json @@ -1224,6 +1224,10 @@ "cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency", "cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency", "mcp server is not enabled": "MCP Server is not enabled", + "llm provider is not enabled": "Large Language Model provider is not enabled", + "no image for AI recognition": "There is no image for AI recognition", + "image for AI recognition is empty": "Image for AI recognition file is empty", + "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size", "query items cannot be blank": "Нет элементов запроса", "query items too much": "Слишком много элементов запроса", "query items have invalid item": "В элементах запроса присутствует недопустимый элемент", @@ -1389,6 +1393,7 @@ "Refresh": "Обновить", "Clear": "Очистить", "Generate": "Generate", + "Recognize": "Recognize", "None": "Нет", "Unspecified": "Не указано", "Not set": "Не установлено", @@ -1719,6 +1724,14 @@ "Duplicate (With Time)": "Duplicate (With Time)", "Duplicate (With Geographic Location)": "Duplicate (With Geographic Location)", "Duplicate (With Time and Geographic Location)": "Duplicate (With Time and Geographic Location)", + "AI Image Recognition": "AI Image Recognition", + "Choose from Library": "Choose from Library", + "Take Photo": "Take Photo", + "Unable to load image": "Unable to 
load image", + "Unable to recognize image": "Unable to recognize image", + "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one", + "Release to load image": "Release to load image", + "Please select a receipt or transaction image first": "Please select a receipt or transaction image first", "Category": "Категория", "Secondary Category": "Secondary Category", "Expense Category": "Expense Category", diff --git a/src/locales/uk.json b/src/locales/uk.json index 6501ae44..5ef83a28 100644 --- a/src/locales/uk.json +++ b/src/locales/uk.json @@ -1224,6 +1224,10 @@ "cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency", "cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency", "mcp server is not enabled": "MCP Server is not enabled", + "llm provider is not enabled": "Large Language Model provider is not enabled", + "no image for AI recognition": "There is no image for AI recognition", + "image for AI recognition is empty": "Image for AI recognition file is empty", + "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size", "query items cannot be blank": "Елементи запиту не можуть бути порожніми", "query items too much": "Занадто багато елементів запиту", "query items have invalid item": "Запит містить недійсний елемент", @@ -1389,6 +1393,7 @@ "Refresh": "Оновити", "Clear": "Очистити", "Generate": "Generate", + "Recognize": "Recognize", "None": "Немає", "Unspecified": "Не вказано", "Not set": "Не встановлено", @@ -1719,6 +1724,14 @@ "Duplicate (With Time)": "Дублювати (з часом)", "Duplicate (With Geographic Location)": "Дублювати (з геолокацією)", "Duplicate (With Time and Geographic Location)": "Дублювати (з часом і геолокацією)", + "AI Image Recognition": "AI Image Recognition", + "Choose from 
Library": "Choose from Library", + "Take Photo": "Take Photo", + "Unable to load image": "Unable to load image", + "Unable to recognize image": "Unable to recognize image", + "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one", + "Release to load image": "Release to load image", + "Please select a receipt or transaction image first": "Please select a receipt or transaction image first", "Category": "Категорія", "Secondary Category": "Вторинна категорія", "Expense Category": "Expense Category", diff --git a/src/locales/vi.json b/src/locales/vi.json index f22e49a2..c7feb23b 100644 --- a/src/locales/vi.json +++ b/src/locales/vi.json @@ -1224,6 +1224,10 @@ "cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency", "cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency", "mcp server is not enabled": "MCP Server is not enabled", + "llm provider is not enabled": "Large Language Model provider is not enabled", + "no image for AI recognition": "There is no image for AI recognition", + "image for AI recognition is empty": "Image for AI recognition file is empty", + "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size", "query items cannot be blank": "Không có mục truy vấn", "query items too much": "Có quá nhiều mục truy vấn", "query items have invalid item": "Có mục không hợp lệ trong các mục truy vấn", @@ -1389,6 +1393,7 @@ "Refresh": "Làm mới", "Clear": "Xóa", "Generate": "Generate", + "Recognize": "Recognize", "None": "Không có", "Unspecified": "Không xác định", "Not set": "Not set", @@ -1719,6 +1724,14 @@ "Duplicate (With Time)": "Duplicate (With Time)", "Duplicate (With Geographic Location)": "Duplicate (With Geographic Location)", "Duplicate (With Time and Geographic Location)": "Duplicate 
(With Time and Geographic Location)", + "AI Image Recognition": "AI Image Recognition", + "Choose from Library": "Choose from Library", + "Take Photo": "Take Photo", + "Unable to load image": "Unable to load image", + "Unable to recognize image": "Unable to recognize image", + "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one", + "Release to load image": "Release to load image", + "Please select a receipt or transaction image first": "Please select a receipt or transaction image first", "Category": "Danh mục", "Secondary Category": "Secondary Category", "Expense Category": "Expense Category", diff --git a/src/locales/zh_Hans.json b/src/locales/zh_Hans.json index a1cfa150..0504cb38 100644 --- a/src/locales/zh_Hans.json +++ b/src/locales/zh_Hans.json @@ -1224,6 +1224,10 @@ "cannot update exchange rate data for base currency": "不能更新默认货币的汇率数据", "cannot delete exchange rate data for base currency": "不能删除默认货币的汇率数据", "mcp server is not enabled": "MCP 服务器没有启用", + "llm provider is not enabled": "大语言模型服务提供者没有启用", + "no image for AI recognition": "没有用于AI识别的图片", + "image for AI recognition is empty": "用于AI识别的图片为空", + "exceed the maximum size of image file for AI recognition": "用于AI识别的图片超出了允许的最大文件大小", "query items cannot be blank": "请求项目不能为空", "query items too much": "请求项目过多", "query items have invalid item": "请求项目中有非法项目", @@ -1389,6 +1393,7 @@ "Refresh": "刷新", "Clear": "清除", "Generate": "生成", + "Recognize": "识别", "None": "无", "Unspecified": "未指定", "Not set": "未设置", @@ -1719,6 +1724,14 @@ "Duplicate (With Time)": "复制 (含时间)", "Duplicate (With Geographic Location)": "复制 (含地理位置)", "Duplicate (With Time and Geographic Location)": "复制 (含时间和地理位置)", + "AI Image Recognition": "AI识图", + "Choose from Library": "从图库选择", + "Take Photo": "拍照", + "Unable to load image": "无法加载图片", + "Unable to recognize image": "无法识别图片", + "Drag and drop a receipt or transaction image here, or click to select 
one": "拖拽收据或交易图片到此处,或点击选择图片", + "Release to load image": "释放以加载图片", + "Please select a receipt or transaction image first": "请先选择收据或交易图片", "Category": "分类", "Secondary Category": "二级分类", "Expense Category": "支出分类", diff --git a/src/locales/zh_Hant.json b/src/locales/zh_Hant.json index 7eb66c95..3614c9b5 100644 --- a/src/locales/zh_Hant.json +++ b/src/locales/zh_Hant.json @@ -1224,6 +1224,10 @@ "cannot update exchange rate data for base currency": "不能更新基準貨幣的匯率資料", "cannot delete exchange rate data for base currency": "不能刪除基準貨幣的匯率資料", "mcp server is not enabled": "MCP 伺服器未啟用", + "llm provider is not enabled": "大型語言模型服務提供者未啟用", + "no image for AI recognition": "沒有用於AI識別的圖片檔案", + "image for AI recognition is empty": "用於AI識別的圖片檔案為空", + "exceed the maximum size of image file for AI recognition": "用於AI識別的圖片超出了允許的最大檔案大小", "query items cannot be blank": "查詢項目不能為空", "query items too much": "查詢項目過多", "query items have invalid item": "查詢項目中有非法項目", @@ -1389,6 +1393,7 @@ "Refresh": "重新載入", "Clear": "清除", "Generate": "產生", + "Recognize": "識別", "None": "無", "Unspecified": "未指定", "Not set": "未設置", @@ -1719,6 +1724,14 @@ "Duplicate (With Time)": "複製 (含時間)", "Duplicate (With Geographic Location)": "複製 (含地理位置)", "Duplicate (With Time and Geographic Location)": "複製 (含時間和地理位置)", + "AI Image Recognition": "AI識圖", + "Choose from Library": "從相簿選擇", + "Take Photo": "拍照", + "Unable to load image": "無法載入圖片", + "Unable to recognize image": "無法識別圖片", + "Drag and drop a receipt or transaction image here, or click to select one": "將收據或交易圖片拖放到此處,或點擊以選擇圖片", + "Release to load image": "放開以載入圖片", + "Please select a receipt or transaction image first": "請先選擇收據或交易圖片", "Category": "分類", "Secondary Category": "次分類", "Expense Category": "支出分類", diff --git a/src/mobile-main.ts b/src/mobile-main.ts index 7713f3dc..8822f19f 100644 --- a/src/mobile-main.ts +++ b/src/mobile-main.ts @@ -79,6 +79,7 @@ import MapSheet from '@/components/mobile/MapSheet.vue'; import TransactionTagSelectionSheet from 
'@/components/mobile/TransactionTagSelectionSheet.vue'; import ScheduleFrequencySheet from '@/components/mobile/ScheduleFrequencySheet.vue'; import AccountBalanceTrendsBarChart from '@/components/mobile/AccountBalanceTrendsBarChart.vue'; +import AIImageRecognitionSheet from '@/components/mobile/AIImageRecognitionSheet.vue'; import TextareaAutoSize from '@/directives/mobile/textareaAutoSize.ts'; @@ -170,8 +171,9 @@ app.component('InformationSheet', InformationSheet); app.component('NumberPadSheet', NumberPadSheet); app.component('MapSheet', MapSheet); app.component('TransactionTagSelectionSheet', TransactionTagSelectionSheet); -app.component('AccountBalanceTrendsBarChart', AccountBalanceTrendsBarChart); app.component('ScheduleFrequencySheet', ScheduleFrequencySheet); +app.component('AccountBalanceTrendsBarChart', AccountBalanceTrendsBarChart); +app.component('AIImageRecognitionSheet', AIImageRecognitionSheet); app.directive('TextareaAutoSize', TextareaAutoSize); diff --git a/src/models/large_language_model.ts b/src/models/large_language_model.ts new file mode 100644 index 00000000..2a9f3a78 --- /dev/null +++ b/src/models/large_language_model.ts @@ -0,0 +1,11 @@ +export interface RecognizedReceiptImageResponse { + readonly type: number; + readonly time?: number; + readonly categoryId?: string; + readonly sourceAccountId?: string; + readonly destinationAccountId?: string; + readonly sourceAmount?: number; + readonly destinationAmount?: number; + readonly tagIds?: string[]; + readonly comment?: string; +} diff --git a/src/stores/transaction.ts b/src/stores/transaction.ts index 462ee04c..bfd86854 100644 --- a/src/stores/transaction.ts +++ b/src/stores/transaction.ts @@ -33,6 +33,9 @@ import { import { type ExportTransactionDataRequest } from '@/models/data_management.ts'; +import type { + RecognizedReceiptImageResponse +} from '@/models/large_language_model.ts'; import { getUserTransactionDraft, @@ -1157,6 +1160,31 @@ export const useTransactionsStore = 
defineStore('transactions', () => { }); } + function recognizeReceiptImage({ imageFile }: { imageFile: File }): Promise { + return new Promise((resolve, reject) => { + services.recognizeReceiptImage({ imageFile }).then(response => { + const data = response.data; + + if (!data || !data.success || !data.result) { + reject({ message: 'Unable to recognize image' }); + return; + } + + resolve(data.result); + }).catch(error => { + logger.error('failed to recognize image', error); + + if (error.response && error.response.data && error.response.data.errorMessage) { + reject({ error: error.response.data }); + } else if (!error.processed) { + reject({ message: 'Unable to recognize image' }); + } else { + reject(error); + } + }); + }); + } + function parseImportDsvFile({ fileType, fileEncoding, importFile }: { fileType: string, fileEncoding?: string, importFile: File }): Promise { return new Promise((resolve, reject) => { services.parseImportDsvFile({ fileType, fileEncoding, importFile }).then(response => { @@ -1370,6 +1398,7 @@ export const useTransactionsStore = defineStore('transactions', () => { getTransaction, saveTransaction, deleteTransaction, + recognizeReceiptImage, parseImportDsvFile, parseImportTransaction, importTransactions, diff --git a/src/views/desktop/transactions/ListPage.vue b/src/views/desktop/transactions/ListPage.vue index 36ef9125..84658ba2 100644 --- a/src/views/desktop/transactions/ListPage.vue +++ b/src/views/desktop/transactions/ListPage.vue @@ -63,11 +63,16 @@ {{ tt('Add') }} - + - + @@ -620,6 +625,7 @@ @error="onShowDateRangeError" /> + @@ -647,6 +653,7 @@ import PaginationButtons from '@/components/desktop/PaginationButtons.vue'; import ConfirmDialog from '@/components/desktop/ConfirmDialog.vue'; import SnackBar from '@/components/desktop/SnackBar.vue'; import EditDialog from './list/dialogs/EditDialog.vue'; +import AIImageRecognitionDialog from './list/dialogs/AIImageRecognitionDialog.vue'; import ImportDialog from './import/ImportDialog.vue'; 
import AccountFilterSettingsCard from '@/views/desktop/common/cards/AccountFilterSettingsCard.vue'; import CategoryFilterSettingsCard from '@/views/desktop/common/cards/CategoryFilterSettingsCard.vue'; @@ -716,7 +723,7 @@ import { categoryTypeToTransactionType, transactionTypeToCategoryType } from '@/lib/category.ts'; -import { isDataExportingEnabled, isDataImportingEnabled } from '@/lib/server_settings.ts'; +import { isDataExportingEnabled, isDataImportingEnabled, isTransactionFromAIImageRecognitionEnabled } from '@/lib/server_settings.ts'; import { startDownloadFile } from '@/lib/ui/common.ts'; import { scrollToSelectedItem } from '@/lib/ui/desktop.ts'; import logger from '@/lib/logger.ts'; @@ -738,6 +745,7 @@ import { mdiMinusBoxMultipleOutline, mdiCloseBoxMultipleOutline, mdiPound, + mdiMagicStaff, mdiTextBoxOutline } from '@mdi/js'; @@ -760,6 +768,7 @@ const props = defineProps(); type ConfirmDialogType = InstanceType; type SnackBarType = InstanceType; type EditDialogType = InstanceType; +type AIImageRecognitionDialogType = InstanceType; type ImportDialogType = InstanceType; interface TransactionTemplateWithIcon { @@ -859,6 +868,7 @@ const tagFilterMenu = useTemplateRef('tagFilterMenu'); const confirmDialog = useTemplateRef('confirmDialog'); const snackbar = useTemplateRef('snackbar'); const editDialog = useTemplateRef('editDialog'); +const aiImageRecognitionDialog = useTemplateRef('aiImageRecognitionDialog'); const importDialog = useTemplateRef('importDialog'); const activeTab = ref('transactionPage'); @@ -1597,6 +1607,33 @@ function add(template?: TransactionTemplate): void { }); } +function addByRecognizingImage(): void { + aiImageRecognitionDialog.value?.open().then(result => { + editDialog.value?.open({ + time: result.time, + type: result.type, + categoryId: result.categoryId, + accountId: result.sourceAccountId, + destinationAccountId: result.destinationAccountId, + amount: result.sourceAmount, + destinationAmount: result.destinationAmount, + tagIds: 
result.tagIds ? result.tagIds.join(',') : undefined, + comment: result.comment, + noTransactionDraft: true + }).then(result => { + if (result && result.message) { + snackbar.value?.showMessage(result.message); + } + + reload(false, false); + }).catch(error => { + if (error) { + snackbar.value?.showError(error); + } + }); + }); +} + function importTransaction(): void { importDialog.value?.open().then(() => { reload(false, false); diff --git a/src/views/desktop/transactions/list/dialogs/AIImageRecognitionDialog.vue b/src/views/desktop/transactions/list/dialogs/AIImageRecognitionDialog.vue new file mode 100644 index 00000000..1d79f6f7 --- /dev/null +++ b/src/views/desktop/transactions/list/dialogs/AIImageRecognitionDialog.vue @@ -0,0 +1,208 @@ + + + + + diff --git a/src/views/mobile/HomePage.vue b/src/views/mobile/HomePage.vue index 211d12d6..25a4814c 100644 --- a/src/views/mobile/HomePage.vue +++ b/src/views/mobile/HomePage.vue @@ -188,7 +188,14 @@ - + + +