diff --git a/README.md b/README.md
index 415bbf0e..1bffe1f7 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,7 @@ Live Demo: [https://ezbookkeeping-demo.mayswind.net](https://ezbookkeeping-demo.
- PWA support for native-like mobile experience
- Dark mode
- **AI-Powered Features**
+ - Receipt image recognition
- Supports MCP (Model Context Protocol) for AI integration
- **Powerful Bookkeeping**
- Two-level accounts and categories
diff --git a/cmd/initializer.go b/cmd/initializer.go
index a26ec124..5389e003 100644
--- a/cmd/initializer.go
+++ b/cmd/initializer.go
@@ -9,6 +9,7 @@ import (
"github.com/mayswind/ezbookkeeping/pkg/datastore"
"github.com/mayswind/ezbookkeeping/pkg/duplicatechecker"
"github.com/mayswind/ezbookkeeping/pkg/exchangerates"
+ "github.com/mayswind/ezbookkeeping/pkg/llm"
"github.com/mayswind/ezbookkeeping/pkg/log"
"github.com/mayswind/ezbookkeeping/pkg/mail"
"github.com/mayswind/ezbookkeeping/pkg/settings"
@@ -90,6 +91,15 @@ func initializeSystem(c *core.CliContext) (*settings.Config, error) {
return nil, err
}
+ err = llm.InitializeLargeLanguageModelProvider(config)
+
+ if err != nil {
+ if !isDisableBootLog {
+ log.BootErrorf(c, "[initializer.initializeSystem] initializes large language model provider failed, because %s", err.Error())
+ }
+ return nil, err
+ }
+
err = uuid.InitializeUuidGenerator(config)
if err != nil {
@@ -155,6 +165,9 @@ func getConfigWithoutSensitiveData(config *settings.Config) *settings.Config {
clonedConfig.DatabaseConfig.DatabasePassword = "****"
clonedConfig.SMTPConfig.SMTPPasswd = "****"
clonedConfig.MinIOConfig.SecretAccessKey = "****"
+ clonedConfig.OpenAIAPIKey = "****"
+ clonedConfig.OpenAICompatibleAPIKey = "****"
+ clonedConfig.OpenRouterAPIKey = "****"
clonedConfig.SecretKey = "****"
clonedConfig.AmapApplicationSecret = "****"
diff --git a/cmd/webserver.go b/cmd/webserver.go
index 424c77da..3342a237 100644
--- a/cmd/webserver.go
+++ b/cmd/webserver.go
@@ -396,6 +396,13 @@ func startWebServer(c *core.CliContext) error {
apiV1Route.POST("/transaction/templates/move.json", bindApi(api.TransactionTemplates.TemplateMoveHandler))
apiV1Route.POST("/transaction/templates/delete.json", bindApi(api.TransactionTemplates.TemplateDeleteHandler))
+ // Large Language Models
+ if config.LLMProvider != "" {
+ if config.TransactionFromAIImageRecognition {
+ apiV1Route.POST("/llm/transactions/recognize_receipt_image.json", bindApi(api.LargeLanguageModels.RecognizeReceiptImageHandler))
+ }
+ }
+
// Exchange Rates
apiV1Route.GET("/exchange_rates/latest.json", bindApi(api.ExchangeRates.LatestExchangeRateHandler))
apiV1Route.POST("/exchange_rates/user_custom/update.json", bindApi(api.ExchangeRates.UserCustomExchangeRateUpdateHandler))
diff --git a/conf/ezbookkeeping.ini b/conf/ezbookkeeping.ini
index ab4f145a..979547f9 100644
--- a/conf/ezbookkeeping.ini
+++ b/conf/ezbookkeeping.ini
@@ -164,6 +164,53 @@ webdav_proxy = system
# For "webdav" storage only, set to true to skip tls verification when connect webdav
webdav_skip_tls_verify = false
+[llm]
+# Large Language Model (LLM) provider, supports the following types: "openai", "openai_compatible", "openrouter", "ollama"
+llm_provider =
+
+# For "openai" llm provider only, OpenAI API secret key, please visit https://platform.openai.com/api-keys for more information
+openai_api_key =
+
+# For "openai" llm provider only, receipt image recognition model for creating transactions from images
+openai_receipt_image_recognition_model_id =
+
+# For "openai_compatible" llm provider only, OpenAI compatible API base url, e.g. "https://api.openai.com/v1/"
+openai_compatible_base_url =
+
+# For "openai_compatible" llm provider only, OpenAI compatible API secret key
+openai_compatible_api_key =
+
+# For "openai_compatible" llm provider only, receipt image recognition model for creating transactions from images
+openai_compatible_receipt_image_recognition_model_id =
+
+# For "openrouter" llm provider only, OpenRouter API key, please visit https://openrouter.ai/settings/keys for more information
+openrouter_api_key =
+
+# For "openrouter" llm provider only, receipt image recognition model for creating transactions from images
+openrouter_receipt_image_recognition_model_id =
+
+# For "ollama" llm provider only, Ollama server url, e.g. "http://127.0.0.1:11434/"
+ollama_server_url =
+
+# For "ollama" llm provider only, receipt image recognition model for creating transactions from images
+ollama_receipt_image_recognition_model_id =
+
+# Set to true to enable creating transactions from AI image recognition results, requires llm_provider and its related receipt image recognition model to be configured properly
+transaction_from_ai_image_recognition = false
+
+# Maximum allowed AI recognition picture file size (1 - 4294967295 bytes)
+max_ai_recognition_picture_size = 10485760
+
+# Timeout for requesting large language model api (0 - 4294967295 milliseconds)
+# Set to 0 to disable timeout for requesting large language model api, default is 60000 (60 seconds)
+request_timeout = 60000
+
+# Proxy for ezbookkeeping server requesting large language model api, supports "system" (use system proxy), "none" (do not use proxy), or proxy URL which starts with "http://", "https://" or "socks5://", default is "system"
+proxy = system
+
+# Set to true to skip tls verification when request large language model api
+skip_tls_verify = false
+
[uuid]
# Uuid generator type, supports "internal" currently
generator_type = internal
diff --git a/pkg/api/large_language_models.go b/pkg/api/large_language_models.go
new file mode 100644
index 00000000..69f8d0f9
--- /dev/null
+++ b/pkg/api/large_language_models.go
@@ -0,0 +1,345 @@
+package api
+
+import (
+ "bytes"
+ "encoding/json"
+ "io"
+ "strings"
+
+ "github.com/mayswind/ezbookkeeping/pkg/core"
+ "github.com/mayswind/ezbookkeeping/pkg/errs"
+ "github.com/mayswind/ezbookkeeping/pkg/llm"
+ "github.com/mayswind/ezbookkeeping/pkg/log"
+ "github.com/mayswind/ezbookkeeping/pkg/models"
+ "github.com/mayswind/ezbookkeeping/pkg/services"
+ "github.com/mayswind/ezbookkeeping/pkg/settings"
+ "github.com/mayswind/ezbookkeeping/pkg/templates"
+ "github.com/mayswind/ezbookkeeping/pkg/utils"
+)
+
+// LargeLanguageModelsApi represents large language models api
+type LargeLanguageModelsApi struct {
+ ApiUsingConfig
+ transactionCategories *services.TransactionCategoryService
+ transactionTags *services.TransactionTagService
+ accounts *services.AccountService
+ users *services.UserService
+}
+
+// Initialize a large language models api singleton instance
+var (
+ LargeLanguageModels = &LargeLanguageModelsApi{
+ ApiUsingConfig: ApiUsingConfig{
+ container: settings.Container,
+ },
+ transactionCategories: services.TransactionCategories,
+ transactionTags: services.TransactionTags,
+ accounts: services.Accounts,
+ users: services.Users,
+ }
+)
+
+// RecognizeReceiptImageHandler returns the recognized receipt image result
+func (a *LargeLanguageModelsApi) RecognizeReceiptImageHandler(c *core.WebContext) (any, *errs.Error) {
+ if a.CurrentConfig().LLMProvider == "" || !a.CurrentConfig().TransactionFromAIImageRecognition {
+ return nil, errs.ErrLargeLanguageModelProviderNotEnabled
+ }
+
+ utcOffset, err := c.GetClientTimezoneOffset()
+
+ if err != nil {
+ log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] cannot get client timezone offset, because %s", err.Error())
+ return nil, errs.ErrClientTimezoneOffsetInvalid
+ }
+
+ uid := c.GetCurrentUid()
+ user, err := a.users.GetUserById(c, uid)
+
+ if err != nil {
+ if !errs.IsCustomError(err) {
+ log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get user for user \"uid:%d\", because %s", uid, err.Error())
+ }
+
+ return false, errs.ErrUserNotFound
+ }
+
+ if user.FeatureRestriction.Contains(core.USER_FEATURE_RESTRICTION_TYPE_CREATE_TRANSACTION_FROM_AI_IMAGE_RECOGNITION) {
+ return false, errs.ErrNotPermittedToPerformThisAction
+ }
+
+ form, err := c.MultipartForm()
+
+ if err != nil {
+ log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get multi-part form data for user \"uid:%d\", because %s", uid, err.Error())
+ return nil, errs.ErrParameterInvalid
+ }
+
+ imageFiles := form.File["image"]
+
+ if len(imageFiles) < 1 {
+ log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] there is no image in request for user \"uid:%d\"", uid)
+ return nil, errs.ErrNoAIRecognitionImage
+ }
+
+ if imageFiles[0].Size < 1 {
+ log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] the size of image in request is zero for user \"uid:%d\"", uid)
+ return nil, errs.ErrAIRecognitionImageIsEmpty
+ }
+
+ if imageFiles[0].Size > int64(a.CurrentConfig().MaxAIRecognitionPictureFileSize) {
+ log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] the upload file size \"%d\" exceeds the maximum size \"%d\" of image for user \"uid:%d\"", imageFiles[0].Size, a.CurrentConfig().MaxAIRecognitionPictureFileSize, uid)
+ return nil, errs.ErrExceedMaxAIRecognitionImageFileSize
+ }
+
+ fileExtension := utils.GetFileNameExtension(imageFiles[0].Filename)
+
+ if utils.GetImageContentType(fileExtension) == "" {
+ log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] the file extension \"%s\" of image in request is not supported for user \"uid:%d\"", fileExtension, uid)
+ return nil, errs.ErrImageTypeNotSupported
+ }
+
+ imageFile, err := imageFiles[0].Open()
+
+ if err != nil {
+ log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get image file from request for user \"uid:%d\", because %s", uid, err.Error())
+ return nil, errs.ErrOperationFailed
+ }
+
+ defer imageFile.Close()
+
+ imageData, err := io.ReadAll(imageFile)
+
+ if err != nil {
+ log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to read image file from request for user \"uid:%d\", because %s", uid, err.Error())
+ return nil, errs.ErrOperationFailed
+ }
+
+ accounts, err := a.accounts.GetAllAccountsByUid(c, uid)
+
+ if err != nil {
+ log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get all accounts for user \"uid:%d\", because %s", uid, err.Error())
+ return nil, errs.Or(err, errs.ErrOperationFailed)
+ }
+
+ accountMap := a.accounts.GetVisibleAccountNameMapByList(accounts)
+ accountNames := make([]string, 0, len(accounts))
+
+ for i := 0; i < len(accounts); i++ {
+ if accounts[i].Hidden || accounts[i].Type == models.ACCOUNT_TYPE_MULTI_SUB_ACCOUNTS {
+ continue
+ }
+
+ accountNames = append(accountNames, accounts[i].Name)
+ }
+
+ categories, err := a.transactionCategories.GetAllCategoriesByUid(c, uid, 0, -1)
+
+ if err != nil {
+ log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get categories for user \"uid:%d\", because %s", uid, err.Error())
+ return nil, errs.Or(err, errs.ErrOperationFailed)
+ }
+
+ incomeCategoryMap := make(map[string]*models.TransactionCategory)
+ incomeCategoryNames := make([]string, 0)
+
+ expenseCategoryMap := make(map[string]*models.TransactionCategory)
+ expenseCategoryNames := make([]string, 0)
+
+ transferCategoryMap := make(map[string]*models.TransactionCategory)
+ transferCategoryNames := make([]string, 0)
+
+ for i := 0; i < len(categories); i++ {
+ category := categories[i]
+
+ if category.Hidden || category.ParentCategoryId == models.LevelOneTransactionCategoryParentId {
+ continue
+ }
+
+ if category.Type == models.CATEGORY_TYPE_INCOME {
+ incomeCategoryMap[category.Name] = category
+ incomeCategoryNames = append(incomeCategoryNames, category.Name)
+ } else if category.Type == models.CATEGORY_TYPE_EXPENSE {
+ expenseCategoryMap[category.Name] = category
+ expenseCategoryNames = append(expenseCategoryNames, category.Name)
+ } else if category.Type == models.CATEGORY_TYPE_TRANSFER {
+ transferCategoryMap[category.Name] = category
+ transferCategoryNames = append(transferCategoryNames, category.Name)
+ }
+ }
+
+ tags, err := a.transactionTags.GetAllTagsByUid(c, uid)
+
+ if err != nil {
+ log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get tags for user \"uid:%d\", because %s", uid, err.Error())
+ return nil, errs.Or(err, errs.ErrOperationFailed)
+ }
+
+ tagMap := a.transactionTags.GetVisibleTagNameMapByList(tags)
+ tagNames := make([]string, 0, len(tags))
+
+ for i := 0; i < len(tags); i++ {
+ if tags[i].Hidden {
+ continue
+ }
+
+ tagNames = append(tagNames, tags[i].Name)
+ }
+
+ systemPrompt, err := templates.GetTemplate(templates.SYSTEM_PROMPT_RECEIPT_IMAGE_RECOGNITION)
+
+ if err != nil {
+ return nil, errs.Or(err, errs.ErrOperationFailed)
+ }
+
+ systemPromptParams := map[string]any{
+ "AllExpenseCategoryNames": strings.Join(expenseCategoryNames, "\n"),
+ "AllIncomeCategoryNames": strings.Join(incomeCategoryNames, "\n"),
+ "AllTransferCategoryNames": strings.Join(transferCategoryNames, "\n"),
+ "AllAccountNames": strings.Join(accountNames, "\n"),
+ "AllTagNames": strings.Join(tagNames, "\n"),
+ }
+
+ var bodyBuffer bytes.Buffer
+ err = systemPrompt.Execute(&bodyBuffer, systemPromptParams)
+
+ if err != nil {
+ return nil, errs.Or(err, errs.ErrOperationFailed)
+ }
+
+ llmRequest := &llm.LargeLanguageModelRequest{
+ Stream: false,
+ SystemPrompt: strings.ReplaceAll(bodyBuffer.String(), "\r\n", "\n"),
+ UserPrompt: imageData,
+ UserPromptType: llm.LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
+ }
+
+ llmResponse, err := llm.Container.GetJsonResponseByReceiptImageRecognitionModel(c, c.GetCurrentUid(), a.CurrentConfig(), llmRequest)
+
+ if err != nil {
+ return nil, errs.Or(err, errs.ErrOperationFailed)
+ }
+
+ var result *models.RecognizedReceiptImageResult
+
+ if err := json.Unmarshal([]byte(llmResponse.Content), &result); err != nil {
+ log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to unmarshal recognized receipt image result from llm response \"%s\" for user \"uid:%d\", because %s", llmResponse.Content, uid, err.Error())
+ return nil, errs.Or(err, errs.ErrOperationFailed)
+ }
+
+ return a.parseRecognizedReceiptImageResponse(c, uid, utcOffset, result, accountMap, expenseCategoryMap, incomeCategoryMap, transferCategoryMap, tagMap)
+}
+
+func (a *LargeLanguageModelsApi) parseRecognizedReceiptImageResponse(c *core.WebContext, uid int64, utcOffset int16, recognizedResult *models.RecognizedReceiptImageResult, accountMap map[string]*models.Account, expenseCategoryMap map[string]*models.TransactionCategory, incomeCategoryMap map[string]*models.TransactionCategory, transferCategoryMap map[string]*models.TransactionCategory, tagMap map[string]*models.TransactionTag) (*models.RecognizedReceiptImageResponse, *errs.Error) {
+ recognizedReceiptImageResponse := &models.RecognizedReceiptImageResponse{
+ Type: models.TRANSACTION_TYPE_EXPENSE,
+ }
+
+ if recognizedResult == nil {
+ log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recognized result is null")
+ return nil, errs.ErrOperationFailed
+ }
+
+ if recognizedResult.Type == "income" {
+ recognizedReceiptImageResponse.Type = models.TRANSACTION_TYPE_INCOME
+
+ if len(recognizedResult.CategoryName) > 0 {
+ category, exists := incomeCategoryMap[recognizedResult.CategoryName]
+
+ if exists {
+ recognizedReceiptImageResponse.CategoryId = category.CategoryId
+ }
+ }
+ } else if recognizedResult.Type == "expense" {
+ recognizedReceiptImageResponse.Type = models.TRANSACTION_TYPE_EXPENSE
+
+ if len(recognizedResult.CategoryName) > 0 {
+ category, exists := expenseCategoryMap[recognizedResult.CategoryName]
+
+ if exists {
+ recognizedReceiptImageResponse.CategoryId = category.CategoryId
+ }
+ }
+ } else if recognizedResult.Type == "transfer" {
+ recognizedReceiptImageResponse.Type = models.TRANSACTION_TYPE_TRANSFER
+
+ if len(recognizedResult.CategoryName) > 0 {
+ category, exists := transferCategoryMap[recognizedResult.CategoryName]
+
+ if exists {
+ recognizedReceiptImageResponse.CategoryId = category.CategoryId
+ }
+ }
+ } else {
+ log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recognized transaction type \"%s\" is invalid", recognizedResult.Type)
+ return nil, errs.ErrOperationFailed
+ }
+
+ if len(recognizedResult.Time) > 0 {
+ timestamp, err := utils.ParseFromLongDateTime(recognizedResult.Time, utcOffset)
+
+ if err != nil {
+ log.Warnf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recognized time \"%s\" is invalid", recognizedResult.Time)
+ } else {
+ recognizedReceiptImageResponse.Time = timestamp.Unix()
+ }
+ }
+
+ if len(recognizedResult.Amount) > 0 {
+ amount, err := utils.ParseAmount(recognizedResult.Amount)
+
+ if err != nil {
+ log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recognized amount \"%s\" is invalid", recognizedResult.Amount)
+ return nil, errs.ErrOperationFailed
+ }
+
+ recognizedReceiptImageResponse.SourceAmount = amount
+
+ if recognizedReceiptImageResponse.Type == models.TRANSACTION_TYPE_TRANSFER && len(recognizedResult.DestinationAmount) > 0 {
+ destinationAmount, err := utils.ParseAmount(recognizedResult.DestinationAmount)
+
+ if err != nil {
+ log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recognized destination amount \"%s\" is invalid", recognizedResult.DestinationAmount)
+ return nil, errs.ErrOperationFailed
+ }
+
+ recognizedReceiptImageResponse.DestinationAmount = destinationAmount
+ }
+ }
+
+ if len(recognizedResult.AccountName) > 0 {
+ account, exists := accountMap[recognizedResult.AccountName]
+
+ if exists {
+ recognizedReceiptImageResponse.SourceAccountId = account.AccountId
+ }
+ }
+
+ if len(recognizedResult.DestinationAccountName) > 0 {
+ account, exists := accountMap[recognizedResult.DestinationAccountName]
+
+ if exists {
+ recognizedReceiptImageResponse.DestinationAccountId = account.AccountId
+ }
+ }
+
+ if len(recognizedResult.TagNames) > 0 {
+ tagIds := make([]string, 0, len(recognizedResult.TagNames))
+
+ for i := 0; i < len(recognizedResult.TagNames); i++ {
+ tagName := recognizedResult.TagNames[i]
+ tag, exists := tagMap[tagName]
+
+ if exists {
+ tagIds = append(tagIds, utils.Int64ToString(tag.TagId))
+ }
+ }
+
+ recognizedReceiptImageResponse.TagIds = tagIds
+ }
+
+ if len(recognizedResult.Description) > 0 {
+ recognizedReceiptImageResponse.Comment = recognizedResult.Description
+ }
+
+ return recognizedReceiptImageResponse, nil
+}
diff --git a/pkg/api/server_settings.go b/pkg/api/server_settings.go
index ffc5b372..80c65fee 100644
--- a/pkg/api/server_settings.go
+++ b/pkg/api/server_settings.go
@@ -47,6 +47,12 @@ func (a *ServerSettingsApi) ServerSettingsJavascriptHandler(c *core.WebContext)
a.appendBooleanSetting(builder, "mcp", config.EnableMCPServer)
}
+ if config.LLMProvider != "" {
+ if config.TransactionFromAIImageRecognition {
+ a.appendBooleanSetting(builder, "llmt", config.TransactionFromAIImageRecognition)
+ }
+ }
+
if config.LoginPageTips.Enabled {
a.appendMultiLanguageTipSetting(builder, "lpt", config.LoginPageTips)
}
diff --git a/pkg/core/user_feature_restriction.go b/pkg/core/user_feature_restriction.go
index cec37c2b..2e0fa6ed 100644
--- a/pkg/core/user_feature_restriction.go
+++ b/pkg/core/user_feature_restriction.go
@@ -76,19 +76,20 @@ type UserFeatureRestrictionType uint64
// User Feature Restriction Type
const (
- USER_FEATURE_RESTRICTION_TYPE_UPDATE_PASSWORD UserFeatureRestrictionType = 1
- USER_FEATURE_RESTRICTION_TYPE_UPDATE_EMAIL UserFeatureRestrictionType = 2
- USER_FEATURE_RESTRICTION_TYPE_UPDATE_PROFILE_BASIC_INFO UserFeatureRestrictionType = 3
- USER_FEATURE_RESTRICTION_TYPE_UPDATE_AVATAR UserFeatureRestrictionType = 4
- USER_FEATURE_RESTRICTION_TYPE_REVOKE_OTHER_SESSION UserFeatureRestrictionType = 5
- USER_FEATURE_RESTRICTION_TYPE_ENABLE_2FA UserFeatureRestrictionType = 6
- USER_FEATURE_RESTRICTION_TYPE_DISABLE_2FA UserFeatureRestrictionType = 7
- USER_FEATURE_RESTRICTION_TYPE_FORGET_PASSWORD UserFeatureRestrictionType = 8
- USER_FEATURE_RESTRICTION_TYPE_IMPORT_TRANSACTION UserFeatureRestrictionType = 9
- USER_FEATURE_RESTRICTION_TYPE_EXPORT_TRANSACTION UserFeatureRestrictionType = 10
- USER_FEATURE_RESTRICTION_TYPE_CLEAR_ALL_DATA UserFeatureRestrictionType = 11
- USER_FEATURE_RESTRICTION_TYPE_SYNC_APPLICATION_SETTINGS UserFeatureRestrictionType = 12
- USER_FEATURE_RESTRICTION_TYPE_MCP_ACCESS UserFeatureRestrictionType = 13
+ USER_FEATURE_RESTRICTION_TYPE_UPDATE_PASSWORD UserFeatureRestrictionType = 1
+ USER_FEATURE_RESTRICTION_TYPE_UPDATE_EMAIL UserFeatureRestrictionType = 2
+ USER_FEATURE_RESTRICTION_TYPE_UPDATE_PROFILE_BASIC_INFO UserFeatureRestrictionType = 3
+ USER_FEATURE_RESTRICTION_TYPE_UPDATE_AVATAR UserFeatureRestrictionType = 4
+ USER_FEATURE_RESTRICTION_TYPE_REVOKE_OTHER_SESSION UserFeatureRestrictionType = 5
+ USER_FEATURE_RESTRICTION_TYPE_ENABLE_2FA UserFeatureRestrictionType = 6
+ USER_FEATURE_RESTRICTION_TYPE_DISABLE_2FA UserFeatureRestrictionType = 7
+ USER_FEATURE_RESTRICTION_TYPE_FORGET_PASSWORD UserFeatureRestrictionType = 8
+ USER_FEATURE_RESTRICTION_TYPE_IMPORT_TRANSACTION UserFeatureRestrictionType = 9
+ USER_FEATURE_RESTRICTION_TYPE_EXPORT_TRANSACTION UserFeatureRestrictionType = 10
+ USER_FEATURE_RESTRICTION_TYPE_CLEAR_ALL_DATA UserFeatureRestrictionType = 11
+ USER_FEATURE_RESTRICTION_TYPE_SYNC_APPLICATION_SETTINGS UserFeatureRestrictionType = 12
+ USER_FEATURE_RESTRICTION_TYPE_MCP_ACCESS UserFeatureRestrictionType = 13
+ USER_FEATURE_RESTRICTION_TYPE_CREATE_TRANSACTION_FROM_AI_IMAGE_RECOGNITION UserFeatureRestrictionType = 14
)
const userFeatureRestrictionTypeMinValue UserFeatureRestrictionType = USER_FEATURE_RESTRICTION_TYPE_UPDATE_PASSWORD
diff --git a/pkg/errs/error.go b/pkg/errs/error.go
index d0bb0f0e..41d463df 100644
--- a/pkg/errs/error.go
+++ b/pkg/errs/error.go
@@ -40,6 +40,7 @@ const (
NormalSubcategoryConverter = 12
NormalSubcategoryUserCustomExchangeRate = 13
NormalSubcategoryModelContextProtocol = 14
+ NormalSubcategoryLargeLanguageModel = 15
)
// Error represents the specific error returned to user
diff --git a/pkg/errs/large_language_model.go b/pkg/errs/large_language_model.go
new file mode 100644
index 00000000..fff49341
--- /dev/null
+++ b/pkg/errs/large_language_model.go
@@ -0,0 +1,11 @@
+package errs
+
+import "net/http"
+
+// Error codes related to large language model features
+var (
+ ErrLargeLanguageModelProviderNotEnabled = NewNormalError(NormalSubcategoryLargeLanguageModel, 0, http.StatusBadRequest, "llm provider is not enabled")
+ ErrNoAIRecognitionImage = NewNormalError(NormalSubcategoryLargeLanguageModel, 1, http.StatusBadRequest, "no image for AI recognition")
+ ErrAIRecognitionImageIsEmpty = NewNormalError(NormalSubcategoryLargeLanguageModel, 2, http.StatusBadRequest, "image for AI recognition is empty")
+ ErrExceedMaxAIRecognitionImageFileSize = NewNormalError(NormalSubcategoryLargeLanguageModel, 3, http.StatusBadRequest, "exceed the maximum size of image file for AI recognition")
+)
diff --git a/pkg/errs/setting.go b/pkg/errs/setting.go
index cfda4b12..8fc901b0 100644
--- a/pkg/errs/setting.go
+++ b/pkg/errs/setting.go
@@ -24,4 +24,6 @@ var (
ErrInvalidPasswordResetTokenExpiredTime = NewSystemError(SystemSubcategorySetting, 17, http.StatusInternalServerError, "invalid password reset token expired time")
ErrInvalidExchangeRatesDataSource = NewSystemError(SystemSubcategorySetting, 18, http.StatusInternalServerError, "invalid exchange rates data source")
ErrInvalidIpAddressPattern = NewSystemError(SystemSubcategorySetting, 19, http.StatusInternalServerError, "invalid ip address pattern")
+ ErrInvalidLLMProvider = NewSystemError(SystemSubcategorySetting, 20, http.StatusInternalServerError, "invalid llm provider")
+ ErrInvalidLLMModelId = NewSystemError(SystemSubcategorySetting, 21, http.StatusInternalServerError, "invalid llm model id")
)
diff --git a/pkg/llm/http_large_language_model_provider.go b/pkg/llm/http_large_language_model_provider.go
new file mode 100644
index 00000000..e7ea6bd9
--- /dev/null
+++ b/pkg/llm/http_large_language_model_provider.go
@@ -0,0 +1,91 @@
+package llm
+
+import (
+ "crypto/tls"
+ "io"
+ "net/http"
+ "time"
+
+ "github.com/mayswind/ezbookkeeping/pkg/core"
+ "github.com/mayswind/ezbookkeeping/pkg/errs"
+ "github.com/mayswind/ezbookkeeping/pkg/log"
+ "github.com/mayswind/ezbookkeeping/pkg/settings"
+ "github.com/mayswind/ezbookkeeping/pkg/utils"
+)
+
+// HttpLargeLanguageModelProvider defines the structure of http large language model provider
+type HttpLargeLanguageModelProvider interface {
+ // BuildTextualRequest returns the http request by the provider api definition
+ BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error)
+
+ // ParseTextualResponse returns the textual response entity by the provider api definition
+ ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error)
+
+ // GetReceiptImageRecognitionModelID returns the receipt image recognition model id if supported, otherwise returns empty string
+ GetReceiptImageRecognitionModelID() string
+}
+
+// CommonHttpLargeLanguageModelProvider defines the structure of common http large language model provider
+type CommonHttpLargeLanguageModelProvider struct {
+ LargeLanguageModelProvider
+ provider HttpLargeLanguageModelProvider
+}
+
+// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the common http large language model provider
+func (p *CommonHttpLargeLanguageModelProvider) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) {
+ return p.getTextualResponse(c, uid, currentConfig, request, p.provider.GetReceiptImageRecognitionModelID(), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+}
+
+func (p *CommonHttpLargeLanguageModelProvider) getTextualResponse(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
+ if modelId == "" {
+ return nil, errs.ErrInvalidLLMModelId
+ }
+
+ transport := http.DefaultTransport.(*http.Transport).Clone()
+ utils.SetProxyUrl(transport, currentConfig.LargeLanguageModelAPIProxy)
+
+ if currentConfig.LargeLanguageModelAPISkipTLSVerify {
+ transport.TLSClientConfig = &tls.Config{
+ InsecureSkipVerify: true,
+ }
+ }
+
+ client := &http.Client{
+ Transport: transport,
+ Timeout: time.Duration(currentConfig.LargeLanguageModelAPIRequestTimeout) * time.Millisecond,
+ }
+
+ httpRequest, err := p.provider.BuildTextualRequest(c, uid, request, modelId, responseType)
+
+ if err != nil {
+ log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to build requests for user \"uid:%d\", because %s", uid, err.Error())
+ return nil, errs.ErrFailedToRequestRemoteApi
+ }
+
+ httpRequest.Header.Set("User-Agent", settings.GetUserAgent())
+
+ resp, err := client.Do(httpRequest)
+
+ if err != nil {
+ log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to request large language model api for user \"uid:%d\", because %s", uid, err.Error())
+ return nil, errs.ErrFailedToRequestRemoteApi
+ }
+
+ defer resp.Body.Close()
+ body, err := io.ReadAll(resp.Body)
+
+ log.Debugf(c, "[http_large_language_model_provider.getTextualResponse] response is %s", body)
+
+ if resp.StatusCode != 200 {
+ log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to get large language model api response for user \"uid:%d\", because response code is %d", uid, resp.StatusCode)
+ return nil, errs.ErrFailedToRequestRemoteApi
+ }
+
+ return p.provider.ParseTextualResponse(c, uid, body, responseType)
+}
+
+func newCommonHttpLargeLanguageModelProvider(provider HttpLargeLanguageModelProvider) *CommonHttpLargeLanguageModelProvider {
+ return &CommonHttpLargeLanguageModelProvider{
+ provider: provider,
+ }
+}
diff --git a/pkg/llm/large_language_model_data.go b/pkg/llm/large_language_model_data.go
new file mode 100644
index 00000000..f0c2d5cb
--- /dev/null
+++ b/pkg/llm/large_language_model_data.go
@@ -0,0 +1,33 @@
+package llm
+
+import "reflect"
+
+type LargeLanguageModelRequestPromptType byte
+
+// Large Language Model Request Prompt Type
+const (
+ LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_TEXT LargeLanguageModelRequestPromptType = 0
+ LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL LargeLanguageModelRequestPromptType = 1
+)
+
+type LargeLanguageModelResponseFormat byte
+
+// Large Language Model Response Format
+const (
+ LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_TEXT LargeLanguageModelResponseFormat = 0
+ LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON LargeLanguageModelResponseFormat = 1
+)
+
+// LargeLanguageModelRequest represents a request to a large language model
+type LargeLanguageModelRequest struct {
+ Stream bool
+ SystemPrompt string
+ UserPrompt []byte
+ UserPromptType LargeLanguageModelRequestPromptType
+ ResponseJsonObjectType reflect.Type
+}
+
+// LargeLanguageModelTextualResponse represents a textual response from a large language model
+type LargeLanguageModelTextualResponse struct {
+ Content string
+}
diff --git a/pkg/llm/large_language_model_provider.go b/pkg/llm/large_language_model_provider.go
new file mode 100644
index 00000000..8ae58804
--- /dev/null
+++ b/pkg/llm/large_language_model_provider.go
@@ -0,0 +1,12 @@
+package llm
+
+import (
+ "github.com/mayswind/ezbookkeeping/pkg/core"
+ "github.com/mayswind/ezbookkeeping/pkg/settings"
+)
+
+// LargeLanguageModelProvider defines the structure of large language model provider
+type LargeLanguageModelProvider interface {
+ // GetJsonResponseByReceiptImageRecognitionModel returns the json response from the large language model provider by receipt image recognition model
+ GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error)
+}
diff --git a/pkg/llm/large_language_model_provider_container.go b/pkg/llm/large_language_model_provider_container.go
new file mode 100644
index 00000000..2bd1624c
--- /dev/null
+++ b/pkg/llm/large_language_model_provider_container.go
@@ -0,0 +1,45 @@
+package llm
+
+import (
+ "github.com/mayswind/ezbookkeeping/pkg/core"
+ "github.com/mayswind/ezbookkeeping/pkg/errs"
+ "github.com/mayswind/ezbookkeeping/pkg/settings"
+)
+
+// LargeLanguageModelProviderContainer contains the current large language model provider
+type LargeLanguageModelProviderContainer struct {
+ current LargeLanguageModelProvider
+}
+
+// Initialize a large language model provider container singleton instance
+var (
+ Container = &LargeLanguageModelProviderContainer{}
+)
+
+// InitializeLargeLanguageModelProvider initializes the current large language model provider according to the config
+func InitializeLargeLanguageModelProvider(config *settings.Config) error {
+ if config.LLMProvider == settings.OpenAILLMProvider {
+ Container.current = NewOpenAILargeLanguageModelProvider(config)
+ return nil
+ } else if config.LLMProvider == settings.OpenAICompatibleLLMProvider {
+ Container.current = NewOpenAICompatibleLargeLanguageModelProvider(config)
+ return nil
+ } else if config.LLMProvider == settings.OpenRouterLLMProvider {
+ Container.current = NewOpenRouterLargeLanguageModelProvider(config)
+ return nil
+ } else if config.LLMProvider == settings.OllamaLLMProvider {
+ Container.current = NewOllamaLargeLanguageModelProvider(config)
+ return nil
+ }
+
+ return errs.ErrInvalidLLMProvider
+}
+
+// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the current large language model provider by receipt image recognition model
+func (l *LargeLanguageModelProviderContainer) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) {
+ if Container.current == nil {
+ return nil, errs.ErrInvalidLLMProvider
+ }
+
+ return l.current.GetJsonResponseByReceiptImageRecognitionModel(c, uid, currentConfig, request)
+}
diff --git a/pkg/llm/ollama_large_language_model_provider.go b/pkg/llm/ollama_large_language_model_provider.go
new file mode 100644
index 00000000..5a69ddae
--- /dev/null
+++ b/pkg/llm/ollama_large_language_model_provider.go
@@ -0,0 +1,153 @@
+package llm
+
+import (
+ "bytes"
+ "encoding/base64"
+ "encoding/json"
+ "net/http"
+ "strings"
+
+ "github.com/mayswind/ezbookkeeping/pkg/core"
+ "github.com/mayswind/ezbookkeeping/pkg/errs"
+ "github.com/mayswind/ezbookkeeping/pkg/log"
+ "github.com/mayswind/ezbookkeeping/pkg/settings"
+)
+
+const ollamaChatCompletionsPath = "api/chat"
+
+// OllamaLargeLanguageModelProvider defines the structure of Ollama large language model provider
+type OllamaLargeLanguageModelProvider struct {
+ CommonHttpLargeLanguageModelProvider
+ OllamaServerURL string
+ ReceiptImageRecognitionModelID string
+}
+
+// BuildTextualRequest returns the http request by Ollama provider
+func (p *OllamaLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) {
+ requestBody, err := p.buildJsonRequestBody(c, uid, request, modelId, responseType)
+
+ if err != nil {
+ return nil, err
+ }
+
+ httpRequest, err := http.NewRequest("POST", p.getOllamaRequestUrl(), bytes.NewReader(requestBody))
+
+ if err != nil {
+ return nil, err
+ }
+
+ httpRequest.Header.Set("Content-Type", "application/json")
+
+ return httpRequest, nil
+}
+
+// ParseTextualResponse returns the textual response by Ollama provider
+func (p *OllamaLargeLanguageModelProvider) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
+ responseBody := make(map[string]any)
+ err := json.Unmarshal(body, &responseBody)
+
+ if err != nil {
+ log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] failed to parse response for user \"uid:%d\", because %s", uid, err.Error())
+ return nil, errs.ErrFailedToRequestRemoteApi
+ }
+
+ message, ok := responseBody["message"].(map[string]any)
+
+ if !ok {
+ log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] no message found in response for user \"uid:%d\"", uid)
+ return nil, errs.ErrFailedToRequestRemoteApi
+ }
+
+ content, ok := message["content"].(string)
+
+ if !ok {
+ log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] no content found in message for user \"uid:%d\"", uid)
+ return nil, errs.ErrFailedToRequestRemoteApi
+ }
+
+ if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
+ if strings.HasPrefix(content, "```json") && strings.HasSuffix(content, "```") {
+ content = strings.TrimPrefix(content, "```json")
+ content = strings.TrimSuffix(content, "```")
+ } else if strings.HasPrefix(content, "```") && strings.HasSuffix(content, "```") {
+ content = strings.TrimPrefix(content, "```")
+ content = strings.TrimSuffix(content, "```")
+ }
+ }
+
+ textualResponse := &LargeLanguageModelTextualResponse{
+ Content: content,
+ }
+
+ return textualResponse, nil
+}
+
+// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of Ollama provider
+func (p *OllamaLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
+ return p.ReceiptImageRecognitionModelID
+}
+
+func (p *OllamaLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) ([]byte, error) {
+ requestMessages := make([]any, 0)
+
+ if request.SystemPrompt != "" {
+ requestMessages = append(requestMessages, map[string]string{
+ "role": "system",
+ "content": request.SystemPrompt,
+ })
+ }
+
+ if len(request.UserPrompt) > 0 {
+		if request.UserPromptType == LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL {
+			imageBase64Data := base64.StdEncoding.EncodeToString(request.UserPrompt)
+ requestMessages = append(requestMessages, map[string]any{
+ "role": "user",
+ "content": "",
+ "images": []string{imageBase64Data},
+ })
+ } else {
+ requestMessages = append(requestMessages, map[string]string{
+ "role": "user",
+ "content": string(request.UserPrompt),
+ })
+ }
+ }
+
+ requestBody := make(map[string]any)
+ requestBody["model"] = modelId
+ requestBody["stream"] = request.Stream
+ requestBody["messages"] = requestMessages
+
+ if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
+ requestBody["format"] = "json"
+ }
+
+ requestBodyBytes, err := json.Marshal(requestBody)
+
+ if err != nil {
+ log.Errorf(c, "[ollama_large_language_model_provider.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error())
+ return nil, errs.ErrOperationFailed
+ }
+
+ log.Debugf(c, "[ollama_large_language_model_provider.buildJsonRequestBody] request body is %s", requestBodyBytes)
+ return requestBodyBytes, nil
+}
+
+func (p *OllamaLargeLanguageModelProvider) getOllamaRequestUrl() string {
+ url := p.OllamaServerURL
+
+	if !strings.HasSuffix(url, "/") {
+ url += "/"
+ }
+
+ url += ollamaChatCompletionsPath
+ return url
+}
+
+// NewOllamaLargeLanguageModelProvider creates a new Ollama large language model provider instance
+func NewOllamaLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
+ return newCommonHttpLargeLanguageModelProvider(&OllamaLargeLanguageModelProvider{
+ OllamaServerURL: config.OllamaServerURL,
+ ReceiptImageRecognitionModelID: config.OllamaReceiptImageRecognitionModelID,
+ })
+}
diff --git a/pkg/llm/ollama_large_language_model_provider_test.go b/pkg/llm/ollama_large_language_model_provider_test.go
new file mode 100644
index 00000000..8f530528
--- /dev/null
+++ b/pkg/llm/ollama_large_language_model_provider_test.go
@@ -0,0 +1,138 @@
+package llm
+
+import (
+ "encoding/json"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+
+ "github.com/mayswind/ezbookkeeping/pkg/core"
+)
+
+func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) {
+ provider := &OllamaLargeLanguageModelProvider{}
+
+ request := &LargeLanguageModelRequest{
+ SystemPrompt: "You are a helpful assistant.",
+ UserPrompt: []byte("Hello, how are you?"),
+ }
+
+ bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+ assert.Nil(t, err)
+
+ var body map[string]interface{}
+ err = json.Unmarshal(bodyBytes, &body)
+ assert.Nil(t, err)
+
+ assert.Equal(t, "{\"format\":\"json\",\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\"model\":\"test\",\"stream\":false}", string(bodyBytes))
+}
+
+func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) {
+ provider := &OllamaLargeLanguageModelProvider{}
+
+ request := &LargeLanguageModelRequest{
+ SystemPrompt: "What's in this image?",
+ UserPrompt: []byte("fakedata"),
+ UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
+ }
+
+ bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+ assert.Nil(t, err)
+
+ var body map[string]interface{}
+ err = json.Unmarshal(bodyBytes, &body)
+ assert.Nil(t, err)
+
+ assert.Equal(t, "{\"format\":\"json\",\"messages\":[{\"content\":\"What's in this image?\",\"role\":\"system\"},{\"content\":\"\",\"images\":[\"ZmFrZWRhdGE=\"],\"role\":\"user\"}],\"model\":\"test\",\"stream\":false}", string(bodyBytes))
+}
+
+func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_ValidJsonResponse(t *testing.T) {
+ provider := &OllamaLargeLanguageModelProvider{}
+
+ response := `{
+ "model": "test",
+ "created_at": "2025-09-01T01:02:03.456789Z",
+ "message": {
+ "role": "assistant",
+ "content": "This is a test response"
+ }
+ }`
+
+ result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+ assert.Nil(t, err)
+ assert.Equal(t, "This is a test response", result.Content)
+}
+
+func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_EmptyResponse(t *testing.T) {
+ provider := &OllamaLargeLanguageModelProvider{}
+
+ response := `{
+ "model": "test",
+ "created_at": "2025-09-01T01:02:03.456789Z",
+ "message": {
+ "role": "assistant",
+ "content": ""
+ }
+ }`
+
+ result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+ assert.Nil(t, err)
+ assert.Equal(t, "", result.Content)
+}
+
+func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_EmptyMessage(t *testing.T) {
+ provider := &OllamaLargeLanguageModelProvider{}
+
+ response := `{
+ "model": "test",
+ "created_at": "2025-09-01T01:02:03.456789Z",
+ "message": {}
+ }`
+
+ _, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+ assert.EqualError(t, err, "failed to request third party api")
+}
+
+func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_NoMessageContent(t *testing.T) {
+ provider := &OllamaLargeLanguageModelProvider{}
+
+ response := `{
+ "model": "test",
+ "created_at": "2025-09-01T01:02:03.456789Z",
+ "message": {
+ "role": "assistant"
+ }
+ }`
+
+ _, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+ assert.EqualError(t, err, "failed to request third party api")
+}
+
+func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_InvalidJson(t *testing.T) {
+ provider := &OllamaLargeLanguageModelProvider{}
+
+ response := "error"
+
+ _, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+ assert.EqualError(t, err, "failed to request third party api")
+}
+
+func TestOllamaLargeLanguageModelProvider_GetOllamaRequestUrl(t *testing.T) {
+ provider := &OllamaLargeLanguageModelProvider{
+ OllamaServerURL: "http://localhost:11434/",
+ }
+ url := provider.getOllamaRequestUrl()
+ assert.Equal(t, "http://localhost:11434/api/chat", url)
+
+ provider = &OllamaLargeLanguageModelProvider{
+ OllamaServerURL: "http://localhost:11434",
+ }
+ url = provider.getOllamaRequestUrl()
+ assert.Equal(t, "http://localhost:11434/api/chat", url)
+
+ provider = &OllamaLargeLanguageModelProvider{
+ OllamaServerURL: "http://example.com/ollama/",
+ }
+ url = provider.getOllamaRequestUrl()
+ assert.Equal(t, "http://example.com/ollama/api/chat", url)
+}
diff --git a/pkg/llm/openai_common_compatible_large_language_model_provider.go b/pkg/llm/openai_common_compatible_large_language_model_provider.go
new file mode 100644
index 00000000..60717b7b
--- /dev/null
+++ b/pkg/llm/openai_common_compatible_large_language_model_provider.go
@@ -0,0 +1,187 @@
+package llm
+
+import (
+ "bytes"
+ "encoding/base64"
+ "encoding/json"
+ "io"
+ "net/http"
+ "strings"
+
+ "github.com/invopop/jsonschema"
+
+ "github.com/mayswind/ezbookkeeping/pkg/core"
+ "github.com/mayswind/ezbookkeeping/pkg/errs"
+ "github.com/mayswind/ezbookkeeping/pkg/log"
+)
+
+// OpenAIChatCompletionsLargeLanguageModelProvider defines the structure of OpenAI chat completions compatible large language model provider
+type OpenAIChatCompletionsLargeLanguageModelProvider interface {
+ // BuildChatCompletionsHttpRequest returns the chat completions http request
+ BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error)
+
+ // GetReceiptImageRecognitionModelID returns the receipt image recognition model id if supported, otherwise returns empty string
+ GetReceiptImageRecognitionModelID() string
+}
+
+// OpenAICommonChatCompletionsHttpLargeLanguageModelProvider defines the structure of OpenAI common compatible large language model provider based on chat completions api
+type OpenAICommonChatCompletionsHttpLargeLanguageModelProvider struct {
+ CommonHttpLargeLanguageModelProvider
+ provider OpenAIChatCompletionsLargeLanguageModelProvider
+}
+
+// BuildTextualRequest returns the http request by OpenAI common compatible provider
+func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) {
+ requestBody, err := p.buildJsonRequestBody(c, uid, request, modelId, responseType)
+
+ if err != nil {
+ return nil, err
+ }
+
+ httpRequest, err := p.provider.BuildChatCompletionsHttpRequest(c, uid)
+
+ if err != nil {
+ return nil, err
+ }
+
+ httpRequest.Body = io.NopCloser(bytes.NewReader(requestBody))
+ httpRequest.Header.Set("Content-Type", "application/json")
+
+ return httpRequest, nil
+}
+
+// ParseTextualResponse returns the textual response by OpenAI common compatible provider
+func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
+ responseBody := make(map[string]any)
+ err := json.Unmarshal(body, &responseBody)
+
+ if err != nil {
+ log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] failed to parse response for user \"uid:%d\", because %s", uid, err.Error())
+ return nil, errs.ErrFailedToRequestRemoteApi
+ }
+
+ choices, ok := responseBody["choices"].([]any)
+
+ if !ok || len(choices) < 1 {
+ log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no choices found in response for user \"uid:%d\"", uid)
+ return nil, errs.ErrFailedToRequestRemoteApi
+ }
+
+ firstChoice, ok := choices[0].(map[string]any)
+
+ if !ok {
+ log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] invalid choice format in response for user \"uid:%d\"", uid)
+ return nil, errs.ErrFailedToRequestRemoteApi
+ }
+
+ message, ok := firstChoice["message"].(map[string]any)
+
+ if !ok {
+ log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no message found in choice for user \"uid:%d\"", uid)
+ return nil, errs.ErrFailedToRequestRemoteApi
+ }
+
+ content, ok := message["content"].(string)
+
+ if !ok {
+ log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no content found in message for user \"uid:%d\"", uid)
+ return nil, errs.ErrFailedToRequestRemoteApi
+ }
+
+ if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
+ if strings.HasPrefix(content, "```json") && strings.HasSuffix(content, "```") {
+ content = strings.TrimPrefix(content, "```json")
+ content = strings.TrimSuffix(content, "```")
+ } else if strings.HasPrefix(content, "```") && strings.HasSuffix(content, "```") {
+ content = strings.TrimPrefix(content, "```")
+ content = strings.TrimSuffix(content, "```")
+ }
+ }
+
+ textualResponse := &LargeLanguageModelTextualResponse{
+ Content: content,
+ }
+
+ return textualResponse, nil
+}
+
+// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI common compatible provider
+func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
+ return p.provider.GetReceiptImageRecognitionModelID()
+}
+
+func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) ([]byte, error) {
+ requestMessages := make([]any, 0)
+
+ if request.SystemPrompt != "" {
+ requestMessages = append(requestMessages, map[string]string{
+ "role": "system",
+ "content": request.SystemPrompt,
+ })
+ }
+
+ if len(request.UserPrompt) > 0 {
+ if request.UserPromptType == LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL {
+ imageBase64Data := "data:image/png;base64," + base64.StdEncoding.EncodeToString(request.UserPrompt)
+ requestMessages = append(requestMessages, map[string]any{
+ "role": "user",
+ "content": []any{
+ core.O{
+ "type": "image_url",
+ "image_url": core.O{
+ "url": imageBase64Data,
+ },
+ },
+ },
+ })
+ } else {
+ requestMessages = append(requestMessages, map[string]string{
+ "role": "user",
+ "content": string(request.UserPrompt),
+ })
+ }
+ }
+
+ requestBody := make(map[string]any)
+ requestBody["model"] = modelId
+ requestBody["stream"] = request.Stream
+ requestBody["messages"] = requestMessages
+
+ if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
+ if request.ResponseJsonObjectType != nil {
+ schemeGenerator := jsonschema.Reflector{
+ Anonymous: true,
+ DoNotReference: true,
+ ExpandedStruct: true,
+ }
+
+ schema := schemeGenerator.ReflectFromType(request.ResponseJsonObjectType)
+ schema.Version = ""
+
+ requestBody["response_format"] = core.O{
+ "type": "json_schema",
+ "json_schema": schema,
+ }
+ } else {
+ requestBody["response_format"] = core.O{
+ "type": "json_object",
+ }
+ }
+ }
+
+ requestBodyBytes, err := json.Marshal(requestBody)
+
+ if err != nil {
+ log.Errorf(c, "[openai_common_compatible_large_language_model_provider.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error())
+ return nil, errs.ErrOperationFailed
+ }
+
+ log.Debugf(c, "[openai_common_compatible_large_language_model_provider.buildJsonRequestBody] request body is %s", requestBodyBytes)
+ return requestBodyBytes, nil
+}
+
+func newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(provider OpenAIChatCompletionsLargeLanguageModelProvider) LargeLanguageModelProvider {
+ return newCommonHttpLargeLanguageModelProvider(&OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
+ provider: provider,
+ })
+}
diff --git a/pkg/llm/openai_common_compatible_large_language_model_provider_test.go b/pkg/llm/openai_common_compatible_large_language_model_provider_test.go
new file mode 100644
index 00000000..f7e96958
--- /dev/null
+++ b/pkg/llm/openai_common_compatible_large_language_model_provider_test.go
@@ -0,0 +1,157 @@
+package llm
+
+import (
+ "encoding/json"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+
+ "github.com/mayswind/ezbookkeeping/pkg/core"
+)
+
+func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) {
+ provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
+ provider: &OpenAILargeLanguageModelProvider{},
+ }
+
+ request := &LargeLanguageModelRequest{
+ SystemPrompt: "You are a helpful assistant.",
+ UserPrompt: []byte("Hello, how are you?"),
+ }
+
+ bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+ assert.Nil(t, err)
+
+ var body map[string]interface{}
+ err = json.Unmarshal(bodyBytes, &body)
+ assert.Nil(t, err)
+
+ assert.Equal(t, "{\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\"model\":\"test\",\"response_format\":{\"type\":\"json_object\"},\"stream\":false}", string(bodyBytes))
+}
+
+func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) {
+ provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
+ provider: &OpenAILargeLanguageModelProvider{},
+ }
+
+ request := &LargeLanguageModelRequest{
+ SystemPrompt: "What's in this image?",
+ UserPrompt: []byte("fakedata"),
+ UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
+ }
+
+ bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+ assert.Nil(t, err)
+
+ var body map[string]interface{}
+ err = json.Unmarshal(bodyBytes, &body)
+ assert.Nil(t, err)
+
+ assert.Equal(t, "{\"messages\":[{\"content\":\"What's in this image?\",\"role\":\"system\"},{\"content\":[{\"image_url\":{\"url\":\"data:image/png;base64,ZmFrZWRhdGE=\"},\"type\":\"image_url\"}],\"role\":\"user\"}],\"model\":\"test\",\"response_format\":{\"type\":\"json_object\"},\"stream\":false}", string(bodyBytes))
+}
+
+func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_ValidJsonResponse(t *testing.T) {
+ provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
+ provider: &OpenAILargeLanguageModelProvider{},
+ }
+
+ response := `{
+ "id": "test-123",
+ "object": "chat.completion",
+ "created": 1234567890,
+ "model": "test",
+ "usage": {
+ "prompt_tokens": 13,
+ "completion_tokens": 7,
+ "total_tokens": 20
+ },
+ "choices": [
+ {
+ "finish_reason": "stop",
+ "index": 0,
+ "message": {
+ "role": "assistant",
+ "content": "This is a test response"
+ }
+ }
+ ]
+ }`
+
+ result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+ assert.Nil(t, err)
+ assert.Equal(t, "This is a test response", result.Content)
+}
+
+func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_EmptyResponse(t *testing.T) {
+ provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
+ provider: &OpenAILargeLanguageModelProvider{},
+ }
+
+ response := `{
+ "id": "test-123",
+ "object": "chat.completion",
+ "choices": [
+ {
+ "finish_reason": "stop",
+ "index": 0,
+ "message": {
+ "role": "assistant",
+ "content": ""
+ }
+ }
+ ]
+ }`
+
+ result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+ assert.Nil(t, err)
+ assert.Equal(t, "", result.Content)
+}
+
+func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_EmptyChoices(t *testing.T) {
+ provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
+ provider: &OpenAILargeLanguageModelProvider{},
+ }
+
+ response := `{
+ "id": "test-123",
+ "object": "chat.completion",
+ "choices": []
+ }`
+
+ _, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+ assert.EqualError(t, err, "failed to request third party api")
+}
+
+func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_NoChoiceContent(t *testing.T) {
+ provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
+ provider: &OpenAILargeLanguageModelProvider{},
+ }
+
+ response := `{
+ "id": "chatcmpl-123",
+ "object": "chat.completion",
+ "choices": [
+ {
+ "finish_reason": "stop",
+ "index": 0,
+ "message": {
+ "role": "assistant"
+ }
+ }
+ ]
+ }`
+
+ _, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+ assert.EqualError(t, err, "failed to request third party api")
+}
+
+func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_InvalidJson(t *testing.T) {
+ provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
+ provider: &OpenAILargeLanguageModelProvider{},
+ }
+
+ response := "error"
+
+ _, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
+ assert.EqualError(t, err, "failed to request third party api")
+}
diff --git a/pkg/llm/openai_compatible_large_language_model_provider.go b/pkg/llm/openai_compatible_large_language_model_provider.go
new file mode 100644
index 00000000..741cc874
--- /dev/null
+++ b/pkg/llm/openai_compatible_large_language_model_provider.go
@@ -0,0 +1,58 @@
+package llm
+
+import (
+ "net/http"
+
+ "github.com/mayswind/ezbookkeeping/pkg/core"
+ "github.com/mayswind/ezbookkeeping/pkg/settings"
+)
+
+const openAICompatibleChatCompletionsPath = "chat/completions"
+
+// OpenAICompatibleLargeLanguageModelProvider defines the structure of OpenAI compatible large language model provider
+type OpenAICompatibleLargeLanguageModelProvider struct {
+ OpenAIChatCompletionsLargeLanguageModelProvider
+ OpenAICompatibleBaseURL string
+ OpenAICompatibleAPIKey string
+ ReceiptImageRecognitionModelID string
+}
+
+// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenAI compatible provider
+func (p *OpenAICompatibleLargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) {
+ req, err := http.NewRequest("POST", p.getFinalChatCompletionsRequestUrl(), nil)
+
+ if err != nil {
+ return nil, err
+ }
+
+ if p.OpenAICompatibleAPIKey != "" {
+ req.Header.Set("Authorization", "Bearer "+p.OpenAICompatibleAPIKey)
+ }
+
+ return req, nil
+}
+
+// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI compatible provider
+func (p *OpenAICompatibleLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
+ return p.ReceiptImageRecognitionModelID
+}
+
+func (p *OpenAICompatibleLargeLanguageModelProvider) getFinalChatCompletionsRequestUrl() string {
+ url := p.OpenAICompatibleBaseURL
+
+	if len(url) == 0 || url[len(url)-1] != '/' {
+ url += "/"
+ }
+
+ url += openAICompatibleChatCompletionsPath
+ return url
+}
+
+// NewOpenAICompatibleLargeLanguageModelProvider creates a new OpenAI compatible large language model provider instance
+func NewOpenAICompatibleLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
+ return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAICompatibleLargeLanguageModelProvider{
+ OpenAICompatibleBaseURL: config.OpenAICompatibleBaseURL,
+ OpenAICompatibleAPIKey: config.OpenAICompatibleAPIKey,
+ ReceiptImageRecognitionModelID: config.OpenAICompatibleReceiptImageRecognitionModelID,
+ })
+}
diff --git a/pkg/llm/openai_compatible_large_language_model_provider_test.go b/pkg/llm/openai_compatible_large_language_model_provider_test.go
new file mode 100644
index 00000000..e288b233
--- /dev/null
+++ b/pkg/llm/openai_compatible_large_language_model_provider_test.go
@@ -0,0 +1,27 @@
+package llm
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestOpenAICompatibleLargeLanguageModelProvider_GetFinalRequestUrl(t *testing.T) {
+ provider := &OpenAICompatibleLargeLanguageModelProvider{
+ OpenAICompatibleBaseURL: "https://api.example.com/v1/",
+ }
+ url := provider.getFinalChatCompletionsRequestUrl()
+ assert.Equal(t, "https://api.example.com/v1/chat/completions", url)
+
+ provider = &OpenAICompatibleLargeLanguageModelProvider{
+ OpenAICompatibleBaseURL: "https://api.example.com/v1",
+ }
+ url = provider.getFinalChatCompletionsRequestUrl()
+ assert.Equal(t, "https://api.example.com/v1/chat/completions", url)
+
+ provider = &OpenAICompatibleLargeLanguageModelProvider{
+ OpenAICompatibleBaseURL: "https://example.com/api",
+ }
+ url = provider.getFinalChatCompletionsRequestUrl()
+ assert.Equal(t, "https://example.com/api/chat/completions", url)
+}
diff --git a/pkg/llm/openai_large_language_model_provider.go b/pkg/llm/openai_large_language_model_provider.go
new file mode 100644
index 00000000..2bb51352
--- /dev/null
+++ b/pkg/llm/openai_large_language_model_provider.go
@@ -0,0 +1,43 @@
+package llm
+
+import (
+ "net/http"
+
+ "github.com/mayswind/ezbookkeeping/pkg/core"
+ "github.com/mayswind/ezbookkeeping/pkg/settings"
+)
+
+// OpenAILargeLanguageModelProvider defines the structure of OpenAI large language model provider
+type OpenAILargeLanguageModelProvider struct {
+ OpenAIChatCompletionsLargeLanguageModelProvider
+ OpenAIAPIKey string
+ ReceiptImageRecognitionModelID string
+}
+
+const openAIChatCompletionsUrl = "https://api.openai.com/v1/chat/completions"
+
+// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenAI provider
+func (p *OpenAILargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) {
+ req, err := http.NewRequest("POST", openAIChatCompletionsUrl, nil)
+
+ if err != nil {
+ return nil, err
+ }
+
+ req.Header.Set("Authorization", "Bearer "+p.OpenAIAPIKey)
+
+ return req, nil
+}
+
+// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI provider
+func (p *OpenAILargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
+ return p.ReceiptImageRecognitionModelID
+}
+
+// NewOpenAILargeLanguageModelProvider creates a new OpenAI large language model provider instance
+func NewOpenAILargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
+ return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAILargeLanguageModelProvider{
+ OpenAIAPIKey: config.OpenAIAPIKey,
+ ReceiptImageRecognitionModelID: config.OpenAIReceiptImageRecognitionModelID,
+ })
+}
diff --git a/pkg/llm/openrouter_large_language_model_provider.go b/pkg/llm/openrouter_large_language_model_provider.go
new file mode 100644
index 00000000..16873bf0
--- /dev/null
+++ b/pkg/llm/openrouter_large_language_model_provider.go
@@ -0,0 +1,45 @@
+package llm
+
+import (
+ "net/http"
+
+ "github.com/mayswind/ezbookkeeping/pkg/core"
+ "github.com/mayswind/ezbookkeeping/pkg/settings"
+)
+
+// OpenRouterLargeLanguageModelProvider defines the structure of OpenRouter large language model provider
+type OpenRouterLargeLanguageModelProvider struct {
+ OpenAIChatCompletionsLargeLanguageModelProvider
+ OpenRouterAPIKey string
+ ReceiptImageRecognitionModelID string
+}
+
+const openRouterChatCompletionsUrl = "https://openrouter.ai/api/v1/chat/completions"
+
+// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenRouter provider
+func (p *OpenRouterLargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) {
+ req, err := http.NewRequest("POST", openRouterChatCompletionsUrl, nil)
+
+ if err != nil {
+ return nil, err
+ }
+
+ req.Header.Set("Authorization", "Bearer "+p.OpenRouterAPIKey)
+ req.Header.Set("HTTP-Referer", "https://ezbookkeeping.mayswind.net/")
+ req.Header.Set("X-Title", "ezBookkeeping")
+
+ return req, nil
+}
+
+// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenRouter provider
+func (p *OpenRouterLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
+ return p.ReceiptImageRecognitionModelID
+}
+
+// NewOpenRouterLargeLanguageModelProvider creates a new OpenRouter large language model provider instance
+func NewOpenRouterLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
+ return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenRouterLargeLanguageModelProvider{
+ OpenRouterAPIKey: config.OpenRouterAPIKey,
+ ReceiptImageRecognitionModelID: config.OpenRouterReceiptImageRecognitionModelID,
+ })
+}
diff --git a/pkg/models/large_language_model.go b/pkg/models/large_language_model.go
new file mode 100644
index 00000000..2e5e0d7f
--- /dev/null
+++ b/pkg/models/large_language_model.go
@@ -0,0 +1,27 @@
+package models
+
+// RecognizedReceiptImageResponse represents a view-object of recognized receipt image response
+type RecognizedReceiptImageResponse struct {
+ Type TransactionType `json:"type"`
+ Time int64 `json:"time,omitempty"`
+ CategoryId int64 `json:"categoryId,string,omitempty"`
+ SourceAccountId int64 `json:"sourceAccountId,string,omitempty"`
+ DestinationAccountId int64 `json:"destinationAccountId,string,omitempty"`
+ SourceAmount int64 `json:"sourceAmount,omitempty"`
+ DestinationAmount int64 `json:"destinationAmount,omitempty"`
+ TagIds []string `json:"tagIds,omitempty"`
+ Comment string `json:"comment,omitempty"`
+}
+
+// RecognizedReceiptImageResult represents the result of recognized receipt image
+type RecognizedReceiptImageResult struct {
+ Type string `json:"type,omitempty" jsonschema:"enum=income,enum=expense,enum=transfer" jsonschema_description:"Transaction type (income, expense, transfer)"`
+ Time string `json:"time" jsonschema:"format=date-time" jsonschema_description:"Transaction time in long date time format (YYYY-MM-DD HH:mm:ss, e.g. 2023-01-01 12:00:00)"`
+ Amount string `json:"amount,omitempty" jsonschema_description:"Transaction amount"`
+ AccountName string `json:"account,omitempty" jsonschema_description:"Account name for the transaction"`
+ CategoryName string `json:"category,omitempty" jsonschema_description:"Category name for the transaction"`
+ TagNames []string `json:"tags,omitempty" jsonschema_description:"List of tags associated with the transaction (maximum 10 tags allowed)"`
+ Description string `json:"description,omitempty" jsonschema_description:"Transaction description"`
+ DestinationAmount string `json:"destination_amount,omitempty" jsonschema_description:"Destination amount for transfer transactions"`
+ DestinationAccountName string `json:"destination_account,omitempty" jsonschema_description:"Destination account name for transfer transactions"`
+}
diff --git a/pkg/settings/setting.go b/pkg/settings/setting.go
index 10e8fa0e..f3db6068 100644
--- a/pkg/settings/setting.go
+++ b/pkg/settings/setting.go
@@ -66,6 +66,13 @@ const (
WebDAVStorageType string = "webdav"
)
+const ( // Large language model provider types, valid values of the [llm] llm_provider config option
+ OpenAILLMProvider string = "openai"
+ OpenAICompatibleLLMProvider string = "openai_compatible"
+ OpenRouterLLMProvider string = "openrouter"
+ OllamaLLMProvider string = "ollama"
+)
+
// Uuid generator types
const (
InternalUuidGeneratorType string = "internal"
@@ -140,6 +147,9 @@ const (
defaultWebDAVRequestTimeout uint32 = 10000 // 10 seconds
+ defaultAIRecognitionPictureMaxSize uint32 = 10485760 // 10MB
+ defaultLargeLanguageModelAPIRequestTimeout uint32 = 60000 // 60 seconds
+
defaultInMemoryDuplicateCheckerCleanupInterval uint32 = 60 // 1 minutes
defaultDuplicateSubmissionsInterval uint32 = 300 // 5 minutes
@@ -281,6 +291,23 @@ type Config struct {
MinIOConfig *MinIOConfig
WebDAVConfig *WebDAVConfig
+ // Large Language Model
+ LLMProvider string
+ OpenAIAPIKey string
+ OpenAIReceiptImageRecognitionModelID string
+ OpenAICompatibleBaseURL string
+ OpenAICompatibleAPIKey string
+ OpenAICompatibleReceiptImageRecognitionModelID string
+ OpenRouterAPIKey string
+ OpenRouterReceiptImageRecognitionModelID string
+ OllamaServerURL string
+ OllamaReceiptImageRecognitionModelID string
+ TransactionFromAIImageRecognition bool
+ MaxAIRecognitionPictureFileSize uint32
+ LargeLanguageModelAPIRequestTimeout uint32
+ LargeLanguageModelAPIProxy string
+ LargeLanguageModelAPISkipTLSVerify bool
+
// Uuid
UuidGeneratorType string
UuidServerId uint8
@@ -426,6 +453,12 @@ func LoadConfiguration(configFilePath string) (*Config, error) {
return nil, err
}
+ err = loadLLMConfiguration(config, cfgFile, "llm")
+
+ if err != nil {
+ return nil, err
+ }
+
err = loadUuidConfiguration(config, cfgFile, "uuid")
if err != nil {
@@ -751,6 +784,46 @@ func loadStorageConfiguration(config *Config, configFile *ini.File, sectionName
return nil
}
+func loadLLMConfiguration(config *Config, configFile *ini.File, sectionName string) error { // loadLLMConfiguration reads the [llm] ini section into config; returns ErrInvalidLLMProvider on an unknown provider
+ llmProvider := getConfigItemStringValue(configFile, sectionName, "llm_provider")
+
+ if llmProvider == "" { // empty is valid: no provider configured, LLM features stay off
+ config.LLMProvider = ""
+ } else if llmProvider == OpenAILLMProvider {
+ config.LLMProvider = OpenAILLMProvider
+ } else if llmProvider == OpenAICompatibleLLMProvider {
+ config.LLMProvider = OpenAICompatibleLLMProvider
+ } else if llmProvider == OpenRouterLLMProvider {
+ config.LLMProvider = OpenRouterLLMProvider
+ } else if llmProvider == OllamaLLMProvider {
+ config.LLMProvider = OllamaLLMProvider
+ } else { // any other value is a configuration error
+ return errs.ErrInvalidLLMProvider
+ }
+
+ config.OpenAIAPIKey = getConfigItemStringValue(configFile, sectionName, "openai_api_key")
+ config.OpenAIReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openai_receipt_image_recognition_model_id")
+
+ config.OpenAICompatibleBaseURL = getConfigItemStringValue(configFile, sectionName, "openai_compatible_base_url")
+ config.OpenAICompatibleAPIKey = getConfigItemStringValue(configFile, sectionName, "openai_compatible_api_key")
+ config.OpenAICompatibleReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openai_compatible_receipt_image_recognition_model_id")
+
+ config.OpenRouterAPIKey = getConfigItemStringValue(configFile, sectionName, "openrouter_api_key")
+ config.OpenRouterReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openrouter_receipt_image_recognition_model_id")
+
+ config.OllamaServerURL = getConfigItemStringValue(configFile, sectionName, "ollama_server_url")
+ config.OllamaReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "ollama_receipt_image_recognition_model_id")
+
+ config.TransactionFromAIImageRecognition = getConfigItemBoolValue(configFile, sectionName, "transaction_from_ai_image_recognition", false) // feature flag, off by default
+ config.MaxAIRecognitionPictureFileSize = getConfigItemUint32Value(configFile, sectionName, "max_ai_recognition_picture_size", defaultAIRecognitionPictureMaxSize) // default 10MB
+
+ config.LargeLanguageModelAPIProxy = getConfigItemStringValue(configFile, sectionName, "proxy", "system") // "system" presumably means use the OS proxy settings — confirm against the http client setup
+ config.LargeLanguageModelAPIRequestTimeout = getConfigItemUint32Value(configFile, sectionName, "request_timeout", defaultLargeLanguageModelAPIRequestTimeout) // default 60s, in milliseconds
+ config.LargeLanguageModelAPISkipTLSVerify = getConfigItemBoolValue(configFile, sectionName, "skip_tls_verify", false)
+
+ return nil
+}
+
func loadUuidConfiguration(config *Config, configFile *ini.File, sectionName string) error {
if getConfigItemStringValue(configFile, sectionName, "generator_type") == InternalUuidGeneratorType {
config.UuidGeneratorType = InternalUuidGeneratorType
diff --git a/pkg/templates/known_template.go b/pkg/templates/known_template.go
index 07290ddb..e8550b8d 100644
--- a/pkg/templates/known_template.go
+++ b/pkg/templates/known_template.go
@@ -4,6 +4,7 @@ type KnownTemplate string
// Known templates
const (
- TEMPLATE_VERIFY_EMAIL KnownTemplate = "email/verify_email"
- TEMPLATE_PASSWORD_RESET KnownTemplate = "email/password_reset"
+ TEMPLATE_VERIFY_EMAIL KnownTemplate = "email/verify_email"
+ TEMPLATE_PASSWORD_RESET KnownTemplate = "email/password_reset"
+ SYSTEM_PROMPT_RECEIPT_IMAGE_RECOGNITION KnownTemplate = "prompt/receipt_image_recognition"
)
diff --git a/src/components/mobile/AIImageRecognitionSheet.vue b/src/components/mobile/AIImageRecognitionSheet.vue
new file mode 100644
index 00000000..a150c37d
--- /dev/null
+++ b/src/components/mobile/AIImageRecognitionSheet.vue
@@ -0,0 +1,181 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {{ tt('Please select a receipt or transaction image first') }}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/consts/api.ts b/src/consts/api.ts
index e73c3266..58d7b4f0 100644
--- a/src/consts/api.ts
+++ b/src/consts/api.ts
@@ -7,6 +7,7 @@ export const DEFAULT_API_TIMEOUT: number = 10000; // 10s
export const DEFAULT_UPLOAD_API_TIMEOUT: number = 30000; // 30s
export const DEFAULT_EXPORT_API_TIMEOUT: number = 180000; // 180s
export const DEFAULT_IMPORT_API_TIMEOUT: number = 1800000; // 1800s
+export const DEFAULT_LLM_API_TIMEOUT: number = 600000; // 600s
export const GOOGLE_MAP_JAVASCRIPT_URL: string = 'https://maps.googleapis.com/maps/api/js';
export const BAIDU_MAP_JAVASCRIPT_URL: string = 'https://api.map.baidu.com/api?v=3.0';
diff --git a/src/core/file.ts b/src/core/file.ts
index be86c5f2..51d9fcc4 100644
--- a/src/core/file.ts
+++ b/src/core/file.ts
@@ -6,6 +6,7 @@ export class KnownFileType {
public static readonly TSV = new KnownFileType('tsv', 'text/tab-separated-values');
public static readonly MARKDOWN = new KnownFileType('md', 'text/markdown');
public static readonly JS = new KnownFileType('js', 'application/javascript');
+ public static readonly JPG = new KnownFileType('jpg', 'image/jpeg');
public readonly extension: string;
public readonly contentType: string;
@@ -37,6 +38,12 @@ export class KnownFileType {
});
}
+ public createFileFromBlob(blob: Blob, fileName: string): File { // wraps an existing Blob into a File of this known type
+ return new File([blob], this.formatFileName(fileName), { // formatFileName presumably appends this type's extension — confirm
+ type: this.contentType,
+ });
+ }
+
public static parse(extension: string): KnownFileType | undefined {
return KnownFileType.allInstancesByExtension[extension];
}
diff --git a/src/lib/server_settings.ts b/src/lib/server_settings.ts
index 011c8a38..41ea3ff6 100644
--- a/src/lib/server_settings.ts
+++ b/src/lib/server_settings.ts
@@ -35,6 +35,10 @@ export function isMCPServerEnabled(): boolean {
return getServerSetting('mcp') === 1;
}
+export function isTransactionFromAIImageRecognitionEnabled(): boolean { // true when the server enables creating transactions via AI image recognition
+ return getServerSetting('llmt') === 1; // 'llmt' is the server-pushed flag for this feature (cf. 'mcp' above)
+}
+
export function getLoginPageTips(): Record{
return getServerSetting('lpt') as Record;
}
diff --git a/src/lib/services.ts b/src/lib/services.ts
index 93b0a9fe..62654f35 100644
--- a/src/lib/services.ts
+++ b/src/lib/services.ts
@@ -21,6 +21,7 @@ import {
DEFAULT_UPLOAD_API_TIMEOUT,
DEFAULT_EXPORT_API_TIMEOUT,
DEFAULT_IMPORT_API_TIMEOUT,
+ DEFAULT_LLM_API_TIMEOUT,
GOOGLE_MAP_JAVASCRIPT_URL,
BAIDU_MAP_JAVASCRIPT_URL,
AMAP_JAVASCRIPT_URL
@@ -134,6 +135,9 @@ import type {
import type {
UserApplicationCloudSettingsUpdateRequest
} from '@/models/user_app_cloud_setting.ts';
+import type {
+ RecognizedReceiptImageResponse
+} from '@/models/large_language_model.ts';
import {
getCurrentToken,
@@ -635,6 +639,13 @@ export default {
deleteTransactionTemplate: (req: TransactionTemplateDeleteRequest): ApiResponsePromise => {
return axios.post>('v1/transaction/templates/delete.json', req);
},
+ recognizeReceiptImage: ({ imageFile }: { imageFile: File }): ApiResponsePromise => { // uploads a receipt image for server-side LLM recognition
+ return axios.postForm>('v1/llm/transactions/recognize_receipt_image.json', { // multipart/form-data upload
+ image: imageFile
+ }, {
+ timeout: DEFAULT_LLM_API_TIMEOUT // LLM calls are slow; extended 600s timeout instead of the default
+ });
+ },
getLatestExchangeRates: (param: { ignoreError?: boolean }): ApiResponsePromise => {
return axios.get>('v1/exchange_rates/latest.json', {
ignoreError: !!param.ignoreError,
diff --git a/src/lib/ui/common.ts b/src/lib/ui/common.ts
index 4f4d5841..bebc2d27 100644
--- a/src/lib/ui/common.ts
+++ b/src/lib/ui/common.ts
@@ -3,6 +3,7 @@ import Clipboard from 'clipboard';
import { ThemeType } from '@/core/theme.ts';
import { type AmountColor, PresetAmountColor } from '@/core/color.ts';
+import { KnownFileType } from '@/core/file.ts';
import logger from '../logger.ts';
@@ -134,6 +135,64 @@ export function startDownloadFile(fileName: string, fileData: Blob): void {
dataLink.click();
}
+export function compressJpgImage(file: File, maxWidth: number, maxHeight: number, quality: number): Promise { // re-encodes an image file as a JPEG blob, downscaled to fit maxWidth x maxHeight
+ return new Promise((resolve, reject) => {
+ const reader = new FileReader();
+
+ reader.onload = (event) => {
+ const img = new Image();
+
+ img.onload = () => {
+ let width = img.width;
+ let height = img.height;
+
+ if (width > maxWidth || height > maxHeight) { // downscale only; smaller images are never upscaled
+ const scale = Math.min(maxWidth / width, maxHeight / height); // uniform scale preserves aspect ratio
+ width = Math.floor(width * scale);
+ height = Math.floor(height * scale);
+ }
+
+ const canvas = document.createElement('canvas');
+ const ctx = canvas.getContext('2d');
+
+ if (!ctx) { // getContext can return null (e.g. canvas unsupported)
+ reject(new Error('failed to get canvas context'));
+ return;
+ }
+
+ canvas.width = width;
+ canvas.height = height;
+
+ ctx.drawImage(img, 0, 0, width, height); // NOTE(review): EXIF orientation is not applied; photos with rotation metadata may be re-encoded sideways — confirm acceptable
+
+ canvas.toBlob((blob) => {
+ if (blob) {
+ resolve(blob); // blob is image/jpeg encoded at the requested quality
+ } else {
+ reject(new Error('failed to compress image')); // toBlob yields null on encode failure
+ }
+ }, KnownFileType.JPG.contentType, quality); // quality in [0, 1] per the toBlob API
+ };
+
+ img.onerror = (error) => { // fires when the data is not a decodable image
+ reject(error);
+ };
+
+ if (event.target && event.target.result) {
+ img.src = event.target.result as string; // readAsDataURL guarantees result is a data: URL string
+ } else {
+ reject(new Error('failed to read file'));
+ }
+ };
+
+ reader.onerror = (error) => {
+ reject(error);
+ };
+
+ reader.readAsDataURL(file);
+ });
+}
+
export function clearBrowserCaches(): Promise {
if (!window.caches) {
logger.error('caches API is not supported in this browser');
diff --git a/src/locales/de.json b/src/locales/de.json
index 4da60c7a..6b931d14 100644
--- a/src/locales/de.json
+++ b/src/locales/de.json
@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency",
"cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency",
"mcp server is not enabled": "MCP Server is not enabled",
+ "llm provider is not enabled": "Large Language Model provider is not enabled",
+ "no image for AI recognition": "There is no image for AI recognition",
+ "image for AI recognition is empty": "Image for AI recognition file is empty",
+ "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "Abfrageelemente dürfen nicht leer sein",
"query items too much": "Zu viele Abfrageelemente",
"query items have invalid item": "Ungültiges Element in Abfrageelementen",
@@ -1389,6 +1393,7 @@
"Refresh": "Aktualisieren",
"Clear": "Löschen",
"Generate": "Generate",
+ "Recognize": "Recognize",
"None": "Keine",
"Unspecified": "Nicht angegeben",
"Not set": "Nicht festgelegt",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Duplicate (With Time)",
"Duplicate (With Geographic Location)": "Duplicate (With Geographic Location)",
"Duplicate (With Time and Geographic Location)": "Duplicate (With Time and Geographic Location)",
+ "AI Image Recognition": "AI Image Recognition",
+ "Choose from Library": "Choose from Library",
+ "Take Photo": "Take Photo",
+ "Unable to load image": "Unable to load image",
+ "Unable to recognize image": "Unable to recognize image",
+ "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
+ "Release to load image": "Release to load image",
+ "Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Kategorie",
"Secondary Category": "Secondary Category",
"Expense Category": "Expense Category",
diff --git a/src/locales/en.json b/src/locales/en.json
index e5598911..6cd318ab 100644
--- a/src/locales/en.json
+++ b/src/locales/en.json
@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency",
"cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency",
"mcp server is not enabled": "MCP Server is not enabled",
+ "llm provider is not enabled": "Large Language Model provider is not enabled",
+ "no image for AI recognition": "There is no image for AI recognition",
+ "image for AI recognition is empty": "Image file for AI recognition is empty",
+ "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "There are no query items",
"query items too much": "There are too many query items",
"query items have invalid item": "There is invalid item in query items",
@@ -1389,6 +1393,7 @@
"Refresh": "Refresh",
"Clear": "Clear",
"Generate": "Generate",
+ "Recognize": "Recognize",
"None": "None",
"Unspecified": "Unspecified",
"Not set": "Not set",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Duplicate (With Time)",
"Duplicate (With Geographic Location)": "Duplicate (With Geographic Location)",
"Duplicate (With Time and Geographic Location)": "Duplicate (With Time and Geographic Location)",
+ "AI Image Recognition": "AI Image Recognition",
+ "Choose from Library": "Choose from Library",
+ "Take Photo": "Take Photo",
+ "Unable to load image": "Unable to load image",
+ "Unable to recognize image": "Unable to recognize image",
+ "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
+ "Release to load image": "Release to load image",
+ "Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Category",
"Secondary Category": "Secondary Category",
"Expense Category": "Expense Category",
diff --git a/src/locales/es.json b/src/locales/es.json
index 732ae0e4..ca940517 100644
--- a/src/locales/es.json
+++ b/src/locales/es.json
@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency",
"cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency",
"mcp server is not enabled": "MCP Server is not enabled",
+ "llm provider is not enabled": "Large Language Model provider is not enabled",
+ "no image for AI recognition": "There is no image for AI recognition",
+ "image for AI recognition is empty": "Image for AI recognition file is empty",
+ "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "--",
"query items too much": "--",
"query items have invalid item": "Hay un elemento no válido en los elementos de consulta",
@@ -1389,6 +1393,7 @@
"Refresh": "Refrescar",
"Clear": "Claro",
"Generate": "Generate",
+ "Recognize": "Recognize",
"None": "Ninguno",
"Unspecified": "No especificado",
"Not set": "No establecido",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Duplicate (With Time)",
"Duplicate (With Geographic Location)": "Duplicate (With Geographic Location)",
"Duplicate (With Time and Geographic Location)": "Duplicate (With Time and Geographic Location)",
+ "AI Image Recognition": "AI Image Recognition",
+ "Choose from Library": "Choose from Library",
+ "Take Photo": "Take Photo",
+ "Unable to load image": "Unable to load image",
+ "Unable to recognize image": "Unable to recognize image",
+ "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
+ "Release to load image": "Release to load image",
+ "Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Categoría",
"Secondary Category": "Secondary Category",
"Expense Category": "Expense Category",
diff --git a/src/locales/it.json b/src/locales/it.json
index eee224d6..6ebfdee2 100644
--- a/src/locales/it.json
+++ b/src/locales/it.json
@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency",
"cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency",
"mcp server is not enabled": "MCP Server is not enabled",
+ "llm provider is not enabled": "Large Language Model provider is not enabled",
+ "no image for AI recognition": "There is no image for AI recognition",
+ "image for AI recognition is empty": "Image for AI recognition file is empty",
+ "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "Non ci sono elementi di query",
"query items too much": "Ci sono troppi elementi di query",
"query items have invalid item": "C'è un elemento non valido negli elementi di query",
@@ -1389,6 +1393,7 @@
"Refresh": "Aggiorna",
"Clear": "Pulisci",
"Generate": "Generate",
+ "Recognize": "Recognize",
"None": "Nessuno",
"Unspecified": "Non specificato",
"Not set": "Non impostato",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Duplica (con ora)",
"Duplicate (With Geographic Location)": "Duplica (con posizione geografica)",
"Duplicate (With Time and Geographic Location)": "Duplica (con ora e posizione geografica)",
+ "AI Image Recognition": "AI Image Recognition",
+ "Choose from Library": "Choose from Library",
+ "Take Photo": "Take Photo",
+ "Unable to load image": "Unable to load image",
+ "Unable to recognize image": "Unable to recognize image",
+ "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
+ "Release to load image": "Release to load image",
+ "Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Categoria",
"Secondary Category": "Categoria secondaria",
"Expense Category": "Expense Category",
diff --git a/src/locales/ja.json b/src/locales/ja.json
index 2548ff65..4ed42e63 100644
--- a/src/locales/ja.json
+++ b/src/locales/ja.json
@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency",
"cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency",
"mcp server is not enabled": "MCP Server is not enabled",
+ "llm provider is not enabled": "Large Language Model provider is not enabled",
+ "no image for AI recognition": "There is no image for AI recognition",
+ "image for AI recognition is empty": "Image for AI recognition file is empty",
+ "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "クエリ項目がありません",
"query items too much": "クエリ項目が多すぎます",
"query items have invalid item": "クエリ項目に無効な項目があります",
@@ -1389,6 +1393,7 @@
"Refresh": "リフレッシュ",
"Clear": "消去",
"Generate": "Generate",
+ "Recognize": "Recognize",
"None": "なし",
"Unspecified": "不特定",
"Not set": "セットしていない",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "複製(時間含む)",
"Duplicate (With Geographic Location)": "複製(地理座標を含む)",
"Duplicate (With Time and Geographic Location)": "複製(時間と地理座標を含む)",
+ "AI Image Recognition": "AI Image Recognition",
+ "Choose from Library": "Choose from Library",
+ "Take Photo": "Take Photo",
+ "Unable to load image": "Unable to load image",
+ "Unable to recognize image": "Unable to recognize image",
+ "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
+ "Release to load image": "Release to load image",
+ "Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "カテゴリ",
"Secondary Category": "二次カテゴリ",
"Expense Category": "Expense Category",
diff --git a/src/locales/nl.json b/src/locales/nl.json
index f303a99e..a0119750 100644
--- a/src/locales/nl.json
+++ b/src/locales/nl.json
@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Wisselkoersgegevens voor basisvaluta kunnen niet worden bijgewerkt",
"cannot delete exchange rate data for base currency": "Wisselkoersgegevens voor basisvaluta kunnen niet worden verwijderd",
"mcp server is not enabled": "MCP-server is niet ingeschakeld",
+ "llm provider is not enabled": "Large Language Model provider is not enabled",
+ "no image for AI recognition": "There is no image for AI recognition",
+ "image for AI recognition is empty": "Image for AI recognition file is empty",
+ "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "Geen zoekitems opgegeven",
"query items too much": "Te veel zoekitems",
"query items have invalid item": "Ongeldig item in zoekitems",
@@ -1389,6 +1393,7 @@
"Refresh": "Vernieuwen",
"Clear": "Wissen",
"Generate": "Genereren",
+ "Recognize": "Recognize",
"None": "Geen",
"Unspecified": "Niet gespecificeerd",
"Not set": "Niet ingesteld",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Dupliceren (met tijd)",
"Duplicate (With Geographic Location)": "Dupliceren (met geografische locatie)",
"Duplicate (With Time and Geographic Location)": "Dupliceren (met tijd en locatie)",
+ "AI Image Recognition": "AI Image Recognition",
+ "Choose from Library": "Choose from Library",
+ "Take Photo": "Take Photo",
+ "Unable to load image": "Unable to load image",
+ "Unable to recognize image": "Unable to recognize image",
+ "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
+ "Release to load image": "Release to load image",
+ "Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Categorie",
"Secondary Category": "Secundaire categorie",
"Expense Category": "Uitgavecategorie",
diff --git a/src/locales/pt_BR.json b/src/locales/pt_BR.json
index 587fd5ac..7faee390 100644
--- a/src/locales/pt_BR.json
+++ b/src/locales/pt_BR.json
@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Não é possível atualizar dados de taxa de câmbio para a moeda base",
"cannot delete exchange rate data for base currency": "Não é possível excluir dados de taxa de câmbio para a moeda base",
"mcp server is not enabled": "MCP Server is not enabled",
+ "llm provider is not enabled": "Large Language Model provider is not enabled",
+ "no image for AI recognition": "There is no image for AI recognition",
+ "image for AI recognition is empty": "Image for AI recognition file is empty",
+ "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "Não há itens de consulta",
"query items too much": "Há muitos itens de consulta",
"query items have invalid item": "Há item inválido nos itens de consulta",
@@ -1389,6 +1393,7 @@
"Refresh": "Atualizar",
"Clear": "Limpar",
"Generate": "Generate",
+ "Recognize": "Recognize",
"None": "Nenhum",
"Unspecified": "Não especificado",
"Not set": "Não definido",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Duplicar (Com Tempo)",
"Duplicate (With Geographic Location)": "Duplicar (Com Localização Geográfica)",
"Duplicate (With Time and Geographic Location)": "Duplicar (Com Tempo e Localização Geográfica)",
+ "AI Image Recognition": "AI Image Recognition",
+ "Choose from Library": "Choose from Library",
+ "Take Photo": "Take Photo",
+ "Unable to load image": "Unable to load image",
+ "Unable to recognize image": "Unable to recognize image",
+ "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
+ "Release to load image": "Release to load image",
+ "Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Categoria",
"Secondary Category": "Categoria Secundária",
"Expense Category": "Expense Category",
diff --git a/src/locales/ru.json b/src/locales/ru.json
index 0c6d6e34..d5e5df63 100644
--- a/src/locales/ru.json
+++ b/src/locales/ru.json
@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency",
"cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency",
"mcp server is not enabled": "MCP Server is not enabled",
+ "llm provider is not enabled": "Large Language Model provider is not enabled",
+ "no image for AI recognition": "There is no image for AI recognition",
+ "image for AI recognition is empty": "Image for AI recognition file is empty",
+ "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "Нет элементов запроса",
"query items too much": "Слишком много элементов запроса",
"query items have invalid item": "В элементах запроса присутствует недопустимый элемент",
@@ -1389,6 +1393,7 @@
"Refresh": "Обновить",
"Clear": "Очистить",
"Generate": "Generate",
+ "Recognize": "Recognize",
"None": "Нет",
"Unspecified": "Не указано",
"Not set": "Не установлено",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Duplicate (With Time)",
"Duplicate (With Geographic Location)": "Duplicate (With Geographic Location)",
"Duplicate (With Time and Geographic Location)": "Duplicate (With Time and Geographic Location)",
+ "AI Image Recognition": "AI Image Recognition",
+ "Choose from Library": "Choose from Library",
+ "Take Photo": "Take Photo",
+ "Unable to load image": "Unable to load image",
+ "Unable to recognize image": "Unable to recognize image",
+ "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
+ "Release to load image": "Release to load image",
+ "Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Категория",
"Secondary Category": "Secondary Category",
"Expense Category": "Expense Category",
diff --git a/src/locales/uk.json b/src/locales/uk.json
index 6501ae44..5ef83a28 100644
--- a/src/locales/uk.json
+++ b/src/locales/uk.json
@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency",
"cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency",
"mcp server is not enabled": "MCP Server is not enabled",
+ "llm provider is not enabled": "Large Language Model provider is not enabled",
+ "no image for AI recognition": "There is no image for AI recognition",
+ "image for AI recognition is empty": "Image for AI recognition file is empty",
+ "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "Елементи запиту не можуть бути порожніми",
"query items too much": "Занадто багато елементів запиту",
"query items have invalid item": "Запит містить недійсний елемент",
@@ -1389,6 +1393,7 @@
"Refresh": "Оновити",
"Clear": "Очистити",
"Generate": "Generate",
+ "Recognize": "Recognize",
"None": "Немає",
"Unspecified": "Не вказано",
"Not set": "Не встановлено",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Дублювати (з часом)",
"Duplicate (With Geographic Location)": "Дублювати (з геолокацією)",
"Duplicate (With Time and Geographic Location)": "Дублювати (з часом і геолокацією)",
+ "AI Image Recognition": "AI Image Recognition",
+ "Choose from Library": "Choose from Library",
+ "Take Photo": "Take Photo",
+ "Unable to load image": "Unable to load image",
+ "Unable to recognize image": "Unable to recognize image",
+ "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
+ "Release to load image": "Release to load image",
+ "Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Категорія",
"Secondary Category": "Вторинна категорія",
"Expense Category": "Expense Category",
diff --git a/src/locales/vi.json b/src/locales/vi.json
index f22e49a2..c7feb23b 100644
--- a/src/locales/vi.json
+++ b/src/locales/vi.json
@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "Cannot update exchange rate data for base currency",
"cannot delete exchange rate data for base currency": "Cannot delete exchange rate data for base currency",
"mcp server is not enabled": "MCP Server is not enabled",
+ "llm provider is not enabled": "Large Language Model provider is not enabled",
+ "no image for AI recognition": "There is no image for AI recognition",
+ "image for AI recognition is empty": "Image for AI recognition file is empty",
+ "exceed the maximum size of image file for AI recognition": "The uploaded image for AI recognition exceeds the maximum allowed file size",
"query items cannot be blank": "Không có mục truy vấn",
"query items too much": "Có quá nhiều mục truy vấn",
"query items have invalid item": "Có mục không hợp lệ trong các mục truy vấn",
@@ -1389,6 +1393,7 @@
"Refresh": "Làm mới",
"Clear": "Xóa",
"Generate": "Generate",
+ "Recognize": "Recognize",
"None": "Không có",
"Unspecified": "Không xác định",
"Not set": "Not set",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "Duplicate (With Time)",
"Duplicate (With Geographic Location)": "Duplicate (With Geographic Location)",
"Duplicate (With Time and Geographic Location)": "Duplicate (With Time and Geographic Location)",
+ "AI Image Recognition": "AI Image Recognition",
+ "Choose from Library": "Choose from Library",
+ "Take Photo": "Take Photo",
+ "Unable to load image": "Unable to load image",
+ "Unable to recognize image": "Unable to recognize image",
+ "Drag and drop a receipt or transaction image here, or click to select one": "Drag and drop a receipt or transaction image here, or click to select one",
+ "Release to load image": "Release to load image",
+ "Please select a receipt or transaction image first": "Please select a receipt or transaction image first",
"Category": "Danh mục",
"Secondary Category": "Secondary Category",
"Expense Category": "Expense Category",
diff --git a/src/locales/zh_Hans.json b/src/locales/zh_Hans.json
index a1cfa150..0504cb38 100644
--- a/src/locales/zh_Hans.json
+++ b/src/locales/zh_Hans.json
@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "不能更新默认货币的汇率数据",
"cannot delete exchange rate data for base currency": "不能删除默认货币的汇率数据",
"mcp server is not enabled": "MCP 服务器没有启用",
+ "llm provider is not enabled": "大语言模型服务提供者没有启用",
+ "no image for AI recognition": "没有用于AI识别的图片",
+ "image for AI recognition is empty": "用于AI识别的图片为空",
+ "exceed the maximum size of image file for AI recognition": "用于AI识别的图片超出了允许的最大文件大小",
"query items cannot be blank": "请求项目不能为空",
"query items too much": "请求项目过多",
"query items have invalid item": "请求项目中有非法项目",
@@ -1389,6 +1393,7 @@
"Refresh": "刷新",
"Clear": "清除",
"Generate": "生成",
+ "Recognize": "识别",
"None": "无",
"Unspecified": "未指定",
"Not set": "未设置",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "复制 (含时间)",
"Duplicate (With Geographic Location)": "复制 (含地理位置)",
"Duplicate (With Time and Geographic Location)": "复制 (含时间和地理位置)",
+ "AI Image Recognition": "AI识图",
+ "Choose from Library": "从图库选择",
+ "Take Photo": "拍照",
+ "Unable to load image": "无法加载图片",
+ "Unable to recognize image": "无法识别图片",
+ "Drag and drop a receipt or transaction image here, or click to select one": "拖拽收据或交易图片到此处,或点击选择图片",
+ "Release to load image": "释放以加载图片",
+ "Please select a receipt or transaction image first": "请先选择收据或交易图片",
"Category": "分类",
"Secondary Category": "二级分类",
"Expense Category": "支出分类",
diff --git a/src/locales/zh_Hant.json b/src/locales/zh_Hant.json
index 7eb66c95..3614c9b5 100644
--- a/src/locales/zh_Hant.json
+++ b/src/locales/zh_Hant.json
@@ -1224,6 +1224,10 @@
"cannot update exchange rate data for base currency": "不能更新基準貨幣的匯率資料",
"cannot delete exchange rate data for base currency": "不能刪除基準貨幣的匯率資料",
"mcp server is not enabled": "MCP 伺服器未啟用",
+ "llm provider is not enabled": "大型語言模型服務提供者未啟用",
+ "no image for AI recognition": "沒有用於AI識別的圖片檔案",
+ "image for AI recognition is empty": "用於AI識別的圖片檔案為空",
+ "exceed the maximum size of image file for AI recognition": "用於AI識別的圖片超出了允許的最大檔案大小",
"query items cannot be blank": "查詢項目不能為空",
"query items too much": "查詢項目過多",
"query items have invalid item": "查詢項目中有非法項目",
@@ -1389,6 +1393,7 @@
"Refresh": "重新載入",
"Clear": "清除",
"Generate": "產生",
+ "Recognize": "識別",
"None": "無",
"Unspecified": "未指定",
"Not set": "未設置",
@@ -1719,6 +1724,14 @@
"Duplicate (With Time)": "複製 (含時間)",
"Duplicate (With Geographic Location)": "複製 (含地理位置)",
"Duplicate (With Time and Geographic Location)": "複製 (含時間和地理位置)",
+ "AI Image Recognition": "AI識圖",
+ "Choose from Library": "從相簿選擇",
+ "Take Photo": "拍照",
+ "Unable to load image": "無法載入圖片",
+ "Unable to recognize image": "無法識別圖片",
+ "Drag and drop a receipt or transaction image here, or click to select one": "將收據或交易圖片拖放到此處,或點擊以選擇圖片",
+ "Release to load image": "放開以載入圖片",
+ "Please select a receipt or transaction image first": "請先選擇收據或交易圖片",
"Category": "分類",
"Secondary Category": "次分類",
"Expense Category": "支出分類",
diff --git a/src/mobile-main.ts b/src/mobile-main.ts
index 7713f3dc..8822f19f 100644
--- a/src/mobile-main.ts
+++ b/src/mobile-main.ts
@@ -79,6 +79,7 @@ import MapSheet from '@/components/mobile/MapSheet.vue';
import TransactionTagSelectionSheet from '@/components/mobile/TransactionTagSelectionSheet.vue';
import ScheduleFrequencySheet from '@/components/mobile/ScheduleFrequencySheet.vue';
import AccountBalanceTrendsBarChart from '@/components/mobile/AccountBalanceTrendsBarChart.vue';
+import AIImageRecognitionSheet from '@/components/mobile/AIImageRecognitionSheet.vue';
import TextareaAutoSize from '@/directives/mobile/textareaAutoSize.ts';
@@ -170,8 +171,9 @@ app.component('InformationSheet', InformationSheet);
app.component('NumberPadSheet', NumberPadSheet);
app.component('MapSheet', MapSheet);
app.component('TransactionTagSelectionSheet', TransactionTagSelectionSheet);
-app.component('AccountBalanceTrendsBarChart', AccountBalanceTrendsBarChart);
app.component('ScheduleFrequencySheet', ScheduleFrequencySheet);
+app.component('AccountBalanceTrendsBarChart', AccountBalanceTrendsBarChart);
+app.component('AIImageRecognitionSheet', AIImageRecognitionSheet);
app.directive('TextareaAutoSize', TextareaAutoSize);
diff --git a/src/models/large_language_model.ts b/src/models/large_language_model.ts
new file mode 100644
index 00000000..2a9f3a78
--- /dev/null
+++ b/src/models/large_language_model.ts
@@ -0,0 +1,11 @@
+export interface RecognizedReceiptImageResponse {
+ readonly type: number;
+ readonly time?: number;
+ readonly categoryId?: string;
+ readonly sourceAccountId?: string;
+ readonly destinationAccountId?: string;
+ readonly sourceAmount?: number;
+ readonly destinationAmount?: number;
+ readonly tagIds?: string[];
+ readonly comment?: string;
+}
diff --git a/src/stores/transaction.ts b/src/stores/transaction.ts
index 462ee04c..bfd86854 100644
--- a/src/stores/transaction.ts
+++ b/src/stores/transaction.ts
@@ -33,6 +33,9 @@ import {
import {
type ExportTransactionDataRequest
} from '@/models/data_management.ts';
+import type {
+ RecognizedReceiptImageResponse
+} from '@/models/large_language_model.ts';
import {
getUserTransactionDraft,
@@ -1157,6 +1160,31 @@ export const useTransactionsStore = defineStore('transactions', () => {
});
}
+ function recognizeReceiptImage({ imageFile }: { imageFile: File }): Promise<RecognizedReceiptImageResponse> {
+ return new Promise((resolve, reject) => {
+ services.recognizeReceiptImage({ imageFile }).then(response => {
+ const data = response.data;
+
+ if (!data || !data.success || !data.result) {
+ reject({ message: 'Unable to recognize image' });
+ return;
+ }
+
+ resolve(data.result);
+ }).catch(error => {
+ logger.error('failed to recognize image', error);
+
+ if (error.response && error.response.data && error.response.data.errorMessage) {
+ reject({ error: error.response.data });
+ } else if (!error.processed) {
+ reject({ message: 'Unable to recognize image' });
+ } else {
+ reject(error);
+ }
+ });
+ });
+ }
+
function parseImportDsvFile({ fileType, fileEncoding, importFile }: { fileType: string, fileEncoding?: string, importFile: File }): Promise {
return new Promise((resolve, reject) => {
services.parseImportDsvFile({ fileType, fileEncoding, importFile }).then(response => {
@@ -1370,6 +1398,7 @@ export const useTransactionsStore = defineStore('transactions', () => {
getTransaction,
saveTransaction,
deleteTransaction,
+ recognizeReceiptImage,
parseImportDsvFile,
parseImportTransaction,
importTransactions,
diff --git a/src/views/desktop/transactions/ListPage.vue b/src/views/desktop/transactions/ListPage.vue
index 36ef9125..84658ba2 100644
--- a/src/views/desktop/transactions/ListPage.vue
+++ b/src/views/desktop/transactions/ListPage.vue
@@ -63,11 +63,16 @@
{{ tt('Add') }}
-
+
-
+
@@ -620,6 +625,7 @@
@error="onShowDateRangeError" />
+
@@ -647,6 +653,7 @@ import PaginationButtons from '@/components/desktop/PaginationButtons.vue';
import ConfirmDialog from '@/components/desktop/ConfirmDialog.vue';
import SnackBar from '@/components/desktop/SnackBar.vue';
import EditDialog from './list/dialogs/EditDialog.vue';
+import AIImageRecognitionDialog from './list/dialogs/AIImageRecognitionDialog.vue';
import ImportDialog from './import/ImportDialog.vue';
import AccountFilterSettingsCard from '@/views/desktop/common/cards/AccountFilterSettingsCard.vue';
import CategoryFilterSettingsCard from '@/views/desktop/common/cards/CategoryFilterSettingsCard.vue';
@@ -716,7 +723,7 @@ import {
categoryTypeToTransactionType,
transactionTypeToCategoryType
} from '@/lib/category.ts';
-import { isDataExportingEnabled, isDataImportingEnabled } from '@/lib/server_settings.ts';
+import { isDataExportingEnabled, isDataImportingEnabled, isTransactionFromAIImageRecognitionEnabled } from '@/lib/server_settings.ts';
import { startDownloadFile } from '@/lib/ui/common.ts';
import { scrollToSelectedItem } from '@/lib/ui/desktop.ts';
import logger from '@/lib/logger.ts';
@@ -738,6 +745,7 @@ import {
mdiMinusBoxMultipleOutline,
mdiCloseBoxMultipleOutline,
mdiPound,
+ mdiMagicStaff,
mdiTextBoxOutline
} from '@mdi/js';
@@ -760,6 +768,7 @@ const props = defineProps();
type ConfirmDialogType = InstanceType<typeof ConfirmDialog>;
type SnackBarType = InstanceType<typeof SnackBar>;
type EditDialogType = InstanceType<typeof EditDialog>;
+type AIImageRecognitionDialogType = InstanceType<typeof AIImageRecognitionDialog>;
type ImportDialogType = InstanceType<typeof ImportDialog>;
interface TransactionTemplateWithIcon {
@@ -859,6 +868,7 @@ const tagFilterMenu = useTemplateRef('tagFilterMenu');
const confirmDialog = useTemplateRef('confirmDialog');
const snackbar = useTemplateRef('snackbar');
const editDialog = useTemplateRef('editDialog');
+const aiImageRecognitionDialog = useTemplateRef('aiImageRecognitionDialog');
const importDialog = useTemplateRef('importDialog');
const activeTab = ref('transactionPage');
@@ -1597,6 +1607,33 @@ function add(template?: TransactionTemplate): void {
});
}
+function addByRecognizingImage(): void {
+ aiImageRecognitionDialog.value?.open().then(result => {
+ editDialog.value?.open({
+ time: result.time,
+ type: result.type,
+ categoryId: result.categoryId,
+ accountId: result.sourceAccountId,
+ destinationAccountId: result.destinationAccountId,
+ amount: result.sourceAmount,
+ destinationAmount: result.destinationAmount,
+ tagIds: result.tagIds ? result.tagIds.join(',') : undefined,
+ comment: result.comment,
+ noTransactionDraft: true
+ }).then(result => {
+ if (result && result.message) {
+ snackbar.value?.showMessage(result.message);
+ }
+
+ reload(false, false);
+ }).catch(error => {
+ if (error) {
+ snackbar.value?.showError(error);
+ }
+ });
+ });
+}
+
function importTransaction(): void {
importDialog.value?.open().then(() => {
reload(false, false);
diff --git a/src/views/desktop/transactions/list/dialogs/AIImageRecognitionDialog.vue b/src/views/desktop/transactions/list/dialogs/AIImageRecognitionDialog.vue
new file mode 100644
index 00000000..1d79f6f7
--- /dev/null
+++ b/src/views/desktop/transactions/list/dialogs/AIImageRecognitionDialog.vue
@@ -0,0 +1,208 @@
+
+
+
+
+
+
{{ tt('AI Image Recognition') }}
+
+
+
+
+
+
+
{{ tt('Drag and drop a receipt or transaction image here, or click to select one') }}