mirror of
https://github.com/mayswind/ezbookkeeping.git
synced 2026-05-18 16:54:25 +08:00
create transactions from AI receipt image recognition results
This commit is contained in:
@@ -0,0 +1,345 @@
|
||||
package api
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"strings"
|
||||
|
||||
"github.com/mayswind/ezbookkeeping/pkg/core"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/errs"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/llm"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/log"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/models"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/services"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/settings"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/templates"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/utils"
|
||||
)
|
||||
|
||||
// LargeLanguageModelsApi represents large language models api
|
||||
type LargeLanguageModelsApi struct {
|
||||
ApiUsingConfig
|
||||
transactionCategories *services.TransactionCategoryService
|
||||
transactionTags *services.TransactionTagService
|
||||
accounts *services.AccountService
|
||||
users *services.UserService
|
||||
}
|
||||
|
||||
// Initialize a large language models api singleton instance
|
||||
var (
|
||||
LargeLanguageModels = &LargeLanguageModelsApi{
|
||||
ApiUsingConfig: ApiUsingConfig{
|
||||
container: settings.Container,
|
||||
},
|
||||
transactionCategories: services.TransactionCategories,
|
||||
transactionTags: services.TransactionTags,
|
||||
accounts: services.Accounts,
|
||||
users: services.Users,
|
||||
}
|
||||
)
|
||||
|
||||
// RecognizeReceiptImageHandler returns the recognized receipt image result
|
||||
func (a *LargeLanguageModelsApi) RecognizeReceiptImageHandler(c *core.WebContext) (any, *errs.Error) {
|
||||
if a.CurrentConfig().LLMProvider == "" || !a.CurrentConfig().TransactionFromAIImageRecognition {
|
||||
return nil, errs.ErrLargeLanguageModelProviderNotEnabled
|
||||
}
|
||||
|
||||
utcOffset, err := c.GetClientTimezoneOffset()
|
||||
|
||||
if err != nil {
|
||||
log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] cannot get client timezone offset, because %s", err.Error())
|
||||
return nil, errs.ErrClientTimezoneOffsetInvalid
|
||||
}
|
||||
|
||||
uid := c.GetCurrentUid()
|
||||
user, err := a.users.GetUserById(c, uid)
|
||||
|
||||
if err != nil {
|
||||
if !errs.IsCustomError(err) {
|
||||
log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get user for user \"uid:%d\", because %s", uid, err.Error())
|
||||
}
|
||||
|
||||
return false, errs.ErrUserNotFound
|
||||
}
|
||||
|
||||
if user.FeatureRestriction.Contains(core.USER_FEATURE_RESTRICTION_TYPE_CREATE_TRANSACTION_FROM_AI_IMAGE_RECOGNITION) {
|
||||
return false, errs.ErrNotPermittedToPerformThisAction
|
||||
}
|
||||
|
||||
form, err := c.MultipartForm()
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get multi-part form data for user \"uid:%d\", because %s", uid, err.Error())
|
||||
return nil, errs.ErrParameterInvalid
|
||||
}
|
||||
|
||||
imageFiles := form.File["image"]
|
||||
|
||||
if len(imageFiles) < 1 {
|
||||
log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] there is no image in request for user \"uid:%d\"", uid)
|
||||
return nil, errs.ErrNoAIRecognitionImage
|
||||
}
|
||||
|
||||
if imageFiles[0].Size < 1 {
|
||||
log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] the size of image in request is zero for user \"uid:%d\"", uid)
|
||||
return nil, errs.ErrAIRecognitionImageIsEmpty
|
||||
}
|
||||
|
||||
if imageFiles[0].Size > int64(a.CurrentConfig().MaxAIRecognitionPictureFileSize) {
|
||||
log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] the upload file size \"%d\" exceeds the maximum size \"%d\" of image for user \"uid:%d\"", imageFiles[0].Size, a.CurrentConfig().MaxAIRecognitionPictureFileSize, uid)
|
||||
return nil, errs.ErrExceedMaxAIRecognitionImageFileSize
|
||||
}
|
||||
|
||||
fileExtension := utils.GetFileNameExtension(imageFiles[0].Filename)
|
||||
|
||||
if utils.GetImageContentType(fileExtension) == "" {
|
||||
log.Warnf(c, "[large_language_models.RecognizeReceiptImageHandler] the file extension \"%s\" of image in request is not supported for user \"uid:%d\"", fileExtension, uid)
|
||||
return nil, errs.ErrImageTypeNotSupported
|
||||
}
|
||||
|
||||
imageFile, err := imageFiles[0].Open()
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get image file from request for user \"uid:%d\", because %s", uid, err.Error())
|
||||
return nil, errs.ErrOperationFailed
|
||||
}
|
||||
|
||||
defer imageFile.Close()
|
||||
|
||||
imageData, err := io.ReadAll(imageFile)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to read image file from request for user \"uid:%d\", because %s", uid, err.Error())
|
||||
return nil, errs.ErrOperationFailed
|
||||
}
|
||||
|
||||
accounts, err := a.accounts.GetAllAccountsByUid(c, uid)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get all accounts for user \"uid:%d\", because %s", uid, err.Error())
|
||||
return nil, errs.Or(err, errs.ErrOperationFailed)
|
||||
}
|
||||
|
||||
accountMap := a.accounts.GetVisibleAccountNameMapByList(accounts)
|
||||
accountNames := make([]string, 0, len(accounts))
|
||||
|
||||
for i := 0; i < len(accounts); i++ {
|
||||
if accounts[i].Hidden || accounts[i].Type == models.ACCOUNT_TYPE_MULTI_SUB_ACCOUNTS {
|
||||
continue
|
||||
}
|
||||
|
||||
accountNames = append(accountNames, accounts[i].Name)
|
||||
}
|
||||
|
||||
categories, err := a.transactionCategories.GetAllCategoriesByUid(c, uid, 0, -1)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get categories for user \"uid:%d\", because %s", uid, err.Error())
|
||||
return nil, errs.Or(err, errs.ErrOperationFailed)
|
||||
}
|
||||
|
||||
incomeCategoryMap := make(map[string]*models.TransactionCategory)
|
||||
incomeCategoryNames := make([]string, 0)
|
||||
|
||||
expenseCategoryMap := make(map[string]*models.TransactionCategory)
|
||||
expenseCategoryNames := make([]string, 0)
|
||||
|
||||
transferCategoryMap := make(map[string]*models.TransactionCategory)
|
||||
transferCategoryNames := make([]string, 0)
|
||||
|
||||
for i := 0; i < len(categories); i++ {
|
||||
category := categories[i]
|
||||
|
||||
if category.Hidden || category.ParentCategoryId == models.LevelOneTransactionCategoryParentId {
|
||||
continue
|
||||
}
|
||||
|
||||
if category.Type == models.CATEGORY_TYPE_INCOME {
|
||||
incomeCategoryMap[category.Name] = category
|
||||
incomeCategoryNames = append(incomeCategoryNames, category.Name)
|
||||
} else if category.Type == models.CATEGORY_TYPE_EXPENSE {
|
||||
expenseCategoryMap[category.Name] = category
|
||||
expenseCategoryNames = append(expenseCategoryNames, category.Name)
|
||||
} else if category.Type == models.CATEGORY_TYPE_TRANSFER {
|
||||
transferCategoryMap[category.Name] = category
|
||||
transferCategoryNames = append(transferCategoryNames, category.Name)
|
||||
}
|
||||
}
|
||||
|
||||
tags, err := a.transactionTags.GetAllTagsByUid(c, uid)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to get tags for user \"uid:%d\", because %s", uid, err.Error())
|
||||
return nil, errs.Or(err, errs.ErrOperationFailed)
|
||||
}
|
||||
|
||||
tagMap := a.transactionTags.GetVisibleTagNameMapByList(tags)
|
||||
tagNames := make([]string, 0, len(tags))
|
||||
|
||||
for i := 0; i < len(tags); i++ {
|
||||
if tags[i].Hidden {
|
||||
continue
|
||||
}
|
||||
|
||||
tagNames = append(tagNames, tags[i].Name)
|
||||
}
|
||||
|
||||
systemPrompt, err := templates.GetTemplate(templates.SYSTEM_PROMPT_RECEIPT_IMAGE_RECOGNITION)
|
||||
|
||||
if err != nil {
|
||||
return nil, errs.Or(err, errs.ErrOperationFailed)
|
||||
}
|
||||
|
||||
systemPromptParams := map[string]any{
|
||||
"AllExpenseCategoryNames": strings.Join(expenseCategoryNames, "\n"),
|
||||
"AllIncomeCategoryNames": strings.Join(incomeCategoryNames, "\n"),
|
||||
"AllTransferCategoryNames": strings.Join(transferCategoryNames, "\n"),
|
||||
"AllAccountNames": strings.Join(accountNames, "\n"),
|
||||
"AllTagNames": strings.Join(tagNames, "\n"),
|
||||
}
|
||||
|
||||
var bodyBuffer bytes.Buffer
|
||||
err = systemPrompt.Execute(&bodyBuffer, systemPromptParams)
|
||||
|
||||
if err != nil {
|
||||
return nil, errs.Or(err, errs.ErrOperationFailed)
|
||||
}
|
||||
|
||||
llmRequest := &llm.LargeLanguageModelRequest{
|
||||
Stream: false,
|
||||
SystemPrompt: strings.ReplaceAll(bodyBuffer.String(), "\r\n", "\n"),
|
||||
UserPrompt: imageData,
|
||||
UserPromptType: llm.LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
|
||||
}
|
||||
|
||||
llmResponse, err := llm.Container.GetJsonResponseByReceiptImageRecognitionModel(c, c.GetCurrentUid(), a.CurrentConfig(), llmRequest)
|
||||
|
||||
if err != nil {
|
||||
return nil, errs.Or(err, errs.ErrOperationFailed)
|
||||
}
|
||||
|
||||
var result *models.RecognizedReceiptImageResult
|
||||
|
||||
if err := json.Unmarshal([]byte(llmResponse.Content), &result); err != nil {
|
||||
log.Errorf(c, "[large_language_models.RecognizeReceiptImageHandler] failed to unmarshal recognized receipt image result from llm response \"%s\" for user \"uid:%d\", because %s", llmResponse.Content, uid, err.Error())
|
||||
return nil, errs.Or(err, errs.ErrOperationFailed)
|
||||
}
|
||||
|
||||
return a.parseRecognizedReceiptImageResponse(c, uid, utcOffset, result, accountMap, expenseCategoryMap, incomeCategoryMap, transferCategoryMap, tagMap)
|
||||
}
|
||||
|
||||
func (a *LargeLanguageModelsApi) parseRecognizedReceiptImageResponse(c *core.WebContext, uid int64, utcOffset int16, recognizedResult *models.RecognizedReceiptImageResult, accountMap map[string]*models.Account, expenseCategoryMap map[string]*models.TransactionCategory, incomeCategoryMap map[string]*models.TransactionCategory, transferCategoryMap map[string]*models.TransactionCategory, tagMap map[string]*models.TransactionTag) (*models.RecognizedReceiptImageResponse, *errs.Error) {
|
||||
recognizedReceiptImageResponse := &models.RecognizedReceiptImageResponse{
|
||||
Type: models.TRANSACTION_TYPE_EXPENSE,
|
||||
}
|
||||
|
||||
if recognizedResult == nil {
|
||||
log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recoginzed result is null")
|
||||
return nil, errs.ErrOperationFailed
|
||||
}
|
||||
|
||||
if recognizedResult.Type == "income" {
|
||||
recognizedReceiptImageResponse.Type = models.TRANSACTION_TYPE_INCOME
|
||||
|
||||
if len(recognizedResult.CategoryName) > 0 {
|
||||
category, exists := incomeCategoryMap[recognizedResult.CategoryName]
|
||||
|
||||
if exists {
|
||||
recognizedReceiptImageResponse.CategoryId = category.CategoryId
|
||||
}
|
||||
}
|
||||
} else if recognizedResult.Type == "expense" {
|
||||
recognizedReceiptImageResponse.Type = models.TRANSACTION_TYPE_EXPENSE
|
||||
|
||||
if len(recognizedResult.CategoryName) > 0 {
|
||||
category, exists := expenseCategoryMap[recognizedResult.CategoryName]
|
||||
|
||||
if exists {
|
||||
recognizedReceiptImageResponse.CategoryId = category.CategoryId
|
||||
}
|
||||
}
|
||||
} else if recognizedResult.Type == "transfer" {
|
||||
recognizedReceiptImageResponse.Type = models.TRANSACTION_TYPE_TRANSFER
|
||||
|
||||
if len(recognizedResult.CategoryName) > 0 {
|
||||
category, exists := transferCategoryMap[recognizedResult.CategoryName]
|
||||
|
||||
if exists {
|
||||
recognizedReceiptImageResponse.CategoryId = category.CategoryId
|
||||
}
|
||||
}
|
||||
} else {
|
||||
log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recoginzed transaction type \"%s\" is invalid", recognizedResult.Type)
|
||||
return nil, errs.ErrOperationFailed
|
||||
}
|
||||
|
||||
if len(recognizedResult.Time) > 0 {
|
||||
timestamp, err := utils.ParseFromLongDateTime(recognizedResult.Time, utcOffset)
|
||||
|
||||
if err != nil {
|
||||
log.Warnf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recoginzed time \"%s\" is invalid", recognizedResult.Time)
|
||||
} else {
|
||||
recognizedReceiptImageResponse.Time = timestamp.Unix()
|
||||
}
|
||||
}
|
||||
|
||||
if len(recognizedResult.Amount) > 0 {
|
||||
amount, err := utils.ParseAmount(recognizedResult.Amount)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recoginzed amount \"%s\" is invalid", recognizedResult.Amount)
|
||||
return nil, errs.ErrOperationFailed
|
||||
}
|
||||
|
||||
recognizedReceiptImageResponse.SourceAmount = amount
|
||||
|
||||
if recognizedReceiptImageResponse.Type == models.TRANSACTION_TYPE_TRANSFER && len(recognizedResult.DestinationAmount) > 0 {
|
||||
destinationAmount, err := utils.ParseAmount(recognizedResult.DestinationAmount)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(c, "[large_language_models.parseRecognizedReceiptImageResponse] recoginzed destination amount \"%s\" is invalid", recognizedResult.DestinationAmount)
|
||||
return nil, errs.ErrOperationFailed
|
||||
}
|
||||
|
||||
recognizedReceiptImageResponse.DestinationAmount = destinationAmount
|
||||
}
|
||||
}
|
||||
|
||||
if len(recognizedResult.AccountName) > 0 {
|
||||
account, exists := accountMap[recognizedResult.AccountName]
|
||||
|
||||
if exists {
|
||||
recognizedReceiptImageResponse.SourceAccountId = account.AccountId
|
||||
}
|
||||
}
|
||||
|
||||
if len(recognizedResult.DestinationAccountName) > 0 {
|
||||
account, exists := accountMap[recognizedResult.DestinationAccountName]
|
||||
|
||||
if exists {
|
||||
recognizedReceiptImageResponse.DestinationAccountId = account.AccountId
|
||||
}
|
||||
}
|
||||
|
||||
if len(recognizedResult.TagNames) > 0 {
|
||||
tagIds := make([]string, 0, len(recognizedResult.TagNames))
|
||||
|
||||
for i := 0; i < len(recognizedResult.TagNames); i++ {
|
||||
tagName := recognizedResult.TagNames[i]
|
||||
tag, exists := tagMap[tagName]
|
||||
|
||||
if exists {
|
||||
tagIds = append(tagIds, utils.Int64ToString(tag.TagId))
|
||||
}
|
||||
}
|
||||
|
||||
recognizedReceiptImageResponse.TagIds = tagIds
|
||||
}
|
||||
|
||||
if len(recognizedResult.Description) > 0 {
|
||||
recognizedReceiptImageResponse.Comment = recognizedResult.Description
|
||||
}
|
||||
|
||||
return recognizedReceiptImageResponse, nil
|
||||
}
|
||||
@@ -47,6 +47,12 @@ func (a *ServerSettingsApi) ServerSettingsJavascriptHandler(c *core.WebContext)
|
||||
a.appendBooleanSetting(builder, "mcp", config.EnableMCPServer)
|
||||
}
|
||||
|
||||
if config.LLMProvider != "" {
|
||||
if config.TransactionFromAIImageRecognition {
|
||||
a.appendBooleanSetting(builder, "llmt", config.TransactionFromAIImageRecognition)
|
||||
}
|
||||
}
|
||||
|
||||
if config.LoginPageTips.Enabled {
|
||||
a.appendMultiLanguageTipSetting(builder, "lpt", config.LoginPageTips)
|
||||
}
|
||||
|
||||
@@ -76,19 +76,20 @@ type UserFeatureRestrictionType uint64
|
||||
|
||||
// User Feature Restriction Type
|
||||
const (
|
||||
USER_FEATURE_RESTRICTION_TYPE_UPDATE_PASSWORD UserFeatureRestrictionType = 1
|
||||
USER_FEATURE_RESTRICTION_TYPE_UPDATE_EMAIL UserFeatureRestrictionType = 2
|
||||
USER_FEATURE_RESTRICTION_TYPE_UPDATE_PROFILE_BASIC_INFO UserFeatureRestrictionType = 3
|
||||
USER_FEATURE_RESTRICTION_TYPE_UPDATE_AVATAR UserFeatureRestrictionType = 4
|
||||
USER_FEATURE_RESTRICTION_TYPE_REVOKE_OTHER_SESSION UserFeatureRestrictionType = 5
|
||||
USER_FEATURE_RESTRICTION_TYPE_ENABLE_2FA UserFeatureRestrictionType = 6
|
||||
USER_FEATURE_RESTRICTION_TYPE_DISABLE_2FA UserFeatureRestrictionType = 7
|
||||
USER_FEATURE_RESTRICTION_TYPE_FORGET_PASSWORD UserFeatureRestrictionType = 8
|
||||
USER_FEATURE_RESTRICTION_TYPE_IMPORT_TRANSACTION UserFeatureRestrictionType = 9
|
||||
USER_FEATURE_RESTRICTION_TYPE_EXPORT_TRANSACTION UserFeatureRestrictionType = 10
|
||||
USER_FEATURE_RESTRICTION_TYPE_CLEAR_ALL_DATA UserFeatureRestrictionType = 11
|
||||
USER_FEATURE_RESTRICTION_TYPE_SYNC_APPLICATION_SETTINGS UserFeatureRestrictionType = 12
|
||||
USER_FEATURE_RESTRICTION_TYPE_MCP_ACCESS UserFeatureRestrictionType = 13
|
||||
USER_FEATURE_RESTRICTION_TYPE_UPDATE_PASSWORD UserFeatureRestrictionType = 1
|
||||
USER_FEATURE_RESTRICTION_TYPE_UPDATE_EMAIL UserFeatureRestrictionType = 2
|
||||
USER_FEATURE_RESTRICTION_TYPE_UPDATE_PROFILE_BASIC_INFO UserFeatureRestrictionType = 3
|
||||
USER_FEATURE_RESTRICTION_TYPE_UPDATE_AVATAR UserFeatureRestrictionType = 4
|
||||
USER_FEATURE_RESTRICTION_TYPE_REVOKE_OTHER_SESSION UserFeatureRestrictionType = 5
|
||||
USER_FEATURE_RESTRICTION_TYPE_ENABLE_2FA UserFeatureRestrictionType = 6
|
||||
USER_FEATURE_RESTRICTION_TYPE_DISABLE_2FA UserFeatureRestrictionType = 7
|
||||
USER_FEATURE_RESTRICTION_TYPE_FORGET_PASSWORD UserFeatureRestrictionType = 8
|
||||
USER_FEATURE_RESTRICTION_TYPE_IMPORT_TRANSACTION UserFeatureRestrictionType = 9
|
||||
USER_FEATURE_RESTRICTION_TYPE_EXPORT_TRANSACTION UserFeatureRestrictionType = 10
|
||||
USER_FEATURE_RESTRICTION_TYPE_CLEAR_ALL_DATA UserFeatureRestrictionType = 11
|
||||
USER_FEATURE_RESTRICTION_TYPE_SYNC_APPLICATION_SETTINGS UserFeatureRestrictionType = 12
|
||||
USER_FEATURE_RESTRICTION_TYPE_MCP_ACCESS UserFeatureRestrictionType = 13
|
||||
USER_FEATURE_RESTRICTION_TYPE_CREATE_TRANSACTION_FROM_AI_IMAGE_RECOGNITION UserFeatureRestrictionType = 14
|
||||
)
|
||||
|
||||
// userFeatureRestrictionTypeMinValue aliases USER_FEATURE_RESTRICTION_TYPE_UPDATE_PASSWORD;
// presumably the lower bound of the valid restriction types - confirm at the places it is used
const userFeatureRestrictionTypeMinValue UserFeatureRestrictionType = USER_FEATURE_RESTRICTION_TYPE_UPDATE_PASSWORD
|
||||
|
||||
@@ -40,6 +40,7 @@ const (
|
||||
NormalSubcategoryConverter = 12
|
||||
NormalSubcategoryUserCustomExchangeRate = 13
|
||||
NormalSubcategoryModelContextProtocol = 14
|
||||
NormalSubcategoryLargeLanguageModel = 15
|
||||
)
|
||||
|
||||
// Error represents the specific error returned to user
|
||||
|
||||
@@ -0,0 +1,11 @@
|
||||
package errs
|
||||
|
||||
import "net/http"
|
||||
|
||||
// Error codes related to large language model features
|
||||
var (
|
||||
ErrLargeLanguageModelProviderNotEnabled = NewNormalError(NormalSubcategoryLargeLanguageModel, 0, http.StatusBadRequest, "llm provider is not enabled")
|
||||
ErrNoAIRecognitionImage = NewNormalError(NormalSubcategoryLargeLanguageModel, 1, http.StatusBadRequest, "no image for AI recognition")
|
||||
ErrAIRecognitionImageIsEmpty = NewNormalError(NormalSubcategoryLargeLanguageModel, 2, http.StatusBadRequest, "image for AI recognition is empty")
|
||||
ErrExceedMaxAIRecognitionImageFileSize = NewNormalError(NormalSubcategoryLargeLanguageModel, 3, http.StatusBadRequest, "exceed the maximum size of image file for AI recognition")
|
||||
)
|
||||
@@ -24,4 +24,6 @@ var (
|
||||
ErrInvalidPasswordResetTokenExpiredTime = NewSystemError(SystemSubcategorySetting, 17, http.StatusInternalServerError, "invalid password reset token expired time")
|
||||
ErrInvalidExchangeRatesDataSource = NewSystemError(SystemSubcategorySetting, 18, http.StatusInternalServerError, "invalid exchange rates data source")
|
||||
ErrInvalidIpAddressPattern = NewSystemError(SystemSubcategorySetting, 19, http.StatusInternalServerError, "invalid ip address pattern")
|
||||
ErrInvalidLLMProvider = NewSystemError(SystemSubcategorySetting, 20, http.StatusInternalServerError, "invalid llm provider")
|
||||
ErrInvalidLLMModelId = NewSystemError(SystemSubcategorySetting, 21, http.StatusInternalServerError, "invalid llm model id")
|
||||
)
|
||||
|
||||
@@ -0,0 +1,91 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"crypto/tls"
|
||||
"io"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"github.com/mayswind/ezbookkeeping/pkg/core"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/errs"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/log"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/settings"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/utils"
|
||||
)
|
||||
|
||||
// HttpLargeLanguageModelProvider defines the structure of http large language model provider
|
||||
type HttpLargeLanguageModelProvider interface {
|
||||
// BuildTextualRequest returns the http request by the provider api definition
|
||||
BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error)
|
||||
|
||||
// ParseTextualResponse returns the textual response entity by the provider api definition
|
||||
ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error)
|
||||
|
||||
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id if supported, otherwise returns empty string
|
||||
GetReceiptImageRecognitionModelID() string
|
||||
}
|
||||
|
||||
// CommonHttpLargeLanguageModelProvider defines the structure of common http large language model provider
|
||||
type CommonHttpLargeLanguageModelProvider struct {
|
||||
LargeLanguageModelProvider
|
||||
provider HttpLargeLanguageModelProvider
|
||||
}
|
||||
|
||||
// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the OpenAI common compatible large language model provider
|
||||
func (p *CommonHttpLargeLanguageModelProvider) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) {
|
||||
return p.getTextualResponse(c, uid, currentConfig, request, p.provider.GetReceiptImageRecognitionModelID(), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
|
||||
}
|
||||
|
||||
func (p *CommonHttpLargeLanguageModelProvider) getTextualResponse(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
|
||||
if modelId == "" {
|
||||
return nil, errs.ErrInvalidLLMModelId
|
||||
}
|
||||
|
||||
transport := http.DefaultTransport.(*http.Transport).Clone()
|
||||
utils.SetProxyUrl(transport, currentConfig.LargeLanguageModelAPIProxy)
|
||||
|
||||
if currentConfig.LargeLanguageModelAPISkipTLSVerify {
|
||||
transport.TLSClientConfig = &tls.Config{
|
||||
InsecureSkipVerify: true,
|
||||
}
|
||||
}
|
||||
|
||||
client := &http.Client{
|
||||
Transport: transport,
|
||||
Timeout: time.Duration(currentConfig.LargeLanguageModelAPIRequestTimeout) * time.Millisecond,
|
||||
}
|
||||
|
||||
httpRequest, err := p.provider.BuildTextualRequest(c, uid, request, modelId, responseType)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to build requests for user \"uid:%d\", because %s", uid, err.Error())
|
||||
return nil, errs.ErrFailedToRequestRemoteApi
|
||||
}
|
||||
|
||||
httpRequest.Header.Set("User-Agent", settings.GetUserAgent())
|
||||
|
||||
resp, err := client.Do(httpRequest)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to request large language model api for user \"uid:%d\", because %s", uid, err.Error())
|
||||
return nil, errs.ErrFailedToRequestRemoteApi
|
||||
}
|
||||
|
||||
defer resp.Body.Close()
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
|
||||
log.Debugf(c, "[http_large_language_model_provider.getTextualResponse] response is %s", body)
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
log.Errorf(c, "[http_large_language_model_provider.getTextualResponse] failed to get large language model api response for user \"uid:%d\", because response code is %d", uid, resp.StatusCode)
|
||||
return nil, errs.ErrFailedToRequestRemoteApi
|
||||
}
|
||||
|
||||
return p.provider.ParseTextualResponse(c, uid, body, responseType)
|
||||
}
|
||||
|
||||
func newCommonHttpLargeLanguageModelProvider(provider HttpLargeLanguageModelProvider) *CommonHttpLargeLanguageModelProvider {
|
||||
return &CommonHttpLargeLanguageModelProvider{
|
||||
provider: provider,
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,33 @@
|
||||
package llm
|
||||
|
||||
import "reflect"
|
||||
|
||||
// LargeLanguageModelRequestPromptType represents the kind of content carried by the user prompt of a request
type LargeLanguageModelRequestPromptType byte
|
||||
|
||||
// Large Language Model Request Prompt Type
|
||||
const (
|
||||
LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_TEXT LargeLanguageModelRequestPromptType = 0
|
||||
LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL LargeLanguageModelRequestPromptType = 1
|
||||
)
|
||||
|
||||
// LargeLanguageModelResponseFormat represents the format of the response requested from a large language model
type LargeLanguageModelResponseFormat byte
|
||||
|
||||
// Large Language Model Response Format
|
||||
const (
|
||||
LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_TEXT LargeLanguageModelResponseFormat = 0
|
||||
LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON LargeLanguageModelResponseFormat = 1
|
||||
)
|
||||
|
||||
// LargeLanguageModelRequest represents a request to a large language model
|
||||
type LargeLanguageModelRequest struct {
|
||||
Stream bool
|
||||
SystemPrompt string
|
||||
UserPrompt []byte
|
||||
UserPromptType LargeLanguageModelRequestPromptType
|
||||
ResponseJsonObjectType reflect.Type
|
||||
}
|
||||
|
||||
// LargeLanguageModelTextualResponse carries the text answered by a large language model
type LargeLanguageModelTextualResponse struct {
	Content string // text content extracted from the provider response
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"github.com/mayswind/ezbookkeeping/pkg/core"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/settings"
|
||||
)
|
||||
|
||||
// LargeLanguageModelProvider defines the structure of large language model provider
|
||||
type LargeLanguageModelProvider interface {
|
||||
// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the large language model provider by receipt image recognition model
|
||||
GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error)
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"github.com/mayswind/ezbookkeeping/pkg/core"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/errs"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/settings"
|
||||
)
|
||||
|
||||
// LargeLanguageModelProviderContainer contains the current large language model provider
|
||||
type LargeLanguageModelProviderContainer struct {
|
||||
current LargeLanguageModelProvider
|
||||
}
|
||||
|
||||
// Initialize a large language model provider container singleton instance
|
||||
var (
|
||||
Container = &LargeLanguageModelProviderContainer{}
|
||||
)
|
||||
|
||||
// InitializeLargeLanguageModelProvider initializes the current large language model provider according to the config
|
||||
func InitializeLargeLanguageModelProvider(config *settings.Config) error {
|
||||
if config.LLMProvider == settings.OpenAILLMProvider {
|
||||
Container.current = NewOpenAILargeLanguageModelProvider(config)
|
||||
return nil
|
||||
} else if config.LLMProvider == settings.OpenAICompatibleLLMProvider {
|
||||
Container.current = NewOpenAICompatibleLargeLanguageModelProvider(config)
|
||||
return nil
|
||||
} else if config.LLMProvider == settings.OpenRouterLLMProvider {
|
||||
Container.current = NewOpenRouterLargeLanguageModelProvider(config)
|
||||
return nil
|
||||
} else if config.LLMProvider == settings.OllamaLLMProvider {
|
||||
Container.current = NewOllamaLargeLanguageModelProvider(config)
|
||||
return nil
|
||||
}
|
||||
|
||||
return errs.ErrInvalidLLMProvider
|
||||
}
|
||||
|
||||
// GetJsonResponseByReceiptImageRecognitionModel returns the json response from the current large language model provider by receipt image recognition model
|
||||
func (l *LargeLanguageModelProviderContainer) GetJsonResponseByReceiptImageRecognitionModel(c core.Context, uid int64, currentConfig *settings.Config, request *LargeLanguageModelRequest) (*LargeLanguageModelTextualResponse, error) {
|
||||
if Container.current == nil {
|
||||
return nil, errs.ErrInvalidLLMProvider
|
||||
}
|
||||
|
||||
return l.current.GetJsonResponseByReceiptImageRecognitionModel(c, uid, currentConfig, request)
|
||||
}
|
||||
@@ -0,0 +1,153 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/mayswind/ezbookkeeping/pkg/core"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/errs"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/log"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/settings"
|
||||
)
|
||||
|
||||
// ollamaChatCompletionsPath is the relative path "api/chat" of the Ollama api;
// presumably appended to the server url when the request url is built - confirm in getOllamaRequestUrl
const ollamaChatCompletionsPath = "api/chat"
|
||||
|
||||
// OllamaLargeLanguageModelProvider defines the structure of Ollama large language model provider
|
||||
type OllamaLargeLanguageModelProvider struct {
|
||||
CommonHttpLargeLanguageModelProvider
|
||||
OllamaServerURL string
|
||||
ReceiptImageRecognitionModelID string
|
||||
}
|
||||
|
||||
// BuildTextualRequest returns the http request by Ollama provider
|
||||
func (p *OllamaLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) {
|
||||
requestBody, err := p.buildJsonRequestBody(c, uid, request, modelId, responseType)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
httpRequest, err := http.NewRequest("POST", p.getOllamaRequestUrl(), bytes.NewReader(requestBody))
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
httpRequest.Header.Set("Content-Type", "application/json")
|
||||
|
||||
return httpRequest, nil
|
||||
}
|
||||
|
||||
// ParseTextualResponse returns the textual response by Ollama provider
|
||||
func (p *OllamaLargeLanguageModelProvider) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
|
||||
responseBody := make(map[string]any)
|
||||
err := json.Unmarshal(body, &responseBody)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] failed to parse response for user \"uid:%d\", because %s", uid, err.Error())
|
||||
return nil, errs.ErrFailedToRequestRemoteApi
|
||||
}
|
||||
|
||||
message, ok := responseBody["message"].(map[string]any)
|
||||
|
||||
if !ok {
|
||||
log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] no message found in response for user \"uid:%d\"", uid)
|
||||
return nil, errs.ErrFailedToRequestRemoteApi
|
||||
}
|
||||
|
||||
content, ok := message["content"].(string)
|
||||
|
||||
if !ok {
|
||||
log.Errorf(c, "[ollama_large_language_model_provider.ParseTextualResponse] no content found in message for user \"uid:%d\"", uid)
|
||||
return nil, errs.ErrFailedToRequestRemoteApi
|
||||
}
|
||||
|
||||
if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
|
||||
if strings.HasPrefix(content, "```json") && strings.HasSuffix(content, "```") {
|
||||
content = strings.TrimPrefix(content, "```json")
|
||||
content = strings.TrimSuffix(content, "```")
|
||||
} else if strings.HasPrefix(content, "```") && strings.HasSuffix(content, "```") {
|
||||
content = strings.TrimPrefix(content, "```")
|
||||
content = strings.TrimSuffix(content, "```")
|
||||
}
|
||||
}
|
||||
|
||||
textualResponse := &LargeLanguageModelTextualResponse{
|
||||
Content: content,
|
||||
}
|
||||
|
||||
return textualResponse, nil
|
||||
}
|
||||
|
||||
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of Ollama provider
|
||||
func (p *OllamaLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
|
||||
return p.ReceiptImageRecognitionModelID
|
||||
}
|
||||
|
||||
func (p *OllamaLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) ([]byte, error) {
|
||||
requestMessages := make([]any, 0)
|
||||
|
||||
if request.SystemPrompt != "" {
|
||||
requestMessages = append(requestMessages, map[string]string{
|
||||
"role": "system",
|
||||
"content": request.SystemPrompt,
|
||||
})
|
||||
}
|
||||
|
||||
if len(request.UserPrompt) > 0 {
|
||||
imageBase64Data := base64.StdEncoding.EncodeToString(request.UserPrompt)
|
||||
if request.UserPromptType == LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL {
|
||||
requestMessages = append(requestMessages, map[string]any{
|
||||
"role": "user",
|
||||
"content": "",
|
||||
"images": []string{imageBase64Data},
|
||||
})
|
||||
} else {
|
||||
requestMessages = append(requestMessages, map[string]string{
|
||||
"role": "user",
|
||||
"content": string(request.UserPrompt),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
requestBody := make(map[string]any)
|
||||
requestBody["model"] = modelId
|
||||
requestBody["stream"] = request.Stream
|
||||
requestBody["messages"] = requestMessages
|
||||
|
||||
if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
|
||||
requestBody["format"] = "json"
|
||||
}
|
||||
|
||||
requestBodyBytes, err := json.Marshal(requestBody)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(c, "[ollama_large_language_model_provider.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error())
|
||||
return nil, errs.ErrOperationFailed
|
||||
}
|
||||
|
||||
log.Debugf(c, "[ollama_large_language_model_provider.buildJsonRequestBody] request body is %s", requestBodyBytes)
|
||||
return requestBodyBytes, nil
|
||||
}
|
||||
|
||||
func (p *OllamaLargeLanguageModelProvider) getOllamaRequestUrl() string {
|
||||
url := p.OllamaServerURL
|
||||
|
||||
if url[len(url)-1] != '/' {
|
||||
url += "/"
|
||||
}
|
||||
|
||||
url += ollamaChatCompletionsPath
|
||||
return url
|
||||
}
|
||||
|
||||
// NewOllamaLargeLanguageModelProvider creates a new Ollama large language model provider instance
|
||||
func NewOllamaLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
|
||||
return newCommonHttpLargeLanguageModelProvider(&OllamaLargeLanguageModelProvider{
|
||||
OllamaServerURL: config.OllamaServerURL,
|
||||
ReceiptImageRecognitionModelID: config.OllamaReceiptImageRecognitionModelID,
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,138 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"github.com/mayswind/ezbookkeeping/pkg/core"
|
||||
)
|
||||
|
||||
func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) {
|
||||
provider := &OllamaLargeLanguageModelProvider{}
|
||||
|
||||
request := &LargeLanguageModelRequest{
|
||||
SystemPrompt: "You are a helpful assistant.",
|
||||
UserPrompt: []byte("Hello, how are you?"),
|
||||
}
|
||||
|
||||
bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
|
||||
assert.Nil(t, err)
|
||||
|
||||
var body map[string]interface{}
|
||||
err = json.Unmarshal(bodyBytes, &body)
|
||||
assert.Nil(t, err)
|
||||
|
||||
assert.Equal(t, "{\"format\":\"json\",\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\"model\":\"test\",\"stream\":false}", string(bodyBytes))
|
||||
}
|
||||
|
||||
func TestOllamaLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) {
|
||||
provider := &OllamaLargeLanguageModelProvider{}
|
||||
|
||||
request := &LargeLanguageModelRequest{
|
||||
SystemPrompt: "What's in this image?",
|
||||
UserPrompt: []byte("fakedata"),
|
||||
UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
|
||||
}
|
||||
|
||||
bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
|
||||
assert.Nil(t, err)
|
||||
|
||||
var body map[string]interface{}
|
||||
err = json.Unmarshal(bodyBytes, &body)
|
||||
assert.Nil(t, err)
|
||||
|
||||
assert.Equal(t, "{\"format\":\"json\",\"messages\":[{\"content\":\"What's in this image?\",\"role\":\"system\"},{\"content\":\"\",\"images\":[\"ZmFrZWRhdGE=\"],\"role\":\"user\"}],\"model\":\"test\",\"stream\":false}", string(bodyBytes))
|
||||
}
|
||||
|
||||
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_ValidJsonResponse(t *testing.T) {
|
||||
provider := &OllamaLargeLanguageModelProvider{}
|
||||
|
||||
response := `{
|
||||
"model": "test",
|
||||
"created_at": "2025-09-01T01:02:03.456789Z",
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "This is a test response"
|
||||
}
|
||||
}`
|
||||
|
||||
result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, "This is a test response", result.Content)
|
||||
}
|
||||
|
||||
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_EmptyResponse(t *testing.T) {
|
||||
provider := &OllamaLargeLanguageModelProvider{}
|
||||
|
||||
response := `{
|
||||
"model": "test",
|
||||
"created_at": "2025-09-01T01:02:03.456789Z",
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": ""
|
||||
}
|
||||
}`
|
||||
|
||||
result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, "", result.Content)
|
||||
}
|
||||
|
||||
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_EmptyChoices(t *testing.T) {
|
||||
provider := &OllamaLargeLanguageModelProvider{}
|
||||
|
||||
response := `{
|
||||
"model": "test",
|
||||
"created_at": "2025-09-01T01:02:03.456789Z",
|
||||
"message": {}
|
||||
}`
|
||||
|
||||
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
|
||||
assert.EqualError(t, err, "failed to request third party api")
|
||||
}
|
||||
|
||||
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_NoChoiceContent(t *testing.T) {
|
||||
provider := &OllamaLargeLanguageModelProvider{}
|
||||
|
||||
response := `{
|
||||
"model": "test",
|
||||
"created_at": "2025-09-01T01:02:03.456789Z",
|
||||
"message": {
|
||||
"role": "assistant"
|
||||
}
|
||||
}`
|
||||
|
||||
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
|
||||
assert.EqualError(t, err, "failed to request third party api")
|
||||
}
|
||||
|
||||
func TestOllamaLargeLanguageModelProvider_ParseTextualResponse_InvalidJson(t *testing.T) {
|
||||
provider := &OllamaLargeLanguageModelProvider{}
|
||||
|
||||
response := "error"
|
||||
|
||||
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
|
||||
assert.EqualError(t, err, "failed to request third party api")
|
||||
}
|
||||
|
||||
func TestOllamaLargeLanguageModelProvider_GetOllamaRequestUrl(t *testing.T) {
|
||||
provider := &OllamaLargeLanguageModelProvider{
|
||||
OllamaServerURL: "http://localhost:11434/",
|
||||
}
|
||||
url := provider.getOllamaRequestUrl()
|
||||
assert.Equal(t, "http://localhost:11434/api/chat", url)
|
||||
|
||||
provider = &OllamaLargeLanguageModelProvider{
|
||||
OllamaServerURL: "http://localhost:11434",
|
||||
}
|
||||
url = provider.getOllamaRequestUrl()
|
||||
assert.Equal(t, "http://localhost:11434/api/chat", url)
|
||||
|
||||
provider = &OllamaLargeLanguageModelProvider{
|
||||
OllamaServerURL: "http://example.com/ollama/",
|
||||
}
|
||||
url = provider.getOllamaRequestUrl()
|
||||
assert.Equal(t, "http://example.com/ollama/api/chat", url)
|
||||
}
|
||||
@@ -0,0 +1,187 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"github.com/invopop/jsonschema"
|
||||
|
||||
"github.com/mayswind/ezbookkeeping/pkg/core"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/errs"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/log"
|
||||
)
|
||||
|
||||
// OpenAIChatCompletionsLargeLanguageModelProvider defines the structure of OpenAI chat completions compatible large language model provider
|
||||
type OpenAIChatCompletionsLargeLanguageModelProvider interface {
|
||||
// BuildChatCompletionsHttpRequest returns the chat completions http request
|
||||
BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error)
|
||||
|
||||
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id if supported, otherwise returns empty string
|
||||
GetReceiptImageRecognitionModelID() string
|
||||
}
|
||||
|
||||
// OpenAICommonChatCompletionsHttpLargeLanguageModelProvider defines the structure of OpenAI common compatible large language model provider based on chat completions api
|
||||
type OpenAICommonChatCompletionsHttpLargeLanguageModelProvider struct {
|
||||
CommonHttpLargeLanguageModelProvider
|
||||
provider OpenAIChatCompletionsLargeLanguageModelProvider
|
||||
}
|
||||
|
||||
// BuildTextualRequest returns the http request by OpenAI common compatible provider
|
||||
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) BuildTextualRequest(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) (*http.Request, error) {
|
||||
requestBody, err := p.buildJsonRequestBody(c, uid, request, modelId, responseType)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
httpRequest, err := p.provider.BuildChatCompletionsHttpRequest(c, uid)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
httpRequest.Body = io.NopCloser(bytes.NewReader(requestBody))
|
||||
httpRequest.Header.Set("Content-Type", "application/json")
|
||||
|
||||
return httpRequest, nil
|
||||
}
|
||||
|
||||
// ParseTextualResponse returns the textual response by OpenAI common compatible provider
|
||||
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) ParseTextualResponse(c core.Context, uid int64, body []byte, responseType LargeLanguageModelResponseFormat) (*LargeLanguageModelTextualResponse, error) {
|
||||
responseBody := make(map[string]any)
|
||||
err := json.Unmarshal(body, &responseBody)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] failed to parse response for user \"uid:%d\", because %s", uid, err.Error())
|
||||
return nil, errs.ErrFailedToRequestRemoteApi
|
||||
}
|
||||
|
||||
choices, ok := responseBody["choices"].([]any)
|
||||
|
||||
if !ok || len(choices) < 1 {
|
||||
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no choices found in response for user \"uid:%d\"", uid)
|
||||
return nil, errs.ErrFailedToRequestRemoteApi
|
||||
}
|
||||
|
||||
firstChoice, ok := choices[0].(map[string]any)
|
||||
|
||||
if !ok {
|
||||
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] invalid choice format in response for user \"uid:%d\"", uid)
|
||||
return nil, errs.ErrFailedToRequestRemoteApi
|
||||
}
|
||||
|
||||
message, ok := firstChoice["message"].(map[string]any)
|
||||
|
||||
if !ok {
|
||||
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no message found in choice for user \"uid:%d\"", uid)
|
||||
return nil, errs.ErrFailedToRequestRemoteApi
|
||||
}
|
||||
|
||||
content, ok := message["content"].(string)
|
||||
|
||||
if !ok {
|
||||
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.ParseTextualResponse] no content found in message for user \"uid:%d\"", uid)
|
||||
return nil, errs.ErrFailedToRequestRemoteApi
|
||||
}
|
||||
|
||||
if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
|
||||
if strings.HasPrefix(content, "```json") && strings.HasSuffix(content, "```") {
|
||||
content = strings.TrimPrefix(content, "```json")
|
||||
content = strings.TrimSuffix(content, "```")
|
||||
} else if strings.HasPrefix(content, "```") && strings.HasSuffix(content, "```") {
|
||||
content = strings.TrimPrefix(content, "```")
|
||||
content = strings.TrimSuffix(content, "```")
|
||||
}
|
||||
}
|
||||
|
||||
textualResponse := &LargeLanguageModelTextualResponse{
|
||||
Content: content,
|
||||
}
|
||||
|
||||
return textualResponse, nil
|
||||
}
|
||||
|
||||
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI common compatible provider
|
||||
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
|
||||
return p.provider.GetReceiptImageRecognitionModelID()
|
||||
}
|
||||
|
||||
func (p *OpenAICommonChatCompletionsHttpLargeLanguageModelProvider) buildJsonRequestBody(c core.Context, uid int64, request *LargeLanguageModelRequest, modelId string, responseType LargeLanguageModelResponseFormat) ([]byte, error) {
|
||||
requestMessages := make([]any, 0)
|
||||
|
||||
if request.SystemPrompt != "" {
|
||||
requestMessages = append(requestMessages, map[string]string{
|
||||
"role": "system",
|
||||
"content": request.SystemPrompt,
|
||||
})
|
||||
}
|
||||
|
||||
if len(request.UserPrompt) > 0 {
|
||||
if request.UserPromptType == LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL {
|
||||
imageBase64Data := "data:image/png;base64," + base64.StdEncoding.EncodeToString(request.UserPrompt)
|
||||
requestMessages = append(requestMessages, map[string]any{
|
||||
"role": "user",
|
||||
"content": []any{
|
||||
core.O{
|
||||
"type": "image_url",
|
||||
"image_url": core.O{
|
||||
"url": imageBase64Data,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
} else {
|
||||
requestMessages = append(requestMessages, map[string]string{
|
||||
"role": "user",
|
||||
"content": string(request.UserPrompt),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
requestBody := make(map[string]any)
|
||||
requestBody["model"] = modelId
|
||||
requestBody["stream"] = request.Stream
|
||||
requestBody["messages"] = requestMessages
|
||||
|
||||
if responseType == LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON {
|
||||
if request.ResponseJsonObjectType != nil {
|
||||
schemeGenerator := jsonschema.Reflector{
|
||||
Anonymous: true,
|
||||
DoNotReference: true,
|
||||
ExpandedStruct: true,
|
||||
}
|
||||
|
||||
schema := schemeGenerator.ReflectFromType(request.ResponseJsonObjectType)
|
||||
schema.Version = ""
|
||||
|
||||
requestBody["response_format"] = core.O{
|
||||
"type": "json_schema",
|
||||
"json_schema": schema,
|
||||
}
|
||||
} else {
|
||||
requestBody["response_format"] = core.O{
|
||||
"type": "json_object",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
requestBodyBytes, err := json.Marshal(requestBody)
|
||||
|
||||
if err != nil {
|
||||
log.Errorf(c, "[openai_common_compatible_large_language_model_provider.buildJsonRequestBody] failed to marshal request body for user \"uid:%d\", because %s", uid, err.Error())
|
||||
return nil, errs.ErrOperationFailed
|
||||
}
|
||||
|
||||
log.Debugf(c, "[openai_common_compatible_large_language_model_provider.buildJsonRequestBody] request body is %s", requestBodyBytes)
|
||||
return requestBodyBytes, nil
|
||||
}
|
||||
|
||||
func newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(provider OpenAIChatCompletionsLargeLanguageModelProvider) LargeLanguageModelProvider {
|
||||
return newCommonHttpLargeLanguageModelProvider(&OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
|
||||
provider: provider,
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,157 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
||||
"github.com/mayswind/ezbookkeeping/pkg/core"
|
||||
)
|
||||
|
||||
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_TextualUserPrompt(t *testing.T) {
|
||||
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
|
||||
provider: &OpenAILargeLanguageModelProvider{},
|
||||
}
|
||||
|
||||
request := &LargeLanguageModelRequest{
|
||||
SystemPrompt: "You are a helpful assistant.",
|
||||
UserPrompt: []byte("Hello, how are you?"),
|
||||
}
|
||||
|
||||
bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
|
||||
assert.Nil(t, err)
|
||||
|
||||
var body map[string]interface{}
|
||||
err = json.Unmarshal(bodyBytes, &body)
|
||||
assert.Nil(t, err)
|
||||
|
||||
assert.Equal(t, "{\"messages\":[{\"content\":\"You are a helpful assistant.\",\"role\":\"system\"},{\"content\":\"Hello, how are you?\",\"role\":\"user\"}],\"model\":\"test\",\"response_format\":{\"type\":\"json_object\"},\"stream\":false}", string(bodyBytes))
|
||||
}
|
||||
|
||||
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_buildJsonRequestBody_ImageUserPrompt(t *testing.T) {
|
||||
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
|
||||
provider: &OpenAILargeLanguageModelProvider{},
|
||||
}
|
||||
|
||||
request := &LargeLanguageModelRequest{
|
||||
SystemPrompt: "What's in this image?",
|
||||
UserPrompt: []byte("fakedata"),
|
||||
UserPromptType: LARGE_LANGUAGE_MODEL_REQUEST_PROMPT_TYPE_IMAGE_URL,
|
||||
}
|
||||
|
||||
bodyBytes, err := provider.buildJsonRequestBody(core.NewNullContext(), 0, request, "test", LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
|
||||
assert.Nil(t, err)
|
||||
|
||||
var body map[string]interface{}
|
||||
err = json.Unmarshal(bodyBytes, &body)
|
||||
assert.Nil(t, err)
|
||||
|
||||
assert.Equal(t, "{\"messages\":[{\"content\":\"What's in this image?\",\"role\":\"system\"},{\"content\":[{\"image_url\":{\"url\":\"data:image/png;base64,ZmFrZWRhdGE=\"},\"type\":\"image_url\"}],\"role\":\"user\"}],\"model\":\"test\",\"response_format\":{\"type\":\"json_object\"},\"stream\":false}", string(bodyBytes))
|
||||
}
|
||||
|
||||
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_ValidJsonResponse(t *testing.T) {
|
||||
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
|
||||
provider: &OpenAILargeLanguageModelProvider{},
|
||||
}
|
||||
|
||||
response := `{
|
||||
"id": "test-123",
|
||||
"object": "chat.completion",
|
||||
"created": 1234567890,
|
||||
"model": "test",
|
||||
"usage": {
|
||||
"prompt_tokens": 13,
|
||||
"completion_tokens": 7,
|
||||
"total_tokens": 20
|
||||
},
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": "This is a test response"
|
||||
}
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, "This is a test response", result.Content)
|
||||
}
|
||||
|
||||
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_EmptyResponse(t *testing.T) {
|
||||
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
|
||||
provider: &OpenAILargeLanguageModelProvider{},
|
||||
}
|
||||
|
||||
response := `{
|
||||
"id": "test-123",
|
||||
"object": "chat.completion",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant",
|
||||
"content": ""
|
||||
}
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
result, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
|
||||
assert.Nil(t, err)
|
||||
assert.Equal(t, "", result.Content)
|
||||
}
|
||||
|
||||
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_EmptyChoices(t *testing.T) {
|
||||
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
|
||||
provider: &OpenAILargeLanguageModelProvider{},
|
||||
}
|
||||
|
||||
response := `{
|
||||
"id": "test-123",
|
||||
"object": "chat.completion",
|
||||
"choices": []
|
||||
}`
|
||||
|
||||
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
|
||||
assert.EqualError(t, err, "failed to request third party api")
|
||||
}
|
||||
|
||||
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_NoChoiceContent(t *testing.T) {
|
||||
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
|
||||
provider: &OpenAILargeLanguageModelProvider{},
|
||||
}
|
||||
|
||||
response := `{
|
||||
"id": "chatcmpl-123",
|
||||
"object": "chat.completion",
|
||||
"choices": [
|
||||
{
|
||||
"finish_reason": "stop",
|
||||
"index": 0,
|
||||
"message": {
|
||||
"role": "assistant"
|
||||
}
|
||||
}
|
||||
]
|
||||
}`
|
||||
|
||||
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
|
||||
assert.EqualError(t, err, "failed to request third party api")
|
||||
}
|
||||
|
||||
func TestOpenAICommonChatCompletionsHttpLargeLanguageModelProvider_ParseTextualResponse_InvalidJson(t *testing.T) {
|
||||
provider := &OpenAICommonChatCompletionsHttpLargeLanguageModelProvider{
|
||||
provider: &OpenAILargeLanguageModelProvider{},
|
||||
}
|
||||
|
||||
response := "error"
|
||||
|
||||
_, err := provider.ParseTextualResponse(core.NewNullContext(), 0, []byte(response), LARGE_LANGUAGE_MODEL_RESPONSE_FORMAT_JSON)
|
||||
assert.EqualError(t, err, "failed to request third party api")
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/mayswind/ezbookkeeping/pkg/core"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/settings"
|
||||
)
|
||||
|
||||
// openAICompatibleChatCompletionsPath is the path appended to the configured base url to form the chat completions url
const (
	openAICompatibleChatCompletionsPath = "chat/completions"
)
|
||||
|
||||
// OpenAICompatibleLargeLanguageModelProvider defines the structure of OpenAI compatible large language model provider
|
||||
type OpenAICompatibleLargeLanguageModelProvider struct {
|
||||
OpenAIChatCompletionsLargeLanguageModelProvider
|
||||
OpenAICompatibleBaseURL string
|
||||
OpenAICompatibleAPIKey string
|
||||
ReceiptImageRecognitionModelID string
|
||||
}
|
||||
|
||||
// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenAI compatible provider
|
||||
func (p *OpenAICompatibleLargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) {
|
||||
req, err := http.NewRequest("POST", p.getFinalChatCompletionsRequestUrl(), nil)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if p.OpenAICompatibleAPIKey != "" {
|
||||
req.Header.Set("Authorization", "Bearer "+p.OpenAICompatibleAPIKey)
|
||||
}
|
||||
|
||||
return req, nil
|
||||
}
|
||||
|
||||
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI compatible provider
|
||||
func (p *OpenAICompatibleLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
|
||||
return p.ReceiptImageRecognitionModelID
|
||||
}
|
||||
|
||||
func (p *OpenAICompatibleLargeLanguageModelProvider) getFinalChatCompletionsRequestUrl() string {
|
||||
url := p.OpenAICompatibleBaseURL
|
||||
|
||||
if url[len(url)-1] != '/' {
|
||||
url += "/"
|
||||
}
|
||||
|
||||
url += openAICompatibleChatCompletionsPath
|
||||
return url
|
||||
}
|
||||
|
||||
// NewOpenAICompatibleLargeLanguageModelProvider creates a new OpenAI compatible large language model provider instance
|
||||
func NewOpenAICompatibleLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
|
||||
return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAICompatibleLargeLanguageModelProvider{
|
||||
OpenAICompatibleBaseURL: config.OpenAICompatibleBaseURL,
|
||||
OpenAICompatibleAPIKey: config.OpenAICompatibleAPIKey,
|
||||
ReceiptImageRecognitionModelID: config.OpenAICompatibleReceiptImageRecognitionModelID,
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
)
|
||||
|
||||
func TestOpenAICompatibleLargeLanguageModelProvider_GetFinalRequestUrl(t *testing.T) {
|
||||
provider := &OpenAICompatibleLargeLanguageModelProvider{
|
||||
OpenAICompatibleBaseURL: "https://api.example.com/v1/",
|
||||
}
|
||||
url := provider.getFinalChatCompletionsRequestUrl()
|
||||
assert.Equal(t, "https://api.example.com/v1/chat/completions", url)
|
||||
|
||||
provider = &OpenAICompatibleLargeLanguageModelProvider{
|
||||
OpenAICompatibleBaseURL: "https://api.example.com/v1",
|
||||
}
|
||||
url = provider.getFinalChatCompletionsRequestUrl()
|
||||
assert.Equal(t, "https://api.example.com/v1/chat/completions", url)
|
||||
|
||||
provider = &OpenAICompatibleLargeLanguageModelProvider{
|
||||
OpenAICompatibleBaseURL: "https://example.com/api",
|
||||
}
|
||||
url = provider.getFinalChatCompletionsRequestUrl()
|
||||
assert.Equal(t, "https://example.com/api/chat/completions", url)
|
||||
}
|
||||
@@ -0,0 +1,43 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/mayswind/ezbookkeeping/pkg/core"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/settings"
|
||||
)
|
||||
|
||||
// OpenAILargeLanguageModelProvider defines the structure of OpenAI large language model provider
|
||||
type OpenAILargeLanguageModelProvider struct {
|
||||
OpenAIChatCompletionsLargeLanguageModelProvider
|
||||
OpenAIAPIKey string
|
||||
ReceiptImageRecognitionModelID string
|
||||
}
|
||||
|
||||
// openAIChatCompletionsUrl is the fixed chat completions url of the OpenAI provider
const (
	openAIChatCompletionsUrl = "https://api.openai.com/v1/chat/completions"
)
|
||||
|
||||
// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenAI provider
|
||||
func (p *OpenAILargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) {
|
||||
req, err := http.NewRequest("POST", openAIChatCompletionsUrl, nil)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req.Header.Set("Authorization", "Bearer "+p.OpenAIAPIKey)
|
||||
|
||||
return req, nil
|
||||
}
|
||||
|
||||
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenAI provider
|
||||
func (p *OpenAILargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
|
||||
return p.ReceiptImageRecognitionModelID
|
||||
}
|
||||
|
||||
// NewOpenAILargeLanguageModelProvider creates a new OpenAI large language model provider instance
|
||||
func NewOpenAILargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
|
||||
return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenAILargeLanguageModelProvider{
|
||||
OpenAIAPIKey: config.OpenAIAPIKey,
|
||||
ReceiptImageRecognitionModelID: config.OpenAIReceiptImageRecognitionModelID,
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
package llm
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"github.com/mayswind/ezbookkeeping/pkg/core"
|
||||
"github.com/mayswind/ezbookkeeping/pkg/settings"
|
||||
)
|
||||
|
||||
// OpenRouterLargeLanguageModelProvider defines the structure of OpenRouter large language model provider
|
||||
type OpenRouterLargeLanguageModelProvider struct {
|
||||
OpenAIChatCompletionsLargeLanguageModelProvider
|
||||
OpenRouterAPIKey string
|
||||
ReceiptImageRecognitionModelID string
|
||||
}
|
||||
|
||||
// openRouterChatCompletionsUrl is the fixed chat completions url of the OpenRouter provider
const (
	openRouterChatCompletionsUrl = "https://openrouter.ai/api/v1/chat/completions"
)
|
||||
|
||||
// BuildChatCompletionsHttpRequest returns the chat completions http request by OpenRouter provider
|
||||
func (p *OpenRouterLargeLanguageModelProvider) BuildChatCompletionsHttpRequest(c core.Context, uid int64) (*http.Request, error) {
|
||||
req, err := http.NewRequest("POST", openRouterChatCompletionsUrl, nil)
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
req.Header.Set("Authorization", "Bearer "+p.OpenRouterAPIKey)
|
||||
req.Header.Set("HTTP-Referer", "https://ezbookkeeping.mayswind.net/")
|
||||
req.Header.Set("X-Title", "ezBookkeeping")
|
||||
|
||||
return req, nil
|
||||
}
|
||||
|
||||
// GetReceiptImageRecognitionModelID returns the receipt image recognition model id of OpenRouter provider
|
||||
func (p *OpenRouterLargeLanguageModelProvider) GetReceiptImageRecognitionModelID() string {
|
||||
return p.ReceiptImageRecognitionModelID
|
||||
}
|
||||
|
||||
// NewOpenRouterLargeLanguageModelProvider creates a new OpenRouter large language model provider instance
|
||||
func NewOpenRouterLargeLanguageModelProvider(config *settings.Config) LargeLanguageModelProvider {
|
||||
return newOpenAICommonChatCompletionsHttpLargeLanguageModelProvider(&OpenRouterLargeLanguageModelProvider{
|
||||
OpenRouterAPIKey: config.OpenRouterAPIKey,
|
||||
ReceiptImageRecognitionModelID: config.OpenRouterReceiptImageRecognitionModelID,
|
||||
})
|
||||
}
|
||||
@@ -0,0 +1,27 @@
|
||||
package models
|
||||
|
||||
// RecognizedReceiptImageResponse represents a view-object of recognized receipt image response
|
||||
type RecognizedReceiptImageResponse struct {
|
||||
Type TransactionType `json:"type"`
|
||||
Time int64 `json:"time,omitempty"`
|
||||
CategoryId int64 `json:"categoryId,string,omitempty"`
|
||||
SourceAccountId int64 `json:"sourceAccountId,string,omitempty"`
|
||||
DestinationAccountId int64 `json:"destinationAccountId,string,omitempty"`
|
||||
SourceAmount int64 `json:"sourceAmount,omitempty"`
|
||||
DestinationAmount int64 `json:"destinationAmount,omitempty"`
|
||||
TagIds []string `json:"tagIds,omitempty"`
|
||||
Comment string `json:"comment,omitempty"`
|
||||
}
|
||||
|
||||
// RecognizedReceiptImageResult is the raw recognition result of a receipt image. All values are
// plain strings; the jsonschema tags carry the description and constraints of each field.
type RecognizedReceiptImageResult struct {
	Type                   string   `json:"type,omitempty" jsonschema:"enum=income,enum=expense,enum=transfer" jsonschema_description:"Transaction type (income, expense, transfer)"`
	Time                   string   `json:"time" jsonschema:"format=date-time" jsonschema_description:"Transaction time in long date time format (YYYY-MM-DD HH:mm:ss, e.g. 2023-01-01 12:00:00)"`
	Amount                 string   `json:"amount,omitempty" jsonschema_description:"Transaction amount"`
	AccountName            string   `json:"account,omitempty" jsonschema_description:"Account name for the transaction"`
	CategoryName           string   `json:"category,omitempty" jsonschema_description:"Category name for the transaction"`
	TagNames               []string `json:"tags,omitempty" jsonschema_description:"List of tags associated with the transaction (maximum 10 tags allowed)"`
	Description            string   `json:"description,omitempty" jsonschema_description:"Transaction description"`
	DestinationAmount      string   `json:"destination_amount,omitempty" jsonschema_description:"Destination amount for transfer transactions"`
	DestinationAccountName string   `json:"destination_account,omitempty" jsonschema_description:"Destination account name for transfer transactions"`
}
|
||||
@@ -66,6 +66,13 @@ const (
|
||||
WebDAVStorageType string = "webdav"
|
||||
)
|
||||
|
||||
// Large language model provider types
const (
	OpenAILLMProvider           string = "openai"
	OpenAICompatibleLLMProvider string = "openai_compatible"
	OpenRouterLLMProvider       string = "openrouter"
	OllamaLLMProvider           string = "ollama"
)
|
||||
|
||||
// Uuid generator types
|
||||
const (
|
||||
InternalUuidGeneratorType string = "internal"
|
||||
@@ -140,6 +147,9 @@ const (
|
||||
|
||||
defaultWebDAVRequestTimeout uint32 = 10000 // 10 seconds
|
||||
|
||||
defaultAIRecognitionPictureMaxSize uint32 = 10485760 // 10MB
|
||||
defaultLargeLanguageModelAPIRequestTimeout uint32 = 60000 // 60 seconds
|
||||
|
||||
defaultInMemoryDuplicateCheckerCleanupInterval uint32 = 60 // 1 minutes
|
||||
defaultDuplicateSubmissionsInterval uint32 = 300 // 5 minutes
|
||||
|
||||
@@ -281,6 +291,23 @@ type Config struct {
|
||||
MinIOConfig *MinIOConfig
|
||||
WebDAVConfig *WebDAVConfig
|
||||
|
||||
// Large Language Model
|
||||
LLMProvider string
|
||||
OpenAIAPIKey string
|
||||
OpenAIReceiptImageRecognitionModelID string
|
||||
OpenAICompatibleBaseURL string
|
||||
OpenAICompatibleAPIKey string
|
||||
OpenAICompatibleReceiptImageRecognitionModelID string
|
||||
OpenRouterAPIKey string
|
||||
OpenRouterReceiptImageRecognitionModelID string
|
||||
OllamaServerURL string
|
||||
OllamaReceiptImageRecognitionModelID string
|
||||
TransactionFromAIImageRecognition bool
|
||||
MaxAIRecognitionPictureFileSize uint32
|
||||
LargeLanguageModelAPIRequestTimeout uint32
|
||||
LargeLanguageModelAPIProxy string
|
||||
LargeLanguageModelAPISkipTLSVerify bool
|
||||
|
||||
// Uuid
|
||||
UuidGeneratorType string
|
||||
UuidServerId uint8
|
||||
@@ -426,6 +453,12 @@ func LoadConfiguration(configFilePath string) (*Config, error) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = loadLLMConfiguration(config, cfgFile, "llm")
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
err = loadUuidConfiguration(config, cfgFile, "uuid")
|
||||
|
||||
if err != nil {
|
||||
@@ -751,6 +784,46 @@ func loadStorageConfiguration(config *Config, configFile *ini.File, sectionName
|
||||
return nil
|
||||
}
|
||||
|
||||
func loadLLMConfiguration(config *Config, configFile *ini.File, sectionName string) error {
|
||||
llmProvider := getConfigItemStringValue(configFile, sectionName, "llm_provider")
|
||||
|
||||
if llmProvider == "" {
|
||||
config.LLMProvider = ""
|
||||
} else if llmProvider == OpenAILLMProvider {
|
||||
config.LLMProvider = OpenAILLMProvider
|
||||
} else if llmProvider == OpenAICompatibleLLMProvider {
|
||||
config.LLMProvider = OpenAICompatibleLLMProvider
|
||||
} else if llmProvider == OpenRouterLLMProvider {
|
||||
config.LLMProvider = OpenRouterLLMProvider
|
||||
} else if llmProvider == OllamaLLMProvider {
|
||||
config.LLMProvider = OllamaLLMProvider
|
||||
} else {
|
||||
return errs.ErrInvalidLLMProvider
|
||||
}
|
||||
|
||||
config.OpenAIAPIKey = getConfigItemStringValue(configFile, sectionName, "openai_api_key")
|
||||
config.OpenAIReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openai_receipt_image_recognition_model_id")
|
||||
|
||||
config.OpenAICompatibleBaseURL = getConfigItemStringValue(configFile, sectionName, "openai_compatible_base_url")
|
||||
config.OpenAICompatibleAPIKey = getConfigItemStringValue(configFile, sectionName, "openai_compatible_api_key")
|
||||
config.OpenAICompatibleReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openai_compatible_receipt_image_recognition_model_id")
|
||||
|
||||
config.OpenRouterAPIKey = getConfigItemStringValue(configFile, sectionName, "openrouter_api_key")
|
||||
config.OpenRouterReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "openrouter_receipt_image_recognition_model_id")
|
||||
|
||||
config.OllamaServerURL = getConfigItemStringValue(configFile, sectionName, "ollama_server_url")
|
||||
config.OllamaReceiptImageRecognitionModelID = getConfigItemStringValue(configFile, sectionName, "ollama_receipt_image_recognition_model_id")
|
||||
|
||||
config.TransactionFromAIImageRecognition = getConfigItemBoolValue(configFile, sectionName, "transaction_from_ai_image_recognition", false)
|
||||
config.MaxAIRecognitionPictureFileSize = getConfigItemUint32Value(configFile, sectionName, "max_ai_recognition_picture_size", defaultAIRecognitionPictureMaxSize)
|
||||
|
||||
config.LargeLanguageModelAPIProxy = getConfigItemStringValue(configFile, sectionName, "proxy", "system")
|
||||
config.LargeLanguageModelAPIRequestTimeout = getConfigItemUint32Value(configFile, sectionName, "request_timeout", defaultLargeLanguageModelAPIRequestTimeout)
|
||||
config.LargeLanguageModelAPISkipTLSVerify = getConfigItemBoolValue(configFile, sectionName, "skip_tls_verify", false)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func loadUuidConfiguration(config *Config, configFile *ini.File, sectionName string) error {
|
||||
if getConfigItemStringValue(configFile, sectionName, "generator_type") == InternalUuidGeneratorType {
|
||||
config.UuidGeneratorType = InternalUuidGeneratorType
|
||||
|
||||
@@ -4,6 +4,7 @@ type KnownTemplate string
|
||||
|
||||
// Known templates
|
||||
const (
|
||||
TEMPLATE_VERIFY_EMAIL KnownTemplate = "email/verify_email"
|
||||
TEMPLATE_PASSWORD_RESET KnownTemplate = "email/password_reset"
|
||||
TEMPLATE_VERIFY_EMAIL KnownTemplate = "email/verify_email"
|
||||
TEMPLATE_PASSWORD_RESET KnownTemplate = "email/password_reset"
|
||||
SYSTEM_PROMPT_RECEIPT_IMAGE_RECOGNITION KnownTemplate = "prompt/receipt_image_recognition"
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user