diff --git a/changelog/unreleased/enhancement-search-optimize-command.md b/changelog/unreleased/enhancement-search-optimize-command.md new file mode 100644 index 00000000000..6f10ccb8b3a --- /dev/null +++ b/changelog/unreleased/enhancement-search-optimize-command.md @@ -0,0 +1,8 @@ +Enhancement: Add `ocis search optimize` command + +Added a new CLI command `ocis search optimize` that compacts the search +index by merging segments without re-indexing content. This is useful for +existing instances where the index has accumulated many small segments +from incremental writes, without needing to run a full re-index. + +https://github.com/owncloud/ocis/pull/12136 diff --git a/protogen/gen/ocis/services/search/v0/mocks/search_provider_service.go b/protogen/gen/ocis/services/search/v0/mocks/search_provider_service.go index ba73046b52e..4544427e2a7 100644 --- a/protogen/gen/ocis/services/search/v0/mocks/search_provider_service.go +++ b/protogen/gen/ocis/services/search/v0/mocks/search_provider_service.go @@ -10,6 +10,7 @@ import ( mock "github.com/stretchr/testify/mock" v0 "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" + emptypb "google.golang.org/protobuf/types/known/emptypb" ) // SearchProviderService is an autogenerated mock type for the SearchProviderService type @@ -173,6 +174,80 @@ func (_c *SearchProviderService_Search_Call) RunAndReturn(run func(context.Conte return _c } +// OptimizeIndex provides a mock function with given fields: ctx, in, opts +func (_m *SearchProviderService) OptimizeIndex(ctx context.Context, in *emptypb.Empty, opts ...client.CallOption) (*emptypb.Empty, error) { + _va := make([]interface{}, len(opts)) + for _i := range opts { + _va[_i] = opts[_i] + } + var _ca []interface{} + _ca = append(_ca, ctx, in) + _ca = append(_ca, _va...) + ret := _m.Called(_ca...) 
+ + if len(ret) == 0 { + panic("no return value specified for OptimizeIndex") + } + + var r0 *emptypb.Empty + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, *emptypb.Empty, ...client.CallOption) (*emptypb.Empty, error)); ok { + return rf(ctx, in, opts...) + } + if rf, ok := ret.Get(0).(func(context.Context, *emptypb.Empty, ...client.CallOption) *emptypb.Empty); ok { + r0 = rf(ctx, in, opts...) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).(*emptypb.Empty) + } + } + + if rf, ok := ret.Get(1).(func(context.Context, *emptypb.Empty, ...client.CallOption) error); ok { + r1 = rf(ctx, in, opts...) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// SearchProviderService_OptimizeIndex_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'OptimizeIndex' +type SearchProviderService_OptimizeIndex_Call struct { + *mock.Call +} + +// OptimizeIndex is a helper method to define mock.On call +// - ctx context.Context +// - in *emptypb.Empty +// - opts ...client.CallOption +func (_e *SearchProviderService_Expecter) OptimizeIndex(ctx interface{}, in interface{}, opts ...interface{}) *SearchProviderService_OptimizeIndex_Call { + return &SearchProviderService_OptimizeIndex_Call{Call: _e.mock.On("OptimizeIndex", + append([]interface{}{ctx, in}, opts...)...)} +} + +func (_c *SearchProviderService_OptimizeIndex_Call) Run(run func(ctx context.Context, in *emptypb.Empty, opts ...client.CallOption)) *SearchProviderService_OptimizeIndex_Call { + _c.Call.Run(func(args mock.Arguments) { + variadicArgs := make([]client.CallOption, len(args)-2) + for i, a := range args[2:] { + if a != nil { + variadicArgs[i] = a.(client.CallOption) + } + } + run(args[0].(context.Context), args[1].(*emptypb.Empty), variadicArgs...) 
+ }) + return _c +} + +func (_c *SearchProviderService_OptimizeIndex_Call) Return(_a0 *emptypb.Empty, _a1 error) *SearchProviderService_OptimizeIndex_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *SearchProviderService_OptimizeIndex_Call) RunAndReturn(run func(context.Context, *emptypb.Empty, ...client.CallOption) (*emptypb.Empty, error)) *SearchProviderService_OptimizeIndex_Call { + _c.Call.Return(run) + return _c +} + // NewSearchProviderService creates a new instance of SearchProviderService. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. // The first argument is typically a *testing.T value. func NewSearchProviderService(t interface { diff --git a/protogen/gen/ocis/services/search/v0/search.pb.micro.go b/protogen/gen/ocis/services/search/v0/search.pb.micro.go index 4e069f84485..561935c8dae 100644 --- a/protogen/gen/ocis/services/search/v0/search.pb.micro.go +++ b/protogen/gen/ocis/services/search/v0/search.pb.micro.go @@ -10,6 +10,7 @@ import ( _ "google.golang.org/genproto/googleapis/api/annotations" proto "google.golang.org/protobuf/proto" _ "google.golang.org/protobuf/types/known/fieldmaskpb" + emptypb "google.golang.org/protobuf/types/known/emptypb" math "math" ) @@ -47,6 +48,12 @@ func NewSearchProviderEndpoints() []*api.Endpoint { Method: []string{"POST"}, Handler: "rpc", }, + { + Name: "SearchProvider.OptimizeIndex", + Path: []string{"/api/v0/search/optimize-index"}, + Method: []string{"POST"}, + Handler: "rpc", + }, } } @@ -55,6 +62,7 @@ func NewSearchProviderEndpoints() []*api.Endpoint { type SearchProviderService interface { Search(ctx context.Context, in *SearchRequest, opts ...client.CallOption) (*SearchResponse, error) IndexSpace(ctx context.Context, in *IndexSpaceRequest, opts ...client.CallOption) (*IndexSpaceResponse, error) + OptimizeIndex(ctx context.Context, in *emptypb.Empty, opts ...client.CallOption) (*emptypb.Empty, error) } type searchProviderService struct { @@ 
-89,17 +97,29 @@ func (c *searchProviderService) IndexSpace(ctx context.Context, in *IndexSpaceRe return out, nil } +func (c *searchProviderService) OptimizeIndex(ctx context.Context, in *emptypb.Empty, opts ...client.CallOption) (*emptypb.Empty, error) { + req := c.c.NewRequest(c.name, "SearchProvider.OptimizeIndex", in) + out := new(emptypb.Empty) + err := c.c.Call(ctx, req, out, opts...) + if err != nil { + return nil, err + } + return out, nil +} + // Server API for SearchProvider service type SearchProviderHandler interface { Search(context.Context, *SearchRequest, *SearchResponse) error IndexSpace(context.Context, *IndexSpaceRequest, *IndexSpaceResponse) error + OptimizeIndex(context.Context, *emptypb.Empty, *emptypb.Empty) error } func RegisterSearchProviderHandler(s server.Server, hdlr SearchProviderHandler, opts ...server.HandlerOption) error { type searchProvider interface { Search(ctx context.Context, in *SearchRequest, out *SearchResponse) error IndexSpace(ctx context.Context, in *IndexSpaceRequest, out *IndexSpaceResponse) error + OptimizeIndex(ctx context.Context, in *emptypb.Empty, out *emptypb.Empty) error } type SearchProvider struct { searchProvider @@ -117,6 +137,12 @@ func RegisterSearchProviderHandler(s server.Server, hdlr SearchProviderHandler, Method: []string{"POST"}, Handler: "rpc", })) + opts = append(opts, api.WithEndpoint(&api.Endpoint{ + Name: "SearchProvider.OptimizeIndex", + Path: []string{"/api/v0/search/optimize-index"}, + Method: []string{"POST"}, + Handler: "rpc", + })) return s.Handle(s.NewHandler(&SearchProvider{h}, opts...)) } @@ -132,6 +158,10 @@ func (h *searchProviderHandler) IndexSpace(ctx context.Context, in *IndexSpaceRe return h.SearchProviderHandler.IndexSpace(ctx, in, out) } +func (h *searchProviderHandler) OptimizeIndex(ctx context.Context, in *emptypb.Empty, out *emptypb.Empty) error { + return h.SearchProviderHandler.OptimizeIndex(ctx, in, out) +} + // Api Endpoints for IndexProvider service func 
NewIndexProviderEndpoints() []*api.Endpoint { diff --git a/protogen/gen/ocis/services/search/v0/search.pb.web.go b/protogen/gen/ocis/services/search/v0/search.pb.web.go index 22cdf886cca..43967d10bd4 100644 --- a/protogen/gen/ocis/services/search/v0/search.pb.web.go +++ b/protogen/gen/ocis/services/search/v0/search.pb.web.go @@ -11,6 +11,7 @@ import ( "github.com/go-chi/render" merrors "go-micro.dev/v4/errors" "google.golang.org/protobuf/encoding/protojson" + emptypb "google.golang.org/protobuf/types/known/emptypb" ) type webSearchProviderHandler struct { @@ -74,6 +75,32 @@ func (h *webSearchProviderHandler) IndexSpace(w http.ResponseWriter, r *http.Req render.JSON(w, r, resp) } +func (h *webSearchProviderHandler) OptimizeIndex(w http.ResponseWriter, r *http.Request) { + req := &emptypb.Empty{} + resp := &emptypb.Empty{} + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, err.Error(), http.StatusPreconditionFailed) + return + } + + if err := h.h.OptimizeIndex( + r.Context(), + req, + resp, + ); err != nil { + if merr, ok := merrors.As(err); ok && merr.Code == http.StatusNotFound { + http.Error(w, err.Error(), http.StatusNotFound) + } else { + http.Error(w, err.Error(), http.StatusBadRequest) + } + return + } + + render.Status(r, http.StatusCreated) + render.JSON(w, r, resp) +} + func RegisterSearchProviderWeb(r chi.Router, i SearchProviderHandler, middlewares ...func(http.Handler) http.Handler) { handler := &webSearchProviderHandler{ r: r, @@ -82,6 +109,7 @@ func RegisterSearchProviderWeb(r chi.Router, i SearchProviderHandler, middleware r.MethodFunc("POST", "/api/v0/search/search", handler.Search) r.MethodFunc("POST", "/api/v0/search/index-space", handler.IndexSpace) + r.MethodFunc("POST", "/api/v0/search/optimize-index", handler.OptimizeIndex) } type webIndexProviderHandler struct { diff --git a/protogen/proto/ocis/services/search/v0/search.proto b/protogen/proto/ocis/services/search/v0/search.proto index fc30a12fcf9..54d36e22b20 
100644 --- a/protogen/proto/ocis/services/search/v0/search.proto +++ b/protogen/proto/ocis/services/search/v0/search.proto @@ -9,6 +9,7 @@ import "protoc-gen-openapiv2/options/annotations.proto"; import "google/api/field_behavior.proto"; import "google/api/annotations.proto"; import "google/protobuf/field_mask.proto"; +import "google/protobuf/empty.proto"; option (grpc.gateway.protoc_gen_openapiv2.options.openapiv2_swagger) = { info: { @@ -47,6 +48,12 @@ service SearchProvider { body: "*" }; } + rpc OptimizeIndex(google.protobuf.Empty) returns (google.protobuf.Empty) { + option (google.api.http) = { + post: "/api/v0/search/optimize-index", + body: "*" + }; + } } service IndexProvider { diff --git a/services/search/pkg/command/optimize.go b/services/search/pkg/command/optimize.go new file mode 100644 index 00000000000..a699a28ad6f --- /dev/null +++ b/services/search/pkg/command/optimize.go @@ -0,0 +1,54 @@ +package command + +import ( + "context" + "fmt" + "time" + + "github.com/urfave/cli/v2" + "go-micro.dev/v4/client" + "google.golang.org/protobuf/types/known/emptypb" + + "github.com/owncloud/ocis/v2/ocis-pkg/config/configlog" + "github.com/owncloud/ocis/v2/ocis-pkg/service/grpc" + "github.com/owncloud/ocis/v2/ocis-pkg/tracing" + searchsvc "github.com/owncloud/ocis/v2/protogen/gen/ocis/services/search/v0" + "github.com/owncloud/ocis/v2/services/search/pkg/config" + "github.com/owncloud/ocis/v2/services/search/pkg/config/parser" +) + +// Optimize is the entrypoint for the optimize command. 
+func Optimize(cfg *config.Config) *cli.Command { + return &cli.Command{ + Name: "optimize", + Usage: "compact the search index by merging segments, without re-indexing content", + Category: "index management", + Before: func(_ *cli.Context) error { + return configlog.ReturnFatal(parser.ParseConfig(cfg)) + }, + Action: func(_ *cli.Context) error { + traceProvider, err := tracing.GetServiceTraceProvider(cfg.Tracing, cfg.Service.Name) + if err != nil { + return err + } + grpcClient, err := grpc.NewClient( + append(grpc.GetClientOptions(cfg.GRPCClientTLS), + grpc.WithTraceProvider(traceProvider), + )..., + ) + if err != nil { + return err + } + + c := searchsvc.NewSearchProviderService("com.owncloud.api.search", grpcClient) + _, err = c.OptimizeIndex(context.Background(), &emptypb.Empty{}, + func(opts *client.CallOptions) { opts.RequestTimeout = 10 * time.Minute }) + if err != nil { + fmt.Println("failed to optimize index: " + err.Error()) + return err + } + fmt.Println("index optimization complete") + return nil + }, + } +} diff --git a/services/search/pkg/command/root.go b/services/search/pkg/command/root.go index f0f9d05b713..54f18281359 100644 --- a/services/search/pkg/command/root.go +++ b/services/search/pkg/command/root.go @@ -17,6 +17,7 @@ func GetCommands(cfg *config.Config) cli.Commands { // interaction with this service Index(cfg), + Optimize(cfg), // infos about this service Health(cfg), diff --git a/services/search/pkg/search/mocks/searcher.go b/services/search/pkg/search/mocks/searcher.go index 4549a9d57a3..cd3a284f730 100644 --- a/services/search/pkg/search/mocks/searcher.go +++ b/services/search/pkg/search/mocks/searcher.go @@ -70,6 +70,52 @@ func (_c *Searcher_IndexSpace_Call) RunAndReturn(run func(*providerv1beta1.Stora return _c } +// OptimizeIndex provides a mock function with given fields: ctx +func (_m *Searcher) OptimizeIndex(ctx context.Context) error { + ret := _m.Called(ctx) + + if len(ret) == 0 { + panic("no return value specified for 
OptimizeIndex") + } + + var r0 error + if rf, ok := ret.Get(0).(func(context.Context) error); ok { + r0 = rf(ctx) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// Searcher_OptimizeIndex_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'OptimizeIndex' +type Searcher_OptimizeIndex_Call struct { + *mock.Call +} + +// OptimizeIndex is a helper method to define mock.On call +// - ctx context.Context +func (_e *Searcher_Expecter) OptimizeIndex(ctx interface{}) *Searcher_OptimizeIndex_Call { + return &Searcher_OptimizeIndex_Call{Call: _e.mock.On("OptimizeIndex", ctx)} +} + +func (_c *Searcher_OptimizeIndex_Call) Run(run func(ctx context.Context)) *Searcher_OptimizeIndex_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context)) + }) + return _c +} + +func (_c *Searcher_OptimizeIndex_Call) Return(_a0 error) *Searcher_OptimizeIndex_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *Searcher_OptimizeIndex_Call) RunAndReturn(run func(context.Context) error) *Searcher_OptimizeIndex_Call { + _c.Call.Return(run) + return _c +} + // MoveItem provides a mock function with given fields: ref func (_m *Searcher) MoveItem(ref *providerv1beta1.Reference) { _m.Called(ref) diff --git a/services/search/pkg/search/service.go b/services/search/pkg/search/service.go index 0161dce1392..17f89c27359 100644 --- a/services/search/pkg/search/service.go +++ b/services/search/pkg/search/service.go @@ -58,6 +58,7 @@ var ( type Searcher interface { Search(ctx context.Context, req *searchsvc.SearchRequest) (*searchsvc.SearchResponse, error) IndexSpace(rID *provider.StorageSpaceId) error + OptimizeIndex(ctx context.Context) error TrashItem(rID *provider.ResourceId) UpsertItem(ref *provider.Reference) UpdateTags(ref *provider.Reference) @@ -501,6 +502,22 @@ func (s *Service) IndexSpace(spaceID *provider.StorageSpaceId) error { return nil } +// OptimizeIndex compacts the search index segments without re-indexing content. 
+func (s *Service) OptimizeIndex(ctx context.Context) error { + opt, ok := s.engine.(engine.Optimizer) // optimization is an optional engine capability + if !ok { + return fmt.Errorf("search engine %T does not support optimization", s.engine) + } + + logDocCount(s.engine, s.logger) + s.logger.Info().Msg("optimizing search index") + if err := opt.Optimize(ctx); err != nil { + return fmt.Errorf("index optimization failed: %w", err) + } + s.logger.Info().Msg("index optimization complete") + return nil +} + // TrashItem marks the item as deleted. func (s *Service) TrashItem(rID *provider.ResourceId) { err := s.engine.Delete(storagespace.FormatResourceID(rID)) diff --git a/services/search/pkg/service/grpc/v0/service.go b/services/search/pkg/service/grpc/v0/service.go index e2f901d1b93..11f03cf04c4 100644 --- a/services/search/pkg/service/grpc/v0/service.go +++ b/services/search/pkg/service/grpc/v0/service.go @@ -21,6 +21,7 @@ import ( merrors "go-micro.dev/v4/errors" "go-micro.dev/v4/metadata" grpcmetadata "google.golang.org/grpc/metadata" + emptypb "google.golang.org/protobuf/types/known/emptypb" "github.com/owncloud/ocis/v2/ocis-pkg/generators" "github.com/owncloud/ocis/v2/ocis-pkg/log" @@ -224,6 +225,12 @@ func (s Service) IndexSpace(_ context.Context, in *searchsvc.IndexSpaceRequest, return nil } +// OptimizeIndex handles the SearchProvider.OptimizeIndex RPC by delegating to the searcher, passing the request context through; both Empty messages are ignored. +func (s Service) OptimizeIndex(ctx context.Context, _ *emptypb.Empty, _ *emptypb.Empty) error { + s.log.Info().Msg("index optimization requested") + return s.searcher.OptimizeIndex(ctx) +} + // FromCache pulls a search result from cache func (s Service) FromCache(key string) (*searchsvc.SearchResponse, bool) { v, err := s.cache.Get(key)