From e60ffc9df37a28d6eff9e4f32624dc21f39d9367 Mon Sep 17 00:00:00 2001 From: baorenyi Date: Thu, 22 Feb 2018 13:31:59 +0800 Subject: [PATCH] merge 2.9 to master --- Makefile | 95 +- benchmark/run_test.sh | 23 +- benchmark/ycsb4tera.md | 7 + build.conf.template | 16 +- build.sh | 19 +- build_version.sh | 2 +- depends.mk.template | 11 +- doc/README.md | 160 +++ doc/cn/README.md | 13 +- doc/global_txn.md | 3 + doc/sdk_reference/client.md | 169 +++ doc/sdk_reference/mutation.md | 194 ++- doc/sdk_reference/reader.md | 173 +-- doc/sdk_reference/readme.md | 42 + doc/sdk_reference/scan.md | 98 ++ doc/sdk_reference/table.md | 100 ++ doc/sdk_reference/table_descriptor.md | 429 ++---- doc/sdk_reference/transaction.md | 60 + doc/sdk_reference/utils.md | 14 + doc/tools/benchmark.md | 38 + doc/tools/readme.md | 10 + doc/tools/teracli.md | 448 ++++++ doc/tools/terautil.md | 78 + doc/tools/ycsb.md | 294 ++++ example/onebox/conf/tera.flag | 21 +- include/tera/client.h | 5 + include/tera/error_code.h | 17 +- include/tera/reader.h | 6 + include/tera/scan.h | 5 + include/tera/table_descriptor.h | 6 + include/tera/transaction.h | 41 +- readme-cn.md | 49 +- resources/images/global_txn.png | Bin 0 -> 197896 bytes src/benchmark/mark.cc | 12 +- src/benchmark/mark.h | 37 +- src/benchmark/mark_main.cc | 60 +- src/benchmark/tpcc/data_generator.cc | 182 +++ src/benchmark/tpcc/data_generator.h | 61 + src/benchmark/tpcc/driver.cc | 190 +++ src/benchmark/tpcc/driver.h | 68 + src/benchmark/tpcc/mock_tpccdb.cc | 18 + src/benchmark/tpcc/mock_tpccdb.h | 98 ++ src/benchmark/tpcc/random_generator.cc | 132 ++ src/benchmark/tpcc/random_generator.h | 64 + src/benchmark/tpcc/tera_tpccdb.cc | 538 +++++++ src/benchmark/tpcc/tera_tpccdb.h | 101 ++ src/benchmark/tpcc/tera_txn/delivery_txn.cc | 144 ++ src/benchmark/tpcc/tera_txn/new_order_txn.cc | 214 +++ .../tpcc/tera_txn/order_status_txn.cc | 89 ++ src/benchmark/tpcc/tera_txn/payment_txn.cc | 194 +++ src/benchmark/tpcc/tera_txn/stocklevel_txn.cc | 79 + 
.../tpcc/test/data_generator_test.cc | 88 ++ .../tpcc/test/random_generator_test.cc | 81 ++ src/benchmark/tpcc/test/tpcc_test.cc | 21 + src/benchmark/tpcc/tpcc_flags.cc | 17 + src/benchmark/tpcc/tpcc_main.cc | 78 + src/benchmark/tpcc/tpcc_schemas/t_customer | 5 + .../tpcc/tpcc_schemas/t_customer_last_index | 5 + src/benchmark/tpcc/tpcc_schemas/t_district | 5 + src/benchmark/tpcc/tpcc_schemas/t_history | 5 + .../tpcc/tpcc_schemas/t_history_index | 5 + src/benchmark/tpcc/tpcc_schemas/t_item | 5 + src/benchmark/tpcc/tpcc_schemas/t_neworder | 5 + src/benchmark/tpcc/tpcc_schemas/t_order | 5 + src/benchmark/tpcc/tpcc_schemas/t_order_index | 5 + src/benchmark/tpcc/tpcc_schemas/t_orderline | 5 + src/benchmark/tpcc/tpcc_schemas/t_stock | 5 + src/benchmark/tpcc/tpcc_schemas/t_warehouse | 5 + src/benchmark/tpcc/tpcc_types.h | 139 ++ src/benchmark/tpcc/tpccdb.cc | 360 +++++ src/benchmark/tpcc/tpccdb.h | 471 ++++++ src/common/atomic.h | 10 +- src/common/counter.h | 14 +- src/common/cpu_profiler.cc | 50 + src/common/cpu_profiler.h | 68 + src/common/event.h | 67 + src/common/file/file_path.cc | 27 + src/common/file/file_path.h | 7 + src/common/heap_profiler.cc | 51 + src/common/heap_profiler.h | 90 ++ src/common/log/log_cleaner.cc | 322 +++++ src/common/log/log_cleaner.h | 114 ++ src/common/metric/cache_collector.h | 108 ++ src/common/metric/collector.h | 15 + src/common/metric/collector_report.h | 49 + .../metric/collector_report_publisher.cc | 150 ++ .../metric/collector_report_publisher.h | 162 +++ src/common/metric/counter_collector.h | 41 + src/common/metric/hardware_collectors.cc | 250 ++++ src/common/metric/hardware_collectors.h | 104 ++ src/common/metric/metric_counter.h | 93 ++ src/common/metric/metric_http_server.cc | 232 +++ src/common/metric/metric_http_server.h | 84 ++ src/common/metric/metric_id.cc | 156 ++ src/common/metric/metric_id.h | 143 ++ src/common/metric/prometheus_subscriber.cc | 142 ++ src/common/metric/prometheus_subscriber.h | 81 ++ 
src/common/metric/ratio_collector.h | 45 + src/common/metric/ratio_subscriber.h | 58 + src/common/metric/subscriber.h | 66 + src/common/mutex.h | 18 +- src/common/request_done_wrapper.h | 29 + src/common/test/collector_report_test.cc | 179 +++ src/common/test/common_test_main.cc | 30 + src/{utils => common}/test/counter_test.cc | 6 +- src/common/test/log_cleaner_test.cc | 246 ++++ src/common/test/metric_counter_test.cc | 97 ++ src/common/test/metric_http_server_test.cc | 138 ++ src/common/test/metric_id_test.cc | 178 +++ src/common/test/metrics_test.cc | 187 +++ src/common/test/profiler_test.cc | 83 ++ src/common/test/thread_pool_test.cc | 2 +- src/common/timer.h | 46 +- src/io/default_compact_strategy.cc | 5 +- src/io/tablet_io.cc | 301 +++- src/io/tablet_io.h | 71 +- src/io/tablet_scanner.cc | 4 + src/io/tablet_scanner.h | 6 +- src/io/tablet_writer.cc | 103 +- src/io/tablet_writer.h | 13 + src/io/test/load_test.cc | 50 +- src/io/test/tablet_io_test.cc | 171 ++- src/io/test/tablet_scanner_test.cc | 8 +- src/io/utils_leveldb.cc | 209 ++- src/io/utils_leveldb.h | 15 + src/lbcli_main.cc | 314 ++++ src/leveldb/Makefile | 2 +- src/leveldb/build_detect_platform | 10 - src/leveldb/db/builder.cc | 3 +- src/leveldb/db/db_impl.cc | 849 +++++++---- src/leveldb/db/db_impl.h | 46 +- src/leveldb/db/db_table.cc | 66 +- src/leveldb/db/db_table.h | 2 + src/leveldb/db/db_test.cc | 53 + src/leveldb/db/filename.cc | 40 +- src/leveldb/db/filename.h | 15 + src/leveldb/db/memtable.cc | 1 + src/leveldb/db/memtable.h | 8 + src/leveldb/db/version_edit.cc | 72 +- src/leveldb/db/version_edit.h | 6 +- src/leveldb/db/version_edit_test.cc | 83 +- src/leveldb/db/version_set.cc | 824 +++++++++-- src/leveldb/db/version_set.h | 53 +- src/leveldb/db/version_set_test.cc | 59 +- src/leveldb/include/leveldb/db.h | 2 + src/leveldb/include/leveldb/dfs.h | 5 + src/leveldb/include/leveldb/env_dfs.h | 4 +- src/leveldb/include/leveldb/options.h | 17 +- src/leveldb/include/leveldb/status.h | 9 +- 
src/leveldb/port/port_posix.h | 14 - src/leveldb/table/table_builder.cc | 2 +- src/leveldb/util/env_cache.cc | 3 + src/leveldb/util/env_dfs.cc | 5 +- src/leveldb/util/env_flash.cc | 11 +- src/leveldb/util/env_inmem.cc | 2 +- src/leveldb/util/env_mock.cc | 3 + src/leveldb/util/env_posix.cc | 13 +- src/leveldb/util/hdfs.cc | 19 +- src/leveldb/util/hdfs.h | 5 +- src/leveldb/util/hdfs2.cc | 18 +- src/leveldb/util/hdfs_util.h | 64 + src/leveldb/util/nfs.cc | 37 +- src/leveldb/util/nfs.h | 3 + src/leveldb/util/options.cc | 5 +- src/leveldb/util/raw_key_operator.cc | 2 +- src/leveldb/util/status.cc | 3 + src/load_balancer/action.h | 45 + src/load_balancer/action_generator.h | 67 + src/load_balancer/action_generators.cc | 344 +++++ src/load_balancer/action_generators.h | 134 ++ src/load_balancer/actions.cc | 47 + src/load_balancer/actions.h | 43 + src/load_balancer/balancer.h | 39 + src/load_balancer/cluster.cc | 537 +++++++ src/load_balancer/cluster.h | 130 ++ src/load_balancer/cost_function.h | 125 ++ src/load_balancer/cost_functions.cc | 222 +++ src/load_balancer/cost_functions.h | 135 ++ src/load_balancer/lb_entry.cc | 74 + src/load_balancer/lb_entry.h | 38 + src/load_balancer/lb_impl.cc | 531 +++++++ src/load_balancer/lb_impl.h | 93 ++ src/load_balancer/lb_node.h | 30 + src/load_balancer/lb_service_impl.cc | 49 + src/load_balancer/lb_service_impl.h | 42 + src/load_balancer/options.h | 100 ++ src/load_balancer/plan.h | 71 + src/load_balancer/random.h | 73 + .../test/action_generators_test.cc | 311 ++++ src/load_balancer/test/actions_test.cc | 29 + src/load_balancer/test/balancer_test_main.cc | 29 + src/load_balancer/test/cluster_test.cc | 391 +++++ src/load_balancer/test/cost_functions_test.cc | 176 +++ src/load_balancer/test/random_test.cc | 44 + src/load_balancer/unity_balancer.cc | 264 ++++ src/load_balancer/unity_balancer.h | 58 + src/master/availability.cc | 60 +- src/master/availability.h | 20 +- src/master/gc_strategy.cc | 436 +----- src/master/gc_strategy.h | 
54 +- src/master/master_entry.cc | 17 +- src/master/master_entry.h | 2 + src/master/master_impl.cc | 411 +++++- src/master/master_impl.h | 20 +- src/master/master_zk_adapter.cc | 8 - src/master/master_zk_adapter.h | 1 - src/master/tablet_manager.cc | 212 ++- src/master/tablet_manager.h | 81 +- src/master/tabletnode_manager.cc | 2 +- src/master/test/master_impl_test.cc | 284 +++- src/master/test/master_test.cc | 9 +- src/master/test/trackable_gc_test.cc | 4 +- src/master/workload_scheduler.cc | 5 +- src/monitor/teramo_main.cc | 5 +- src/observer/executor/key_selector.h | 29 + src/observer/executor/notification.h | 38 + src/observer/executor/notification_impl.cc | 67 + src/observer/executor/notification_impl.h | 42 + src/observer/executor/notify_cell.h | 110 ++ src/observer/executor/observer.h | 52 + src/observer/executor/random_key_selector.cc | 134 ++ src/observer/executor/random_key_selector.h | 47 + src/observer/executor/scanner.h | 41 + src/observer/executor/scanner_entry.cc | 63 + src/observer/executor/scanner_entry.h | 40 + src/observer/executor/scanner_impl.cc | 657 +++++++++ src/observer/executor/scanner_impl.h | 118 ++ src/observer/observer_demo/demo_entry.cc | 59 + src/observer/observer_demo/demo_entry.h | 30 + src/observer/observer_demo/demo_observer.cc | 156 ++ src/observer/observer_demo/demo_observer.h | 86 ++ .../observer_demo/observe_demo_main.cc | 84 ++ .../rowlocknode/fake_rowlock_client.h | 41 + .../fake_rowlocknode_zk_adapter.cc | 66 + .../rowlocknode/fake_rowlocknode_zk_adapter.h | 55 + .../ins_rowlock_client_zk_adapter.cc | 55 + .../ins_rowlock_client_zk_adapter.h | 50 + .../rowlocknode/ins_rowlocknode_zk_adapter.cc | 80 ++ .../rowlocknode/ins_rowlocknode_zk_adapter.h | 56 + .../rowlocknode/remote_rowlocknode.cc | 36 + src/observer/rowlocknode/remote_rowlocknode.h | 37 + src/observer/rowlocknode/rowlock_db.h | 161 +++ src/observer/rowlocknode/rowlocknode_entry.cc | 87 ++ src/observer/rowlocknode/rowlocknode_entry.h | 39 + 
src/observer/rowlocknode/rowlocknode_impl.cc | 82 ++ src/observer/rowlocknode/rowlocknode_impl.h | 52 + .../rowlocknode/rowlocknode_zk_adapter.cc | 119 ++ .../rowlocknode/rowlocknode_zk_adapter.h | 55 + .../rowlocknode/rowlocknode_zk_adapter_base.h | 21 + .../zk_rowlock_client_zk_adapter.cc | 58 + .../zk_rowlock_client_zk_adapter.h | 29 + .../rowlockproxy/remote_rowlock_proxy.cc | 36 + .../rowlockproxy/remote_rowlock_proxy.h | 38 + .../rowlockproxy/rowlock_proxy_entry.cc | 79 + .../rowlockproxy/rowlock_proxy_entry.h | 37 + .../rowlockproxy/rowlock_proxy_impl.cc | 146 ++ .../rowlockproxy/rowlock_proxy_impl.h | 68 + .../rowlockproxy/rowlock_proxy_zk_adapter.cc | 411 ++++++ .../rowlockproxy/rowlock_proxy_zk_adapter.h | 83 ++ src/observer/test/observer_test.cc | 587 ++++++++ src/observer/test/rowlock_proxy_test.cc | 107 ++ src/observer/test/rowlock_test.cc | 184 +++ src/observer/test/scanner_test.cc | 495 +++++++ src/proto/lb_client.cc | 37 + src/proto/lb_client.h | 35 + src/proto/load_balancer_rpc.proto | 11 + src/proto/rowlocknode_rpc.proto | 19 + src/proto/rpc_client.h | 6 +- src/proto/status_code.proto | 15 + src/proto/table_meta.proto | 11 + src/proto/table_schema.proto | 2 + src/proto/tabletnode.proto | 1 + src/proto/tabletnode_client.cc | 8 + src/proto/tabletnode_client.h | 3 + src/proto/tabletnode_rpc.proto | 6 + src/proto/timeoracle_rpc.proto | 20 + src/sample/Makefile | 14 +- src/sample/atomic_sample.cc | 1 + src/sample/global_txn_async_sample.cc | 143 ++ src/sample/global_txn_sync_sample.cc | 107 ++ src/sample/tera_row_txn_sample.cc | 3 + src/sdk/client_impl.cc | 81 +- src/sdk/client_impl.h | 17 +- src/sdk/global_txn.cc | 1142 +++++++++++++++ src/sdk/global_txn.h | 273 ++++ src/sdk/global_txn_internal.cc | 559 ++++++++ src/sdk/global_txn_internal.h | 366 +++++ src/sdk/http/http.cc | 2 +- src/sdk/multi_row_txn.cc | 79 - src/sdk/multi_row_txn.h | 56 - src/sdk/mutate_impl.cc | 2 +- src/sdk/mutate_impl.h | 2 +- src/sdk/read_impl.cc | 8 + src/sdk/read_impl.h | 7 
+- src/sdk/rowlock_client.cc | 140 ++ src/sdk/rowlock_client.h | 77 + src/sdk/scan.cc | 4 + src/sdk/scan_impl.cc | 46 +- src/sdk/scan_impl.h | 40 +- src/sdk/schema_impl.cc | 29 +- src/sdk/schema_impl.h | 15 + src/sdk/sdk_metric_name.h | 58 + src/sdk/sdk_perf.cc | 85 ++ src/sdk/sdk_perf.h | 54 + src/sdk/sdk_task.cc | 5 +- src/sdk/sdk_task.h | 16 +- src/sdk/sdk_utils.cc | 98 ++ src/sdk/sdk_utils.h | 6 + src/sdk/sdk_zk.cc | 325 ++++- src/sdk/sdk_zk.h | 117 +- src/sdk/single_row_txn.cc | 37 +- src/sdk/single_row_txn.h | 37 +- src/sdk/table_impl.cc | 681 ++++----- src/sdk/table_impl.h | 69 +- src/sdk/tera.cc | 36 + src/sdk/tera_easy.cc | 4 +- src/sdk/test/filter_utils_test.cc | 14 +- src/sdk/test/global_txn_batch_op.cc | 440 ++++++ src/sdk/test/global_txn_internal_test.cc | 789 ++++++++++ src/sdk/test/global_txn_test.cc | 1265 +++++++++++++++++ src/sdk/test/global_txn_test_tool.cc | 754 ++++++++++ src/sdk/test/global_txn_test_tool.h | 95 ++ src/sdk/test/global_txn_testutils.cc | 178 +++ src/sdk/test/global_txn_testutils.h | 41 + src/sdk/test/mock_table.h | 78 + src/sdk/test/scan_impl_test.cc | 45 +- src/sdk/test/sdk_test.cc | 16 + src/sdk/test/sdk_timeout_manager_test.cc | 244 ++++ src/sdk/test/sdk_utils_test.cc | 288 ++-- src/sdk/timeoracle_client_impl.cc | 118 ++ src/sdk/timeoracle_client_impl.h | 56 + src/tabletnode/remote_tabletnode.cc | 139 +- src/tabletnode/remote_tabletnode.h | 86 ++ src/tabletnode/rpc_schedule_policy.cc | 2 +- src/tabletnode/tabletnode_entry.cc | 31 +- src/tabletnode/tabletnode_entry.h | 2 + src/tabletnode/tabletnode_impl.cc | 306 ++-- src/tabletnode/tabletnode_impl.h | 38 +- src/tabletnode/tabletnode_metric_name.h | 113 ++ src/tabletnode/tabletnode_sysinfo.cc | 869 ++++++----- src/tabletnode/tabletnode_sysinfo.h | 10 +- src/tabletnode/tabletnode_zk_adapter.cc | 3 + src/tabletnode/test/tabletnode_impl_test.cc | 4 +- .../test/tabletnode_sysinfo_test.cc | 2 +- src/tera_c.cc | 6 +- src/tera_flags.cc | 158 +- src/tera_main.cc | 40 +- 
src/tera_test_main.cc | 17 +- src/teracli_main.cc | 758 +++++++++- src/terautil.cc | 732 ++++++++++ src/timeoracle/bench/timeoracle_bench.cc | 48 + src/timeoracle/remote_timeoracle.h | 73 + src/timeoracle/test/timeoracle_test.cc | 78 + src/timeoracle/timeoracle.cc | 13 + src/timeoracle/timeoracle.h | 124 ++ src/timeoracle/timeoracle_entry.cc | 174 +++ src/timeoracle/timeoracle_entry.h | 49 + src/timeoracle/timeoracle_zk_adapter.cc | 477 +++++++ src/timeoracle/timeoracle_zk_adapter.h | 124 ++ src/timeoracle_main.cc | 69 + src/types.h | 14 + src/utils/atomic.h | 110 -- src/utils/counter.h | 76 - src/utils/timer.h | 57 - src/zk/zk_adapter.cc | 12 +- src/zk/zk_adapter.h | 6 +- src/zk/zk_util.cc | 2 +- 367 files changed, 35897 insertions(+), 3554 deletions(-) create mode 100644 doc/README.md create mode 100644 doc/global_txn.md create mode 100644 doc/sdk_reference/client.md create mode 100644 doc/sdk_reference/readme.md create mode 100644 doc/sdk_reference/scan.md create mode 100644 doc/sdk_reference/table.md create mode 100644 doc/sdk_reference/transaction.md create mode 100644 doc/sdk_reference/utils.md create mode 100644 doc/tools/benchmark.md create mode 100644 doc/tools/readme.md create mode 100644 doc/tools/teracli.md create mode 100644 doc/tools/terautil.md create mode 100644 doc/tools/ycsb.md create mode 100644 resources/images/global_txn.png create mode 100644 src/benchmark/tpcc/data_generator.cc create mode 100644 src/benchmark/tpcc/data_generator.h create mode 100644 src/benchmark/tpcc/driver.cc create mode 100644 src/benchmark/tpcc/driver.h create mode 100644 src/benchmark/tpcc/mock_tpccdb.cc create mode 100644 src/benchmark/tpcc/mock_tpccdb.h create mode 100644 src/benchmark/tpcc/random_generator.cc create mode 100644 src/benchmark/tpcc/random_generator.h create mode 100644 src/benchmark/tpcc/tera_tpccdb.cc create mode 100644 src/benchmark/tpcc/tera_tpccdb.h create mode 100644 src/benchmark/tpcc/tera_txn/delivery_txn.cc create mode 100644 
src/benchmark/tpcc/tera_txn/new_order_txn.cc create mode 100644 src/benchmark/tpcc/tera_txn/order_status_txn.cc create mode 100644 src/benchmark/tpcc/tera_txn/payment_txn.cc create mode 100644 src/benchmark/tpcc/tera_txn/stocklevel_txn.cc create mode 100644 src/benchmark/tpcc/test/data_generator_test.cc create mode 100644 src/benchmark/tpcc/test/random_generator_test.cc create mode 100644 src/benchmark/tpcc/test/tpcc_test.cc create mode 100644 src/benchmark/tpcc/tpcc_flags.cc create mode 100644 src/benchmark/tpcc/tpcc_main.cc create mode 100644 src/benchmark/tpcc/tpcc_schemas/t_customer create mode 100644 src/benchmark/tpcc/tpcc_schemas/t_customer_last_index create mode 100644 src/benchmark/tpcc/tpcc_schemas/t_district create mode 100644 src/benchmark/tpcc/tpcc_schemas/t_history create mode 100644 src/benchmark/tpcc/tpcc_schemas/t_history_index create mode 100644 src/benchmark/tpcc/tpcc_schemas/t_item create mode 100644 src/benchmark/tpcc/tpcc_schemas/t_neworder create mode 100644 src/benchmark/tpcc/tpcc_schemas/t_order create mode 100644 src/benchmark/tpcc/tpcc_schemas/t_order_index create mode 100644 src/benchmark/tpcc/tpcc_schemas/t_orderline create mode 100644 src/benchmark/tpcc/tpcc_schemas/t_stock create mode 100644 src/benchmark/tpcc/tpcc_schemas/t_warehouse create mode 100644 src/benchmark/tpcc/tpcc_types.h create mode 100644 src/benchmark/tpcc/tpccdb.cc create mode 100644 src/benchmark/tpcc/tpccdb.h create mode 100644 src/common/cpu_profiler.cc create mode 100644 src/common/cpu_profiler.h create mode 100644 src/common/heap_profiler.cc create mode 100644 src/common/heap_profiler.h create mode 100644 src/common/log/log_cleaner.cc create mode 100644 src/common/log/log_cleaner.h create mode 100644 src/common/metric/cache_collector.h create mode 100644 src/common/metric/collector.h create mode 100644 src/common/metric/collector_report.h create mode 100644 src/common/metric/collector_report_publisher.cc create mode 100644 
src/common/metric/collector_report_publisher.h create mode 100644 src/common/metric/counter_collector.h create mode 100644 src/common/metric/hardware_collectors.cc create mode 100644 src/common/metric/hardware_collectors.h create mode 100644 src/common/metric/metric_counter.h create mode 100644 src/common/metric/metric_http_server.cc create mode 100644 src/common/metric/metric_http_server.h create mode 100644 src/common/metric/metric_id.cc create mode 100644 src/common/metric/metric_id.h create mode 100644 src/common/metric/prometheus_subscriber.cc create mode 100644 src/common/metric/prometheus_subscriber.h create mode 100644 src/common/metric/ratio_collector.h create mode 100644 src/common/metric/ratio_subscriber.h create mode 100644 src/common/metric/subscriber.h mode change 100644 => 100755 src/common/mutex.h create mode 100644 src/common/request_done_wrapper.h create mode 100644 src/common/test/collector_report_test.cc create mode 100644 src/common/test/common_test_main.cc rename src/{utils => common}/test/counter_test.cc (95%) create mode 100644 src/common/test/log_cleaner_test.cc create mode 100644 src/common/test/metric_counter_test.cc create mode 100644 src/common/test/metric_http_server_test.cc create mode 100644 src/common/test/metric_id_test.cc create mode 100644 src/common/test/metrics_test.cc create mode 100644 src/common/test/profiler_test.cc create mode 100644 src/lbcli_main.cc create mode 100644 src/leveldb/util/hdfs_util.h create mode 100644 src/load_balancer/action.h create mode 100644 src/load_balancer/action_generator.h create mode 100644 src/load_balancer/action_generators.cc create mode 100644 src/load_balancer/action_generators.h create mode 100644 src/load_balancer/actions.cc create mode 100644 src/load_balancer/actions.h create mode 100644 src/load_balancer/balancer.h create mode 100644 src/load_balancer/cluster.cc create mode 100644 src/load_balancer/cluster.h create mode 100644 src/load_balancer/cost_function.h create mode 100644 
src/load_balancer/cost_functions.cc create mode 100644 src/load_balancer/cost_functions.h create mode 100644 src/load_balancer/lb_entry.cc create mode 100644 src/load_balancer/lb_entry.h create mode 100644 src/load_balancer/lb_impl.cc create mode 100644 src/load_balancer/lb_impl.h create mode 100644 src/load_balancer/lb_node.h create mode 100644 src/load_balancer/lb_service_impl.cc create mode 100644 src/load_balancer/lb_service_impl.h create mode 100644 src/load_balancer/options.h create mode 100644 src/load_balancer/plan.h create mode 100644 src/load_balancer/random.h create mode 100644 src/load_balancer/test/action_generators_test.cc create mode 100644 src/load_balancer/test/actions_test.cc create mode 100644 src/load_balancer/test/balancer_test_main.cc create mode 100644 src/load_balancer/test/cluster_test.cc create mode 100644 src/load_balancer/test/cost_functions_test.cc create mode 100644 src/load_balancer/test/random_test.cc create mode 100644 src/load_balancer/unity_balancer.cc create mode 100644 src/load_balancer/unity_balancer.h create mode 100644 src/observer/executor/key_selector.h create mode 100644 src/observer/executor/notification.h create mode 100644 src/observer/executor/notification_impl.cc create mode 100644 src/observer/executor/notification_impl.h create mode 100644 src/observer/executor/notify_cell.h create mode 100644 src/observer/executor/observer.h create mode 100644 src/observer/executor/random_key_selector.cc create mode 100644 src/observer/executor/random_key_selector.h create mode 100644 src/observer/executor/scanner.h create mode 100644 src/observer/executor/scanner_entry.cc create mode 100644 src/observer/executor/scanner_entry.h create mode 100644 src/observer/executor/scanner_impl.cc create mode 100644 src/observer/executor/scanner_impl.h create mode 100644 src/observer/observer_demo/demo_entry.cc create mode 100644 src/observer/observer_demo/demo_entry.h create mode 100644 src/observer/observer_demo/demo_observer.cc create mode 
100644 src/observer/observer_demo/demo_observer.h create mode 100644 src/observer/observer_demo/observe_demo_main.cc create mode 100644 src/observer/rowlocknode/fake_rowlock_client.h create mode 100644 src/observer/rowlocknode/fake_rowlocknode_zk_adapter.cc create mode 100644 src/observer/rowlocknode/fake_rowlocknode_zk_adapter.h create mode 100644 src/observer/rowlocknode/ins_rowlock_client_zk_adapter.cc create mode 100644 src/observer/rowlocknode/ins_rowlock_client_zk_adapter.h create mode 100644 src/observer/rowlocknode/ins_rowlocknode_zk_adapter.cc create mode 100644 src/observer/rowlocknode/ins_rowlocknode_zk_adapter.h create mode 100644 src/observer/rowlocknode/remote_rowlocknode.cc create mode 100644 src/observer/rowlocknode/remote_rowlocknode.h create mode 100644 src/observer/rowlocknode/rowlock_db.h create mode 100644 src/observer/rowlocknode/rowlocknode_entry.cc create mode 100644 src/observer/rowlocknode/rowlocknode_entry.h create mode 100644 src/observer/rowlocknode/rowlocknode_impl.cc create mode 100644 src/observer/rowlocknode/rowlocknode_impl.h create mode 100644 src/observer/rowlocknode/rowlocknode_zk_adapter.cc create mode 100644 src/observer/rowlocknode/rowlocknode_zk_adapter.h create mode 100644 src/observer/rowlocknode/rowlocknode_zk_adapter_base.h create mode 100644 src/observer/rowlocknode/zk_rowlock_client_zk_adapter.cc create mode 100644 src/observer/rowlocknode/zk_rowlock_client_zk_adapter.h create mode 100644 src/observer/rowlockproxy/remote_rowlock_proxy.cc create mode 100644 src/observer/rowlockproxy/remote_rowlock_proxy.h create mode 100644 src/observer/rowlockproxy/rowlock_proxy_entry.cc create mode 100644 src/observer/rowlockproxy/rowlock_proxy_entry.h create mode 100644 src/observer/rowlockproxy/rowlock_proxy_impl.cc create mode 100644 src/observer/rowlockproxy/rowlock_proxy_impl.h create mode 100644 src/observer/rowlockproxy/rowlock_proxy_zk_adapter.cc create mode 100644 src/observer/rowlockproxy/rowlock_proxy_zk_adapter.h create 
mode 100644 src/observer/test/observer_test.cc create mode 100644 src/observer/test/rowlock_proxy_test.cc create mode 100644 src/observer/test/rowlock_test.cc create mode 100644 src/observer/test/scanner_test.cc create mode 100644 src/proto/lb_client.cc create mode 100644 src/proto/lb_client.h create mode 100644 src/proto/load_balancer_rpc.proto create mode 100644 src/proto/rowlocknode_rpc.proto create mode 100644 src/proto/timeoracle_rpc.proto create mode 100644 src/sample/global_txn_async_sample.cc create mode 100644 src/sample/global_txn_sync_sample.cc create mode 100644 src/sdk/global_txn.cc create mode 100644 src/sdk/global_txn.h create mode 100644 src/sdk/global_txn_internal.cc create mode 100644 src/sdk/global_txn_internal.h delete mode 100644 src/sdk/multi_row_txn.cc delete mode 100644 src/sdk/multi_row_txn.h create mode 100644 src/sdk/rowlock_client.cc create mode 100644 src/sdk/rowlock_client.h create mode 100644 src/sdk/sdk_metric_name.h create mode 100644 src/sdk/sdk_perf.cc create mode 100644 src/sdk/sdk_perf.h create mode 100644 src/sdk/test/global_txn_batch_op.cc create mode 100644 src/sdk/test/global_txn_internal_test.cc create mode 100644 src/sdk/test/global_txn_test.cc create mode 100644 src/sdk/test/global_txn_test_tool.cc create mode 100644 src/sdk/test/global_txn_test_tool.h create mode 100644 src/sdk/test/global_txn_testutils.cc create mode 100644 src/sdk/test/global_txn_testutils.h create mode 100644 src/sdk/test/mock_table.h create mode 100644 src/sdk/test/sdk_test.cc create mode 100644 src/sdk/test/sdk_timeout_manager_test.cc create mode 100644 src/sdk/timeoracle_client_impl.cc create mode 100644 src/sdk/timeoracle_client_impl.h create mode 100644 src/tabletnode/tabletnode_metric_name.h mode change 100644 => 100755 src/tabletnode/tabletnode_zk_adapter.cc mode change 100644 => 100755 src/tera_flags.cc create mode 100644 src/terautil.cc create mode 100644 src/timeoracle/bench/timeoracle_bench.cc create mode 100644 
src/timeoracle/remote_timeoracle.h create mode 100644 src/timeoracle/test/timeoracle_test.cc create mode 100644 src/timeoracle/timeoracle.cc create mode 100644 src/timeoracle/timeoracle.h create mode 100644 src/timeoracle/timeoracle_entry.cc create mode 100644 src/timeoracle/timeoracle_entry.h create mode 100644 src/timeoracle/timeoracle_zk_adapter.cc create mode 100644 src/timeoracle/timeoracle_zk_adapter.h create mode 100644 src/timeoracle_main.cc delete mode 100644 src/utils/atomic.h delete mode 100644 src/utils/counter.h delete mode 100644 src/utils/timer.h mode change 100644 => 100755 src/zk/zk_adapter.cc diff --git a/Makefile b/Makefile index c6eb1d15d..6c7f4a51b 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ ifndef CC CC = gcc endif -INCPATH += -I./src -I./include -I./src/leveldb/include -I./src/leveldb \ +INCPATH += -I./src -I./include -I./src/leveldb/include -I./src/leveldb -I./src/sdk \ -I./src/sdk/java/native-src $(DEPS_INCPATH) CFLAGS += $(OPT) $(INCPATH) -fPIC -fvisibility=hidden # hide internal symbol of tera CXXFLAGS += -std=gnu++11 $(CFLAGS) @@ -28,28 +28,42 @@ PROTO_OUT_H := $(PROTO_FILES:.proto=.pb.h) MASTER_SRC := $(wildcard src/master/*.cc) TABLETNODE_SRC := $(wildcard src/tabletnode/*.cc) IO_SRC := $(wildcard src/io/*.cc) -SDK_SRC := $(wildcard src/sdk/*.cc) +SDK_SRC := $(wildcard src/sdk/*.cc) $(wildcard src/sdk/test/global_txn_testutils.cc) \ + src/observer/rowlocknode/zk_rowlock_client_zk_adapter.cc src/observer/rowlocknode/ins_rowlock_client_zk_adapter.cc HTTP_SRC := $(wildcard src/sdk/http/*.cc) PROTO_SRC := $(filter-out %.pb.cc, $(wildcard src/proto/*.cc)) $(PROTO_OUT_CC) JNI_TERA_SRC := $(wildcard src/sdk/java/native-src/*.cc) VERSION_SRC := src/version.cc OTHER_SRC := $(wildcard src/zk/*.cc) $(wildcard src/utils/*.cc) $(VERSION_SRC) \ - src/tera_flags.cc + src/tera_flags.cc src/sdk/test/global_txn_testutils.cc COMMON_SRC := $(wildcard src/common/base/*.cc) $(wildcard src/common/net/*.cc) \ $(wildcard src/common/file/*.cc) 
$(wildcard src/common/file/recordio/*.cc) \ - $(wildcard src/common/console/*.cc) + $(wildcard src/common/console/*.cc) $(wildcard src/common/log/*.cc) \ + $(wildcard src/common/metric/*.cc) SERVER_WRAPPER_SRC := src/tera_main_wrapper.cc SERVER_SRC := src/tera_main.cc src/tera_entry.cc CLIENT_SRC := src/teracli_main.cc +TERAUTIL_SRC := src/terautil.cc +GTXN_TEST_SRC := src/sdk/test/global_txn_test_tool.cc TEST_CLIENT_SRC := src/tera_test_main.cc TERA_C_SRC := src/tera_c.cc MONITOR_SRC := src/monitor/teramo_main.cc MARK_SRC := src/benchmark/mark.cc src/benchmark/mark_main.cc +COMMON_TEST_SRC := $(wildcard src/common/test/*.cc) TEST_SRC := src/utils/test/prop_tree_test.cc src/utils/test/tprinter_test.cc \ src/io/test/tablet_io_test.cc src/io/test/tablet_scanner_test.cc \ src/io/test/load_test.cc src/master/test/master_test.cc \ src/master/test/master_impl_test.cc src/master/test/trackable_gc_test.cc \ - src/common/test/thread_pool_test.cc + src/observer/test/rowlock_test.cc src/observer/test/scanner_test.cc \ + src/observer/test/observer_test.cc \ + $(wildcard src/sdk/test/*_test.cc) $(COMMON_TEST_SRC) + +TIMEORACLE_SRC := $(wildcard src/timeoracle/*.cc) src/tera_entry.cc +TIMEORACLE_BENCH_SRC := src/timeoracle/bench/timeoracle_bench.cc +ROWLOCK_SRC := $(wildcard src/observer/rowlocknode/*.cc) src/sdk/rowlock_client.cc +ROWLOCK_PROXY_SRC := $(wildcard src/observer/rowlockproxy/*.cc) +OBSERVER_SRC := src/observer/executor/scanner_impl.cc src/observer/executor/random_key_selector.cc +OBSERVER_DEMO_SRC := $(wildcard src/observer/observer_demo.cc) TEST_OUTPUT := test_output UNITTEST_OUTPUT := $(TEST_OUTPUT)/unittest @@ -65,39 +79,53 @@ COMMON_OBJ := $(COMMON_SRC:.cc=.o) SERVER_WRAPPER_OBJ := $(SERVER_WRAPPER_SRC:.cc=.o) SERVER_OBJ := $(SERVER_SRC:.cc=.o) CLIENT_OBJ := $(CLIENT_SRC:.cc=.o) +TERAUTIL_OBJ := $(TERAUTIL_SRC:.cc=.o) +GTXN_TEST_OBJ := $(GTXN_TEST_SRC:.cc=.o) TEST_CLIENT_OBJ := $(TEST_CLIENT_SRC:.cc=.o) TERA_C_OBJ := $(TERA_C_SRC:.cc=.o) MONITOR_OBJ := 
$(MONITOR_SRC:.cc=.o) MARK_OBJ := $(MARK_SRC:.cc=.o) HTTP_OBJ := $(HTTP_SRC:.cc=.o) +COMMON_TEST_OBJ := $(COMMON_TEST_SRC:.cc=.o) TEST_OBJ := $(TEST_SRC:.cc=.o) +TIMEORACLE_OBJ := $(TIMEORACLE_SRC:.cc=.o) +TIMEORACLE_BENCH_OBJ := $(TIMEORACLE_BENCH_SRC:.cc=.o) +ROWLOCK_OBJ := $(ROWLOCK_SRC:.cc=.o) +ROWLOCK_PROXY_OBJ := $(ROWLOCK_PROXY_SRC:.cc=.o) +OBSERVER_OBJ := $(OBSERVER_SRC:.cc=.o) +OBSERVER_DEMO_OBJ := $(OBSERVER_DEMO_SRC:.cc=.o) ALL_OBJ := $(MASTER_OBJ) $(TABLETNODE_OBJ) $(IO_OBJ) $(SDK_OBJ) $(PROTO_OBJ) \ - $(JNI_TERA_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(SERVER_OBJ) $(CLIENT_OBJ) \ + $(JNI_TERA_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(SERVER_OBJ) $(CLIENT_OBJ) $(TERAUTIL_OBJ) \ $(TEST_CLIENT_OBJ) $(TERA_C_OBJ) $(MONITOR_OBJ) $(MARK_OBJ) \ - $(SERVER_WRAPPER_OBJ) + $(SERVER_WRAPPER_OBJ) $(TIMEORACLE_OBJ) $(ROWLOCK_OBJ) $(ROWLOCK_PROXY_OBJ) $(OBSERVER_OBJ) $(OBSERVER_DEMO_OBJ) LEVELDB_LIB := src/leveldb/libleveldb.a LEVELDB_UTIL := src/leveldb/util/histogram.o src/leveldb/port/port_posix.o -PROGRAM = tera_main tera_master tabletserver teracli teramo tera_test +PROGRAM = tera_main tera_master tabletserver teracli terautil teramo tera_test timeoracle timeoracle_bench rowlock observer_demo rowlock_proxy +TEST_PROGRAM=gtxn_test_tool + LIBRARY = libtera.a SOLIBRARY = libtera.so TERA_C_SO = libtera_c.so JNILIBRARY = libjni_tera.so +OBSERVER_LIBRARY = libobserver.a BENCHMARK = tera_bench tera_mark TESTS = prop_tree_test tprinter_test string_util_test tablet_io_test \ - tablet_scanner_test fragment_test progress_bar_test master_test load_test \ - thread_pool_test + tablet_scanner_test fragment_test progress_bar_test master_test load_test observer_test \ + common_test sdk_test .PHONY: all clean cleanall test -all: $(PROGRAM) $(LIBRARY) $(SOLIBRARY) $(TERA_C_SO) $(JNILIBRARY) $(BENCHMARK) +all: $(PROGRAM) $(TEST_PROGRAM) $(LIBRARY) $(SOLIBRARY) $(TERA_C_SO) $(JNILIBRARY) $(BENCHMARK) $(OBSERVER_LIBRARY) mkdir -p build/include build/lib build/bin build/log build/benchmark cp 
$(PROGRAM) build/bin - cp $(LIBRARY) $(SOLIBRARY) $(TERA_C_SO) $(JNILIBRARY) build/lib + cp $(LIBRARY) $(SOLIBRARY) $(TERA_C_SO) $(JNILIBRARY) $(OBSERVER_LIBRARY) build/lib cp src/leveldb/tera_bench . cp -r benchmark/*.sh benchmark/ycsb4tera/ $(BENCHMARK) build/benchmark cp -r include build/ cp -r conf build + mkdir -p test/tools + cp $(TEST_PROGRAM) test/tools echo 'Done' test: $(TESTS) @@ -115,11 +143,12 @@ check: test clean: rm -rf $(ALL_OBJ) $(TEST_OBJ) $(PROTO_OUT_CC) $(PROTO_OUT_H) $(TEST_OUTPUT) $(MAKE) clean -C src/leveldb - rm -rf $(PROGRAM) $(LIBRARY) $(SOLIBRARY) $(TERA_C_SO) $(JNILIBRARY) $(BENCHMARK) $(TESTS) terahttp + rm -rf $(PROGRAM) $(TEST_PROGRAM) $(LIBRARY) $(OBSERVER_LIBRARY) $(SOLIBRARY) $(TERA_C_SO) $(JNILIBRARY) $(BENCHMARK) $(TESTS) terahttp cleanall: $(MAKE) clean rm -rf build + rm -rf test/tools tera_main: src/tera_main_wrapper.o src/version.o src/tera_flags.o $(CXX) -o $@ $^ $(LDFLAGS) @@ -135,6 +164,13 @@ tabletserver: $(SERVER_OBJ) $(TABLETNODE_OBJ) $(IO_OBJ) $(SDK_OBJ) \ libtera.a: $(SDK_OBJ) $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_UTIL) $(AR) -rs $@ $^ +observer_demo : $(OBSERVER_DEMO_OBJ) $(OBSERVER_LIBRARY) $(LIBRARY) + $(CXX) -o $@ $^ $(LDFLAGS) + +libobserver.a: $(OBSERVER_OBJ) $(SDK_OBJ) $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_UTIL) \ + $(IO_OBJ) $(SDK_OBJ) + $(AR) -rs $@ $^ + libtera.so: $(SDK_OBJ) $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_UTIL) $(CXX) -o $@ $^ $(SO_LDFLAGS) @@ -144,6 +180,12 @@ libtera_c.so: $(TERA_C_OBJ) $(LIBRARY) teracli: $(CLIENT_OBJ) $(LIBRARY) $(CXX) -o $@ $^ $(LDFLAGS) +terautil: $(TERAUTIL_OBJ) $(LIBRARY) + $(CXX) -o $@ $^ $(LDFLAGS) + +gtxn_test_tool: $(GTXN_TEST_OBJ) $(LIBRARY) + $(CXX) -o $@ $^ $(LDFLAGS) + teramo: $(MONITOR_OBJ) $(LIBRARY) $(CXX) -o $@ $^ $(LDFLAGS) @@ -153,6 +195,18 @@ tera_mark: $(MARK_OBJ) $(LIBRARY) $(LEVELDB_LIB) tera_test: $(TEST_CLIENT_OBJ) $(LIBRARY) $(CXX) -o $@ $(TEST_CLIENT_OBJ) $(LIBRARY) $(LDFLAGS) +timeoracle: $(TIMEORACLE_OBJ) $(PROTO_OBJ) 
$(COMMON_OBJ) $(OTHER_OBJ) + $(CXX) -o $@ $^ $(LDFLAGS) + +timeoracle_bench : $(TIMEORACLE_BENCH_OBJ) $(LIBRARY) + $(CXX) -o $@ $^ $(LDFLAGS) + +rowlock : $(SERVER_OBJ) $(ROWLOCK_OBJ) $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) + $(CXX) -o $@ $^ $(LDFLAGS) + +rowlock_proxy : $(SERVER_OBJ) $(ROWLOCK_PROXY_OBJ) $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(OBSERVER_LIBRARY) + $(CXX) -o $@ $^ $(LDFLAGS) + terahttp: $(HTTP_OBJ) $(PROTO_OBJ) $(LIBRARY) $(CXX) -o $@ $^ $(LDFLAGS) @@ -165,7 +219,7 @@ src/leveldb/libleveldb.a: FORCE tera_bench: # unit test -thread_pool_test: src/common/test/thread_pool_test.o $(LIBRARY) +common_test: $(COMMON_TEST_OBJ) $(LIBRARY) $(CXX) -o $@ $^ $(LDFLAGS) prop_tree_test: src/utils/test/prop_tree_test.o $(LIBRARY) @@ -200,6 +254,15 @@ master_test: src/master/test/master_test.o src/master/test/master_impl_test.o \ $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_LIB) $(CXX) -o $@ $^ $(LDFLAGS) +sdk_test: src/sdk/test/global_txn_internal_test.o src/sdk/test/global_txn_test.o \ + src/sdk/test/filter_utils_test.o src/sdk/test/scan_impl_test.o \ + src/sdk/test/sdk_timeout_manager_test.o src/sdk/test/sdk_test.o $(SDK_OBJ) \ + $(PROTO_OBJ) $(OTHER_OBJ) $(COMMON_OBJ) $(LEVELDB_LIB) + $(CXX) -o $@ $^ $(LDFLAGS) + +observer_test: src/observer/test/rowlock_test.o src/observer/test/scanner_test.o src/observer/test/observer_test.o src/observer/observer_demo/demo_observer.o $(PROTO_OBJ) $(COMMON_OBJ) $(OTHER_OBJ) $(OBSERVER_OBJ) $(LIBRARY) + $(CXX) -o $@ $^ $(LDFLAGS) + $(ALL_OBJ): %.o: %.cc $(PROTO_OUT_H) $(CXX) $(CXXFLAGS) -c $< -o $@ @@ -222,8 +285,8 @@ proto: $(PROTO_OUT_CC) $(PROTO_OUT_H) # install output into system directories .PHONY: install -install: $(PROGRAM) $(LIBRARY) $(SOLIBRARY) $(TERA_C_SO) $(JNILIBRARY) +install: $(PROGRAM) $(LIBRARY) $(SOLIBRARY) $(TERA_C_SO) $(JNILIBRARY) mkdir -p $(INSTALL_PREFIX)/bin $(INSTALL_PREFIX)/include $(INSTALL_PREFIX)/lib cp -rf $(PROGRAM) $(INSTALL_PREFIX)/bin cp -rf include/* $(INSTALL_PREFIX)/include - cp -rf 
$(LIBRARY) $(SOLIBRARY) $(TERA_C_SO) $(JNILIBRARY) $(INSTALL_PREFIX)/lib + cp -rf $(LIBRARY) $(SOLIBRARY) $(TERA_C_SO) $(JNILIBRARY) $(INSTALL_PREFIX)/lib diff --git a/benchmark/run_test.sh b/benchmark/run_test.sh index 8f33ce5e6..b1e9e7c34 100755 --- a/benchmark/run_test.sh +++ b/benchmark/run_test.sh @@ -1,8 +1,8 @@ #!/bin/bash -if [[ $# != 7 || $6 -lt 0 || $6 -gt 100 ]]; then - echo "$0 DIST[zipfian, uniform, latest] ROW_NUM OP_NUM VALUE_SIZE COLUMN_NUM UPDATE_PROPORTION[0~100] TABLE_NAME" - exit 0 +if [[ $# != 9 || $6 -lt 0 || $6 -gt 100 ]]; then + echo "$0 DIST[zipfian, uniform, latest] ROW_NUM OP_NUM VALUE_SIZE COLUMN_NUM UPDATE_PROPORTION[0~100] OP_SPEED THREAD_NUM TABLE_NAME" + exit 1 fi DIST=$1 @@ -11,11 +11,12 @@ OP_NUM=$3 VALUE_SIZE=$4 COLUMN_NUM=$5 UPDATE_PROPORTION=$6 -TABLE_NAME=$7 +OP_SPEED=$7 +THREAD_NUM=$8 +TABLE_NAME=$9 -UPDATE_PROPORTION=`printf "%02d" $6` -READ_PROPORTION=`expr 100 - $UPDATE_PROPORTION` -READ_PROPORTION=`printf "%02d" $READ_PROPORTION` +UPDATE_PROPORTION=`echo $6 | awk '{printf("%.2f",$1/100)}'` +READ_PROPORTION=`echo $6 | awk '{printf("%.2f",(100-$1)/100)}'` echo "$UPDATE_PROPORTION" echo "$READ_PROPORTION" @@ -30,8 +31,12 @@ bin/ycsb run tera -p workload=com.yahoo.ycsb.workloads.CoreWorkload \ -p operationcount=$OP_NUM \ -p fieldlength=$VALUE_SIZE \ -p fieldcount=$COLUMN_NUM \ - -p updateproportion=0.$UPDATE_PROPORTION \ - -p readproportion=0.$READ_PROPORTION \ + -p updateproportion=$UPDATE_PROPORTION \ + -p readproportion=$READ_PROPORTION \ + -p target=$OP_SPEED \ + -p thread=$THREAD_NUM \ -p exportfile=ycsb.out \ | ./tera_mark --mode=m --tablename=$TABLE_NAME --type=async --verify=false +exit $? 
+ diff --git a/benchmark/ycsb4tera.md b/benchmark/ycsb4tera.md index f32ae9e28..d348434ce 100644 --- a/benchmark/ycsb4tera.md +++ b/benchmark/ycsb4tera.md @@ -30,6 +30,13 @@ 更新(写入)占所有操作的比例 updateproportion: what proportion of operations should be updates (default: 0.05) + + 每秒总共操作的次数 + target: target ops/sec all threads (default: unthrottled) + + 客户端线程数 + thread: number of client threads (default: 1) + ``` 以下参数对于tera的测试意义不大,使用默认值即可: diff --git a/build.conf.template b/build.conf.template index 1fd914ec6..170383dde 100755 --- a/build.conf.template +++ b/build.conf.template @@ -18,6 +18,7 @@ LIBUNWIND_VERSION=0.99 GPERFTOOLS_VERSION=2.5 INS_VERSION=0.17 NOSE_VERSION=1.3.7 +MONGOOSE_VERSION=6.8 if [ $MIRROR == "china" ]; then BOOST_URL=http://mirrors.tuna.tsinghua.edu.cn/macports/distfiles/boost/boost_${BOOST_VERSION}.tar.bz2 @@ -32,6 +33,7 @@ if [ $MIRROR == "china" ]; then GPERFTOOLS_URL=https://github.com/00k/gperftools/raw/master/gperftools-${GPERFTOOLS_VERSION}.tar.gz INS_URL=https://github.com/baidu/ins/archive/${INS_VERSION}.tar.gz NOSE_URL=http://mirrors.163.com/gentoo/distfiles/nose-${NOSE_VERSION}.tar.gz + MONGOOSE_URL=https://github.com/cesanta/mongoose/archive/${MONGOOSE_VERSION}.tar.gz elif [ $MIRROR == "origin" ]; then BOOST_URL=http://downloads.sourceforge.net/project/boost/boost/1.58.0/boost_${BOOST_VERSION}.tar.bz2 PROTOBUF_URL=https://github.com/google/protobuf/releases/download/v${PROTOBUF_VERSION}/protobuf-${PROTOBUF_VERSION}.tar.bz2 @@ -45,19 +47,7 @@ elif [ $MIRROR == "origin" ]; then GPERFTOOLS_URL=https://github.com/gperftools/gperftools/releases/download/gperftools-${GPERFTOOLS_VERSION}/gperftools-${GPERFTOOLS_VERSION}.tar.gz INS_URL=https://github.com/baidu/ins/archive/${INS_VERSION}.tar.gz NOSE_URL=https://pypi.python.org/packages/58/a5/0dc93c3ec33f4e281849523a5a913fa1eea9a3068acfa754d44d88107a44/nose-${NOSE_VERSION}.tar.gz -elif [ $MIRROR == "baidu" ]; then - 
BOOST_URL=http://gitlab.baidu.com/baidups/third/raw/master/boost_${BOOST_VERSION}.tar.bz2 - PROTOBUF_URL=http://gitlab.baidu.com/baidups/third/raw/master/protobuf-${PROTOBUF_VERSION}.tar.bz2 - SNAPPY_URL=http://gitlab.baidu.com/baidups/third/raw/master/snappy-${SNAPPY_VERSION}.tar.gz - SOFA_PBRPC_URL=http://gitlab.baidu.com/baidups/third/raw/master/sofa-pbrpc-${SOFA_PBRPC_VERSION}.tar.gz - ZOOKEEPER_URL=http://gitlab.baidu.com/baidups/third/raw/master/zookeeper-${ZOOKEEPER_VERSION}.tar.gz - GFLAGS_URL=http://gitlab.baidu.com/baidups/third/raw/master/gflags-${GFLAGS_VERSION}.tar.gz - GLOG_URL=http://gitlab.baidu.com/baidups/third/raw/master/glog-${GLOG_VERSION}.tar.gz - GTEST_URL=http://gitlab.baidu.com/baidups/third/raw/master/googletest-release-${GTEST_VERSION}.tar.gz - LIBUNWIND_URL=http://gitlab.baidu.com/baidups/third/raw/master/libunwind-${LIBUNWIND_VERSION}.tar.gz - GPERFTOOLS_URL=http://gitlab.baidu.com/baidups/third/raw/master/gperftools-${GPERFTOOLS_VERSION}.tar.gz - INS_URL=http://gitlab.baidu.com/baidups/third/raw/master/ins-${INS_VERSION}.tar.gz - NOSE_URL=http://gitlab.baidu.com/baidups/third/raw/master/nose-${NOSE_VERSION}.tar.gz + MONGOOSE_URL=https://github.com/cesanta/mongoose/archive/${MONGOOSE_VERSION}.tar.gz else return 1 fi diff --git a/build.sh b/build.sh index 1e1156aa9..f565149ef 100755 --- a/build.sh +++ b/build.sh @@ -218,7 +218,7 @@ elif [ ! -f "${FLAG_DIR}/ins_${INS_VERSION}" ] \ sed -i "s|^PROTOBUF_PATH ?=.*|PROTOBUF_PATH ?=${DEPS_PREFIX}|" Makefile sed -i "s|^PBRPC_PATH ?=.*|PBRPC_PATH ?=${DEPS_PREFIX}|" Makefile sed -i "s|^GTEST_PATH ?=.*|GTEST_PATH ?=${DEPS_PREFIX}|" Makefile - #BOOST_PATH=${DEPS_PREFIX}/boost_${BOOST_VERSION} make install_sdk + # BOOST_PATH=${DEPS_PREFIX}/boost_${BOOST_VERSION} make install_sdk make -j4 install_sdk cd - touch "${FLAG_DIR}/ins_${INS_VERSION}" @@ -239,6 +239,23 @@ elif [ ! 
-f "${FLAG_DIR}/nose_${NOSE_VERSION}" ] \ touch "${FLAG_DIR}/nose_${NOSE_VERSION}" fi +# mongoose +if [ ${MONGOOSE_VERSION} == "DISABLE" ]; then + echo "Disable mongoose." +elif [ ! -f "${FLAG_DIR}/mongoose_${MONGOOSE_VERSION}" ] \ + || [ ! -f "${DEPS_PREFIX}/include/mongoose.h" ] \ + || [ ! -f "${DEPS_PREFIX}/lib/libmongoose.a" ]; then + wget --no-check-certificate -O mongoose-${MONGOOSE_VERSION}.tar.gz ${MONGOOSE_URL} + tar zxf mongoose-${MONGOOSE_VERSION}.tar.gz --recursive-unlink + cd mongoose-${MONGOOSE_VERSION} + cp -af mongoose.h ${DEPS_PREFIX}/include + gcc -c mongoose.c -o mongoose.o -g2 -pipe -Wall -Werror -fPIC + ar -rv libmongoose.a mongoose.o + cp -af libmongoose.a ${DEPS_PREFIX}/lib + cd - + touch "${FLAG_DIR}/mongoose_${MONGOOSE_VERSION}" +fi + cd ${WORK_DIR} ######################################## diff --git a/build_version.sh b/build_version.sh index 8cac725a6..2534fcb85 100755 --- a/build_version.sh +++ b/build_version.sh @@ -56,7 +56,7 @@ GIT_INFO_FILE=git_info.tmp VERSION_CPP_FILE=src/version.cc # generate template file -git log | head -n 6 | sed 's/$/&\\n\\/g' > $GIT_INFO_FILE +git log | head -n 6 | sed 's/"/\\"/g' | sed 's/$/&\\n\\/g' > $GIT_INFO_FILE gen_info_template_header > $TEMPLATE_HEADER_FILE gen_info_template_foot > $TEMPLATE_FOOT_FILE gen_info_print_template >> $TEMPLATE_FOOT_FILE diff --git a/depends.mk.template b/depends.mk.template index 191cd8162..f0dbea180 100644 --- a/depends.mk.template +++ b/depends.mk.template @@ -14,17 +14,19 @@ GLOG_PREFIX=./thirdparty GTEST_PREFIX=./thirdparty GPERFTOOLS_PREFIX=./thirdparty INS_PREFIX=./thirdparty +MONGOOSE_PREFIX=./thirdparty BOOST_INCDIR=./thirdparty/boost_1_57_0 SOFA_PBRPC_INCDIR = $(SOFA_PBRPC_PREFIX)/include PROTOBUF_INCDIR = $(PROTOBUF_PREFIX)/include SNAPPY_INCDIR = $(SNAPPY_PREFIX)/include -ZOOKEEPER_INCDIR = $(ZOOKEEPER_PREFIX)/include +ZOOKEEPER_INCDIR = $(ZOOKEEPER_PREFIX)/include/zookeeper GFLAGS_INCDIR = $(GFLAGS_PREFIX)/include GLOG_INCDIR = $(GLOG_PREFIX)/include 
GTEST_INCDIR = $(GTEST_PREFIX)/include GPERFTOOLS_INCDIR = $(GPERFTOOLS_PREFIX)/include INS_INCDIR = $(INS_PREFIX)/include +MONGOOSE_INCDIR = $(MONGOOSE_PREFIX)/include SOFA_PBRPC_LIBDIR = $(SOFA_PBRPC_PREFIX)/lib PROTOBUF_LIBDIR = $(PROTOBUF_PREFIX)/lib @@ -35,6 +37,7 @@ GLOG_LIBDIR = $(GLOG_PREFIX)/lib GTEST_LIBDIR = $(GTEST_PREFIX)/lib GPERFTOOLS_LIBDIR = $(GPERFTOOLS_PREFIX)/lib INS_LIBDIR = $(INS_PREFIX)/lib +MONGOOSE_LIBDIR = $(MONGOOSE_PREFIX)/lib PROTOC = $(PROTOBUF_PREFIX)/bin/protoc @@ -45,13 +48,13 @@ PROTOC = $(PROTOBUF_PREFIX)/bin/protoc DEPS_INCPATH = -I$(SOFA_PBRPC_INCDIR) -I$(PROTOBUF_INCDIR) \ -I$(SNAPPY_INCDIR) -I$(ZOOKEEPER_INCDIR) \ -I$(GFLAGS_INCDIR) -I$(GLOG_INCDIR) -I$(GTEST_INCDIR) \ - -I$(GPERFTOOLS_INCDIR) -I$(BOOST_INCDIR) -I$(INS_INCDIR) + -I$(GPERFTOOLS_INCDIR) -I$(BOOST_INCDIR) -I$(INS_INCDIR) -I$(MONGOOSE_INCDIR) DEPS_LDPATH = -L$(SOFA_PBRPC_LIBDIR) -L$(PROTOBUF_LIBDIR) \ -L$(SNAPPY_LIBDIR) -L$(ZOOKEEPER_LIBDIR) \ -L$(GFLAGS_LIBDIR) -L$(GLOG_LIBDIR) -L$(GTEST_LIBDIR) \ - -L$(GPERFTOOLS_LIBDIR) -L$(INS_LIBDIR) + -L$(GPERFTOOLS_LIBDIR) -L$(INS_LIBDIR) -L$(MONGOOSE_LIBDIR) SO_DEPS_LDFLAGS = -lins_sdk -lsofa-pbrpc -lprotobuf -lsnappy -lzookeeper_mt \ - -lgtest_main -lgtest -lglog -lgflags + -lgtest_main -lgtest -lglog -lgflags -lmongoose DEPS_LDFLAGS = $(SO_DEPS_LDFLAGS) -ltcmalloc_minimal -lunwind ################################################################ diff --git a/doc/README.md b/doc/README.md new file mode 100644 index 000000000..eb4ed0d0a --- /dev/null +++ b/doc/README.md @@ -0,0 +1,160 @@ + +# Tera SDK及工具说明 + +## 目录 +### 1. 
[主要数据结构](#main-data-structure) + +* tera::[client](./sdk_reference/client.md) +* tera::[table](./sdk_reference/table.md) +* tera::[mutation](./sdk_reference/mutation.md) +* tera::[reader](./sdk_reference/reader.md) +* tera::[table_descriptor](./sdk_reference/table_descriptor.md) +* tera::[transaction](./sdk_reference/transaction.md) +* tera::[scan](./sdk_reference/scan.md) +* tera::[utils](./sdk_reference/utils.md) + +### 2. [主要工具](#main-tools) +* [teracli](./tools/teracli.md) +* [terautil](./tools/terautil.md) +* [tera_bench & tera_mark](./tools/benchmark.md) +* [YCSB](./tools/ycsb.md) + + + +### 1. 主要数据结构 +#### (1) tera::client 访问tera服务主结构,所有对tera的访问或操作全部由此发起。 +一个集群对应一个client即可,如需访问多个client,需要创建多个 +##### 主要功能包括: +* 表格操作:建、删、加载、卸载、打开、关闭、更新表结构、获取表格信息、快照等 +* 用户管理:建、删、修改密码、组管理等 +* 集群信息获取:获取全部表格列表、状态等 + +#### (2) tera::table 表格主结构,对表格的所有增删查改操作由此发起。 +由tera::Client::OpenTable产生,tera::Client::CloseTable关闭,不可析构。 + +#### (3) tera::error_code 错误码,很多操作会返回,注意检查。 + +#### (4) tera::mutation + +#### (5) tera::scan 扫描操作,并获取返回数据。 + +#### (6) tera::reader 读取操作,并获取返回数据。 + +#### (7) tera::table_descriptor 表格描述符主体 + +#### (8) tera::transaction 单行事务 + +#### (9) tera::scan 扫描 + +#### (10) tera::utils 编码解码 + + +### 2. 主要工具 +#### (1) teracli 操作tera的工具 +* 实际上封装了对数据的操作等,可用来进行表格创建、schema更新等管理、控制操作。 +* 查看有哪些命令可用 :./teracli help; +* 查看某个命令的help:./teracli help [cmd],例如./teracli help tablet + +#### (2) terautil 集群间数据迁移的dump工具 + +* 具体用法./terautil dump help +* 建表主要用法:./terautil --flagfile=../conf/terautil.flag dump prepare_safe +* 扫表run起来主要用法:./terautil --flagfile=../conf/terautil.flag dump run +* flag配置 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
flag名称 | flag默认值或格式 | flag介绍
--- | --- | ---
dump_tera_src_conf | ../conf/src_tera.flag(格式) | tera的源集群
dump_tera_dest_conf | ../conf/dest_tera.flag(格式) | tera的目的集群
dump_tera_src_root_path | /xxx_(路径格式) | tera的源路径
dump_tera_dest_root_path | /xxx_(路径格式) | tera的目的路径
ins_cluster_addr | terautil_ins(格式) | 锁服务器的地址
ins_cluster_root_path | /terautil/dump/xxxx(格式) | 锁服务器路径
dump_tera_src_meta_addr | “” | 源meta表的地址
dump_tera_dest_meta_addr | “” | 目的meta表的地址
dump_manual_split_interval | 1000 | 手动分裂时间间隔,单位为ms
dump_enable_manual_split | false | 是否允许手动分裂
+ + +#### (3) tera_mark 读写数据 +* 支持异步读写scan +``` +#示例: +./tera_mark --mode=w --tablename=test --type=async --verify=false --entry_limit=1000 +``` +* 参数列表 + +参数名 | 意义 | 有效取值 | 单位 | 默认值 | 其它说明 +--- | --- | --- | --- | --- | --- +table | 表名 | - | - | "" | +mode | 模式 | "w"/"r"/"s"/"m" | - | "w" | - +type | 类型 | "sync"/"async" | - | "async" | - +pend_size | 最大pending大小 | - | - | 100 | - +pend_count | 最大pending数 | - | - | 100000 | - +start_key | scan的开始key | - | - | "" | - +end_key | scan的结束key | - | - | "" | - +cf_list | scan的列簇 | - | - | "" | - +print | scan的结果是否需要打印 | true/false | - | false | - +buf_size | scan的buffer_size | >0 | - | 65536 | - +verify | md5 verify(writer&read) | true/false | - | true | - +max_outflow | max_outflow | - | - | -1 | - +max_rate | max_rate | - | - | -1 | - +scan_streaming | enable streaming scan | true/false | - | false | - +batch_count | batch_count(sync) | - | - | 1 | - +entry_limit | writing/reading speed limit | - | - | 0 | - + +#### (4) tera_bench 造数据的工具 +``` +./tera_bench --compression_ratio=1 --key_seed=1 --value_seed=20 --value_size=1000 --num=200000 +--benchmarks=random --key_size=24 --key_step=1 +``` + +#### (5) YCSB 业界通用NoSQL测试的基准测试工具 + +* 全称Yahoo! 
Cloud Serving Benchmark,Yahoo公司开发的专门用于NoSQL测试的基准测试工具 +* YCSB支持各种不同的数据分布方式,如Uniform(等概率随机选择记录)、Zipfian(随机选择记录,存在热记录)、Latest(近期写入的记录为热记录) + diff --git a/doc/cn/README.md b/doc/cn/README.md index d18e4cf74..12eeb4d98 100644 --- a/doc/cn/README.md +++ b/doc/cn/README.md @@ -1,5 +1,5 @@ -# Tera文档专区 +# Tera文档专区 ## 简介 [系统设计](../tera_design.md) @@ -10,9 +10,15 @@ [体验单机Tera](onebox.md) -[命令行工具teracli使用方法](teracli.md) +[命令行工具teracli使用方法](../tools/teracli.md) + +[集群间数据迁移的dump工具terautil使用方法](../tools/terautil.md) -[主要api使用方法](sdk_guide.md) +[造数据的工具 & 读写数据使用方法](../tools/benchmark.md) + +[性能测试工具ycsb的使用方法](../tools/ycsb.md) + +[主要api使用方法](../sdk_reference/readme.md) [搭建tera集群](cluster_setup.md) @@ -35,3 +41,4 @@ ## 版本发布 [版本发布及管理](../release_management.md) + diff --git a/doc/global_txn.md b/doc/global_txn.md new file mode 100644 index 000000000..bb62d4c79 --- /dev/null +++ b/doc/global_txn.md @@ -0,0 +1,3 @@ +# Tera全局事务的原理及实现 + +[image-1]: ../resources/images/global_txn.png diff --git a/doc/sdk_reference/client.md b/doc/sdk_reference/client.md new file mode 100644 index 000000000..a7f6fe878 --- /dev/null +++ b/doc/sdk_reference/client.md @@ -0,0 +1,169 @@ + +# Client接口说明 + +## 主要功能 + +#### 1.
表格管理 +##### (1) 新建client Client::NewClient +``` +1.1) static Client* NewClient(const std::string& confpath, const std::string& log_prefix, ErrorCode* err = NULL) +1.2) static Client* NewClient(const std::string& confpath, ErrorCode* err = NULL) +1.3) static Client* NewClient() +``` + +##### (2) 打开表格 Client::OpenTable +``` +Table* OpenTable(const std::string& table_name, ErrorCode* err) = 0 +``` +##### (3) 建表 Client::CreateTable +``` +1) bool CreateTable(const TableDescriptor& desc, ErrorCode* err) = 0 //新建带有具体描述符的表格 +2) bool CreateTable(const TableDescriptor& desc, const std::vector& tablet_delim, ErrorCode* err) = 0 //新建多个前缀为tablet_delim的tablets +``` + +##### (4) 更新schema Client::UpdateTableSchema + +``` +bool ClientImpl::UpdateTableSchema(const TableDescriptor& desc, ErrorCode* err) = 0 +``` +调用UpdateTable(desc, err),分两种情况: +* 更新lg属性。需要先disable表格 +* 更新cf属性。直接更新 +##### (5) 检查更新状态 Client::UpdateCheck + +``` +bool UpdateCheck(const std::string& table_name, bool* done, ErrorCode* err) = 0 +``` + +##### (6) disable表 Client::DisableTable +暂停表,表格不再提供读、写服务。某些属性的更新需要先disable表;使用drop删除表时,需要先执行disable操作,此操作不可回滚。 + +``` +bool DisableTable(const std::string& name, ErrorCode* err) = 0 +``` + +##### (7) drop表 Client::DropTable +删除处于disable状态的表格,此操作不可回滚。 + +``` +bool DropTable(const std::string& name, ErrorCode* err) = 0 +``` + +##### (8) enable表 Client::EnableTable + +将处于disable状态的表格重新enable,恢复读、写服务。 + +``` +bool EnableTable(const std::string& name, ErrorCode* err) = 0 +``` + +##### (9) 获取表的描述符 Client::GetTableDescriptor +``` +TableDescriptor* GetTableDescriptor(const std::string& table_name, ErrorCode* err) = 0 +``` + +##### (10) 列出所有的表 Client::List +``` +bool List(std::vector* table_list, ErrorCode* err) = 0;//列出所有的表 +bool List(const std::string& table_name, TableInfo* table_info, std::vector* tablet_list, ErrorCode* err) = 0;//获取指定的表 +``` +##### (11) 检查表是否存在 Client::IsTableExist +``` +bool IsTableExist(const std::string& table_name, ErrorCode* err) = 0 +``` + +##### (12) 
检查表是否为enable状态 Client::IsTableEnabled +``` +bool IsTableEnabled(const std::string& table_name, ErrorCode* err) = 0 +``` + +##### (13) 检查表是否为空 Client::IsTableEmpty +``` +bool IsTableEmpty(const std::string& table_name, ErrorCode* err) = 0 +``` + +##### (14) 发送请求给服务器 Client::CmdCtrl +``` +bool CmdCtrl(const std::string& command, const std::vector& arg_list, bool* bool_result, std::string* str_result, ErrorCode* err) = 0 +``` + +##### (15) 使用glog的用户防止冲突 Client::SetGlogIsInitialized +``` +void SetGlogIsInitialized() +``` + +##### (16) 删除表格 Client::DeleteTable +``` +bool DeleteTable(const std::string& name, ErrorCode* err) = 0 +``` + +##### (17) 更新表格 Client::UpdateTable +``` +bool UpdateTable(const TableDescriptor& desc, ErrorCode* err) = 0 +``` + +##### (18) 获得表格的位置 Client::GetTabletLocation +``` +bool GetTabletLocation(const std::string& table_name, std::vector* tablets, ErrorCode* err) = 0 +``` + +##### (19) 重命名表格 Client::Rename +``` +bool Rename(const std::string& old_table_name, const std::string& new_table_name, ErrorCode* err) = 0 +``` +#### 2. 
用户管理 + +##### (1) 创建用户 Client::CreateUser + +``` +bool ClientImpl::CreateUser(const std::string& user, + const std::string& password, ErrorCode* err) = 0 +``` +##### (2) 删除用户 Client::DeleteUser + +``` +bool ClientImpl::DeleteUser(const std::string& user, ErrorCode* err) = 0 +``` + +##### (3) 修改用户密码 Client::ChangePwd + +``` +bool ClientImpl::ChangePwd(const std::string& user, const std::string& password, ErrorCode* err) = 0 +``` + +##### (4) 显示指定用户信息 Client::ShowUser + +``` +bool ClientImpl::ShowUser(const std::string& user, std::vector& user_groups, ErrorCode* err) = 0 +``` + +##### (5) 添加用户到用户群 Client::AddUserToGroup + +``` +bool ClientImpl::AddUserToGroup(const std::string& user_name, const std::string& group_name, ErrorCode* err)= 0 +``` + +##### (6) 从用户群中删除用户 Client::DeleteUserFromGroup + +``` +bool ClientImpl::DeleteUserFromGroup(const std::string& user_name, const std::string& group_name, ErrorCode* err) = 0 +``` + + + diff --git a/doc/sdk_reference/mutation.md b/doc/sdk_reference/mutation.md index 54e444607..752891f9b 100644 --- a/doc/sdk_reference/mutation.md +++ b/doc/sdk_reference/mutation.md @@ -1,108 +1,154 @@ -# RowMutation +# RowMutation接口说明 tera sdk中通过RowMutation结构描述一次行更新操作,包含删除操作。 -一个RowMutaion中可以同时对多列进行操作,保证: - * 服务端生效时序与RowMutation的执行时序相同。比如对某列的删除+更新,服务端生效时不会乱序,导致先更新再删除的情况发生。 - * 同一个RowMutation中的操作保证同时成功或失败。 - * 操作不存在的列族会返回成功,但无法读取。 - -## 创建与析构 - -由tera::Table::NewRowMutation创建,不能由用户创建。 - -用户需要自行析构: - * 同步模式下Put返回后即可析构 - * 异步模式下需要等待回调返回,并处理完成后析构,建议在回调函数末尾进行析构 -## API - -### 更新 - -Key-value模式更新。若设定ttl,数据会在ttl时间超时后被淘汰。 +## 1. 数据结构 +``` + enum Type { + kPut, + kDeleteColumn, + kDeleteColumns, + kDeleteFamily, + kDeleteRow, + kAdd, + kPutIfAbsent, + kAppend, + kAddInt64 + }; + struct Mutation { + Type type; + std::string family; + std::string qualifier; + std::string value; + int64_t timestamp; + int32_t ttl; + }; +``` + +## 2. 
主要接口与用法 +#### 2.1 更新 + + +表格类型 | 接口功能 | 接口 | 参数 | 可省参数 | 返回值类型 | 其它说明 +--- | --- | --- | --- | --- | --- | --- +表格模式 | 修改一个列 | Put | const std::string& family, const std::string& qualifier, const int64_t value, int64_t timestamp | timestamp可省,省略时为-1 | void | Counter场景下使用,设定初始值。 +表格模式 | 修改一个列的特定版本 | Put | const std::string& family, const std::string& qualifier, const std::string& value, int64_t timestamp| timestamp可省,省略时为-1 | void | 若设定timestamp,数据会被更新至指定时间,危险,不建议使用 +表格模式 | 修改一个带TTL列的特定版本 | Put | const std::string& family, const std::string& qualifier, int64_t timestamp, const std::string& value, int32_t ttl | | void | +表格模式 | 修改一个列的特定版本 | Put | const std::string& family, const std::string& qualifier, int64_t timestamp, const std::string& value | | void | +表格模式 | 原子操作:如果不存在才能Put成功 | PutIfAbsent | const std::string& family, const std::string& qualifier, const int64_t delta | | void |若不存在,更新生效;否则更新数据不生效。delta可为负数。 +表格模式 | 原子加一个Cell | Add | const std::string& family, const std::string& qualifier, const int64_t delta | | void | Counter场景下使用,累加。若无初始值,会从0开始累加 +表格模式 | 原子加一个Cell | Append | const std::string& family, const std::string& qualifier, const std::string& value | | void | 将value追加至此列原数据末尾;若原数据不存在,则与Put等效。 +k-v模式 |修改带TTL的默认列 | Put | const std::string& value, int32_t ttl | ttl 可省,默认为-1 | void |若设定ttl,数据会在ttl时间超时后被淘汰。 + +#### 2.2 删除 +##### (1) 删除整行 RowMutation::DeleteRow +删除整行的指定范围版本。 +``` +void DeleteRow(int64_t timestamp = -1) = 0;//若设定timestamp,则删除此时间之前的所有更新。 Key-value模式下timestamp不生效。 +``` + +##### (2) 删除某列族 RowMutation::DeleteFamily +删除一个列族的所有列的指定范围版本。 ``` -void Put(const std::string& value, int32_t ttl = -1); +void DeleteFamily(const std::string& family, int64_t timestamp = -1) = 0;//若设定timestamp,则删除此时间之前的所有更新。 ``` -表格模式更新。若设定timestamp,数据会被更新至指定时间,危险,不建议使用。 + +##### (3) 删除某列所有版本 RowMutation::DeleteColumns +删除一个列的指定范围版本。 ``` -void Put(const std::string& family, const std::string& qualifier, const std::string& value, int64_t timestamp = -1); +void 
DeleteColumns(const std::string& family, const std::string& qualifier, int64_t timestamp = -1) = 0;//若设定timestamp,则删除此时间之前的所有更新。 ``` -表格模式更新。Counter场景下使用,设定初始值。 + +##### (4) 删除一个列的指定版本 RowMutation::DeleteColumn ``` -void Put(const std::string& family, const std::string& qualifier, int64_t value, int64_t timestamp = -1); +void DeleteColumn(const std::string& family, const std::string& qualifier, int64_t timestamp) = 0;//若不存在,则不生效。 ``` -表格模式更新。Counter场景下使用,累加。若无初始值,会从0开始累加。 + + +#### 2.3 错误码 +##### (1) 行更新错误码 RowMutation::ErrorCode ``` -void Add(const std::string& family, const std::string& qualifier, const int64_t delta); +const ErrorCode& GetError() = 0; //成功返回KOK ``` -表格模式更新。若不存在,更新生效;否则更新数据不生效。 +##### (2) 设置错误码 RowMutation::SetError ``` -void PutIfAbsent(const std::string& family, const std::string& qualifier, const std::string& value); +void SetError(ErrorCode::ErrorCodeType err, const std::string& reason) = 0; ``` -表格模式更新。将value追加至此列原数据末尾;若原数据不存在,则与Put等效。 +#### 2.4 异步 +若设定回调,则异步提交;否则同步提交。 +##### (1) 设置回调 RowMutation::SetCallBack + +设置异步回调, 操作会异步返回。 ``` -void Append(const std::string& family, const std::string& qualifier, const std::string& value); +void SetCallBack(Callback callback) = 0; ``` -### 删除 - -删除整行。若设定timestamp,则删除此时间之前的所有更新。 -Key-value模式下timestamp不生效。 +##### (2) 获得回调函数 RowMutation::GetCallBack ``` -void DeleteRow(int64_t timestamp = -1); +Callback GetCallBack() = 0; ``` -删除某列族。若设定timestamp,则删除此时间之前的所有更新。 + +#### 2.5 上下文设定 +##### (1) 设置上下文 RowMutation::SetContext +设置用户上下文,可在回调函数中获取。 ``` -void DeleteFamily(const std::string& family, int64_t timestamp = -1); +void SetContext(void* context) = 0; ``` -删除某列所有版本。若设定timestamp,则删除此时间之前的所有更新。 + +##### (2) 获取用户上下文 RowMutation::GetContext ``` -void DeleteColumns(const std::string& family, const std::string& qualifier, int64_t timestamp = -1); +void* GetContext() = 0; ``` -删除某列指定时间更新。若不存在,则不生效。 +#### 2.6 超时设定 +设定单个mutation的超时时间。 如没有特殊需要,不必单独设定,使用sdk的统一超时即可。 +##### (1) 设置超时时间 RowMutation::SetTimeOut + 
+设置超时时间(只影响当前操作,不影响Table::SetWriteTimeout设置的默认写超时) ``` -void DeleteColumn(const std::string& family, const std::string& qualifier, int64_t timestamp); +void SetTimeOut(int64_t timeout_ms) = 0; ``` - -### 异步 - -若设定回调,则异步提交;否则同步提交。 + +##### (2) 超时 RowMutation::TimeOut ``` -typedef void (*Callback)(RowMutation* param); -void SetCallBack(Callback callback); -Callback GetCallBack(); -bool IsAsync(); +int64_t TimeOut() = 0 ``` - -### 超时设定 - -设定单个mutation的超时时间。 -如没有特殊需要,不必要单独设定,使用sdk的统一超时即可。 + #### 2.7 其他操作 +##### (1) 获取行更新的操作数 RowMutation::MutationNum ``` -void SetTimeOut(int64_t timeout_ms); -int64_t TimeOut() = 0; +uint32_t MutationNum() = 0; ``` - -### 上下文设定 - -用于回调中获取用户自定义上下文信息。 -内存由用户自己管理。 - + +##### (2) 获取mutation总大小 RowMutation::Size ``` -void SetContext(void* context); -void* GetContext(); +uint32_t Size() = 0; ``` - -### 其它 - + +##### (3) 返回row_key RowMutation::RowKey ``` -uint32_t MutationNum(); -uint32_t Size(); -const RowMutation::Mutation& GetMutation(uint32_t index); +const std::string& RowKey() = 0; ``` - -### 预发布 - -获取所属事务 + +##### (4) 返回mutation RowMutation::GetMutation ``` -Transaction* GetTransaction(); +const RowMutation::Mutation& GetMutation(uint32_t index) = 0; ``` + diff --git a/doc/sdk_reference/reader.md b/doc/sdk_reference/reader.md index 476d945fb..876e5f8ca 100644 --- a/doc/sdk_reference/reader.md +++ b/doc/sdk_reference/reader.md @@ -1,103 +1,61 @@ -# RowReader +# Reader接口说明 tera sdk中通过RowReader结构描述一次行读取操作,并获取返回数据。 -## 创建与析构 - -由tera::Table::NewRowReader创建,不能由用户创建。 - -用户需要自行析构: - * 同步模式下Get返回后即可析构 - * 异步模式下需要等待回调返回,并处理完成后析构,建议在回调函数末尾进行析构 - -## API - -### 描述过滤条件 - -通过相关的API可以对列名、更新时间、版本数目等信息描述,从而对返回数据集合进行过滤。 - -如果不进行任何描述,默认返回此行所有数据。 - -#### AddColumnFamily - +## 1. 
主要接口与用法 +#### 1.1 描述过滤条件 +通过相关的API可以对列名、更新时间、版本数目等信息描述,从而对返回数据集合进行过滤。如果不进行任何描述,默认返回此行所有数据。 +##### (1) 可以增加多个列族 RowReader::AddColumnFamily ``` -void AddColumnFamily(const std::string& family); +void AddColumnFamily(const std::string& family) = 0;//如此“family”不存在于表格的schema中,则不进行过滤 ``` - -限定返回数据的列族为“family”。 - -可以增加多个列族。 - -如此“family”不存在于表格的schema中,则不进行过滤。 - -#### AddColumn - + +##### (2) 可以增加多个列 RowReader::AddColumn ``` -void AddColumn(const std::string& family, const std::string& qualifier); +void AddColumn(const std::string& family, const std::string& qualifier); //除限定返回数据列族为“family”外,其列名必须为“qualifier”。 ``` - -与AddColumnFamily类似,除限定返回数据列族为“family”外,其列名必须为“qualifier”。 - -此操作与AddColumnFamily共同生效,返回数据为二者并集。 - -#### SetTimeRange - + +##### (3) 设定最大版本数 RowReader::SetMaxVersions ``` -void SetTimeRange(int64_t ts_start, int64_t ts_end); +void SetMaxVersions(uint32_t max_version) = 0; //从最新版本开始计数,若实际数据版本数小于此值,全部返回。在最大版本数基础上再进行时间过滤。 ``` - -设定返回数据的更新时间范围。 - -只返回更新时间在[ts_start, ts_end]范围内的数据。 - -其中ts_start、ts_end均为Unix时间戳,单位为微秒(us)。 - -#### SetMaxVersions - + +##### (4) 设定返回数据的更新时间范围 RowReader::SetTimeRange ``` -void SetMaxVersions(uint32_t max_version); +void SetTimeRange(int64_t ts_start, int64_t ts_end) = 0;//只返回更新时间在[ts_start, ts_end]范围内的数据。其中ts_start、ts_end均为Unix时间戳,单位为微秒(us)。 ``` - -设定最大版本数。 - -从最新版本开始计数,若实际数据版本数小于此值,全部返回。 - -过滤优先级高于TimeRange,即在最大版本数基础上再进行时间过滤。 - -### 获取数据 - + +#### 1.2 获取数据 在RowReader被提交至服务端并返回后,可以从此结构中获取返回的数据。 - 支持两种获取方式: +
    +
  • 迭代器方式。依次遍历所有列、所有版本。
  • +
  • 全量输出。返回一个特定结构的std::map,可按列名等信息进行访问。
  • +
- * 迭代器方式。依次遍历所有列、所有版本。 - * 全量输出。返回一个特定结构的std::Map,可按列名等信息进行访问。 - -#### 迭代器方式 +##### (1) 访问数据前通过Done进行确认 RowReader::Done +``` +bool Done() = 0;;//若返回false,则数据已遍历完毕。 ``` -bool Done(); -void Next(); + +##### (2) 访问数据前通过Next进行确认 RowReader::Next ``` - -访问数据前通过Done()进行确认。 - -若返回false,则数据已遍历完毕。 - +void Next() = 0; +``` + +##### (3) 当数据存在时,可以通过以下接口访问此单元格的各字段值 +当通过RowReader访问key-value模式的表时,除RowKey和Value外,其它字段值无效。 ``` const std::string& RowKey(); std::string Value(); -std::string Family(); -std::string Qualifier(); -int64_t Timestamp(); +std::string Family() = 0; +std::string Qualifier() = 0; +int64_t Timestamp() = 0; ``` - -当数据存在时,可以通过这些接口访问此单元格的各字段值。 - -当通过RowReader访问key-value模式的表时,除RowKey和Value外,其它字段值无效。 - -#### 全量输出 - + +##### (4) 全量输出 +通过多级std::map的形式进行访问。 ``` typedef std::map TColumn; typedef std::map TColumnFamily; @@ -105,37 +63,50 @@ typedef std::map TRow; virtual void ToMap(TRow* rowmap); ``` -通过多级std::map的形式进行访问。 - -### 异步与上下文设定 - +#### 1.3 错误码 +##### (1) 获取错误码 RowReader::ErrorCode +``` +const ErrorCode& GetError() = 0; //成功返回KOK +``` +#### 1.4 异步 若设定回调,则异步提交;否则同步提交。 +##### (1) 设置回调 RowReader::SetCallBack ``` -typedef void (*Callback)(RowMutation* param); -void SetCallBack(Callback callback); -Callback GetCallBack(); +void SetCallBack(Callback callback) = 0; ``` -用于回调中获取用户自定义上下文信息。 -内存由用户自己管理。 - +##### (2) 设置回调 RowReader::GetCallBack ``` -void SetContext(void* context); -void* GetContext(); +void (*Callback)(RowReader* param); ``` -### 超时设定 +#### 1.5 上下文设定 +用于回调中获取用户自定义上下文信息。 内存由用户自己管理。 -设定单个reader的超时时间。 -如没有特殊需要,不必要单独设定,使用sdk的统一超时即可。 +##### (1) 设置上下文 RowReader::SetContext ``` -void SetTimeOut(int64_t timeout_ms); -int64_t TimeOut() = 0; +void SetContext(void* context) = 0; ``` - -### 预发布 - -获取所属事务 + +##### (2) 获取上下文 RowReader::GetContext +``` +void* GetContext() = 0; +``` +#### 1.6 超时设定 +设定单个reader的超时时间。如没有特殊需要,不必要单独设定,使用sdk的统一超时即可。 +##### (1) 设置超时时间 RowReader::SetTimeOut +``` +void SetTimeOut(int64_t timeout_ms) = 0; +``` + +#### 1.7 其他 +##### (1) 获取表格 
RowReader::GetTable +``` +Table* GetTable() = 0; +``` + +##### (2) 获取按列过滤的map ``` -Transaction* GetTransaction(); +typedef std::map >ReadColumnList; +const ReadColumnList& GetReadColumnList() = 0; ``` diff --git a/doc/sdk_reference/readme.md b/doc/sdk_reference/readme.md new file mode 100644 index 000000000..c57f747cf --- /dev/null +++ b/doc/sdk_reference/readme.md @@ -0,0 +1,42 @@ +# Tera SDK主要api接口说明 + + +### 主要数据结构 + +* tera::[client](../sdk_reference/client.md) +* tera::[table](../sdk_reference/table.md) +* tera::[mutation](../sdk_reference/mutation.md) +* tera::[reader](../sdk_reference/reader.md) +* tera::[table_descriptor](../sdk_reference/table_descriptor.md) +* tera::[transaction](../sdk_reference/transaction.md) +* tera::[scan](../sdk_reference/scan.md) +* tera::[utils](../sdk_reference/utils.md) + + +### 介绍 +#### (1) tera::client 访问tera服务主结构,所有对tera的访问或操作全部由此发起。 +一个集群对应一个client即可,如需访问多个client,需要创建多个 +##### 主要功能包括: +* 表格操作:建、删、加载、卸载、打开、关闭、更新表结构、获取表格信息、快照等 +* 用户管理:建、删、修改密码、组管理等 +* 集群信息获取:获取全部表格列表、状态等 + +#### (2) tera::table 表格主结构,对表格的所有增删查改操作由此发起。 +由tera::Client::OpenTable产生,tera::Client::CloseTable关闭,不可析构。 + +#### (3) tera::error_code 错误码,很多操作会返回,注意检查。 + +#### (4) tera::mutation + +#### (5) tera::scan 扫描操作,并获取返回数据。 + +#### (6) tera::reader 读取操作,并获取返回数据。 + +#### (7) tera::table_descriptor 表格描述符主体 + +#### (8) tera::transaction 单行事务 + + +#### (9) tera::scan 扫描 + +#### (10) tera::utils 编码解码 diff --git a/doc/sdk_reference/scan.md b/doc/sdk_reference/scan.md new file mode 100644 index 000000000..dadb915fd --- /dev/null +++ b/doc/sdk_reference/scan.md @@ -0,0 +1,98 @@ + +# scan接口说明 +tera中scan操作由ResultStream和ScanDescriptor两个数据结构进行描述。 +### 1. 
ResultStream + +##### (1) 检查迭代是否结束 +``` +bool Done(ErrorCode* err = NULL) = 0; //如果检查失败则返回error code。 +``` + +##### (2) 移到下一个cell + +``` +void Next() = 0; +``` + +##### (3) 获取当前cell的rowkey名字 +``` +std::string RowName() const = 0; +``` +##### (4) 获取当前cell的簇 +``` +std::string Family() const = 0; +``` + +##### (5) 获取当前cell的列 +``` +std::string Qualifier() const = 0; +``` + +##### (6) 返回时间戳 +``` +int64_t Timestamp() const = 0; +``` + +##### (7) 返回当前cell的值 +``` +std::string Value() const = 0; +int64_t ValueInt64() const = 0; +``` + +### 2. ScanDescriptor + +##### (1) 设置扫描的结束key +``` +void SetEnd(const std::string& rowkey); +``` + +##### (2) 设置扫描的目标cf + +``` +void AddColumnFamily(const std::string& cf); +``` + +##### (3) 设置扫描的目标列 +``` + void AddColumn(const std::string& cf, const std::string& qualifier); +``` +##### (4) 设置每列的maxversion +``` +void SetMaxVersions(int32_t versions); +``` + +##### (5) 设置每个扫描结果的时间范围 +``` +void SetTimeRange(int64_t ts_end, int64_t ts_start); +``` + +##### (6) 设置批量扫描模式 +``` +void SetAsync(bool async); +``` + +##### (7) 检查扫描是否为批量扫描模式 +``` +bool IsAsync() const; +``` + +##### (8) 设置扫描的超时时间 +``` +void SetPackInterval(int64_t timeout); +``` + +##### (9) 设置扫描的buffersize +``` +void SetBufferSize(int64_t buf_size);//默认为64K +``` + +##### (10) 设置每次扫描的cell数 +``` +void SetNumberLimit(int64_t number_limit); +``` + +##### (11) 获取每次扫描的cell数 +``` +int64_t GetNumberLimit(); +``` + diff --git a/doc/sdk_reference/table.md b/doc/sdk_reference/table.md new file mode 100644 index 000000000..58894a8ed --- /dev/null +++ b/doc/sdk_reference/table.md @@ -0,0 +1,100 @@ + +# Table接口说明 + +## 1. 主要数据结构 +#### 1. 表格信息 +``` +struct TableInfo { + TableDescriptor* table_desc; //表的描述符 + std::string status; //表格状态信息 +}; +``` +#### 2. 
tablet信息 +``` +struct TabletInfo { + std::string table_name; //表名 + std::string path; //路径 + std::string server_addr; //服务器地址 + std::string start_key; //起始key + std::string end_key; //结束key + int64_t data_size; //数据大小 + std::string status; //状态 +}; +``` + +## 2. 主要接口 +##### (1) 获取表名 Table::GetName +``` +const std::string GetName() = 0 +``` + +##### (2) 行mutation操作 Table::NewRowMutation +``` +RowMutation* NewRowMutation(const std::string& row_key) = 0 +``` +##### (3) 写数据 Table::Put +``` +1) void Put(RowMutation* row_mutation) = 0 +2) void Put(const std::vector& row_mutations) = 0 +3) bool Put(const std::string& row_key, const std::string& family, const std::string& qualifier, const std::string& value, ErrorCode* err) = 0 +4) bool Put(const std::string& row_key, const std::string& family, const std::string& qualifier, const int64_t value, ErrorCode* err) = 0; +5) bool PutIfAbsent(const std::string& row_key, const std::string& family, const std::string& qualifier, const std::string& value, ErrorCode* err) = 0; +``` + +##### (4) 检查写数据是否结束 Table::IsPutFinished + +``` +bool IsPutFinished() = 0 +``` + +##### (5) 添加数据 Table::Add + +``` +bool Add(const std::string& row_key, const std::string& family, const std::string& qualifier, int64_t delta, ErrorCode* err) = 0; +``` + +##### (6) 追加数据 Table::Append + +``` +bool Append(const std::string& row_key, const std::string& family, const std::string& qualifier, const std::string& value, ErrorCode* err) = 0; +``` + +##### (7) 按行读数据 Table::NewRowReader + +``` +RowReader* NewRowReader(const std::string& row_key) = 0 +``` + +##### (8) 读数据 Table::Get + +``` +1) void Get(RowReader* row_reader) = 0 +2) void Get(const std::vector& row_readers) = 0; +3) bool Get(const std::string& row_key, const std::string& family, const std::string& qualifier, std::string* value, ErrorCode* err) = 0; +4) bool Get(const std::string& row_key, const std::string& family, const std::string& qualifier, int64_t* value, ErrorCode* err) = 0; +``` + +##### (9) 
检查get是否结束 Table::IsGetFinished +``` +bool IsGetFinished() = 0; +``` + +##### (10) 扫描 Table::Scan +``` +ResultStream* Scan(const ScanDescriptor& desc, ErrorCode* err) = 0 +``` +##### (11) 按行事务处理 Table::StartRowTransaction +``` +Transaction* StartRowTransaction(const std::string& row_key) = 0 +``` + +##### (12) 提交行事务 Table::CommitRowTransaction +``` +void CommitRowTransaction(Transaction* transaction) = 0 +``` + +##### (13) 执行mutation Table::ApplyMutation +```c +void ApplyMutation(RowMutation* row_mu) = 0; +void ApplyMutation(const std::vector& row_mu_list) = 0; +``` diff --git a/doc/sdk_reference/table_descriptor.md b/doc/sdk_reference/table_descriptor.md index ccf79a1f1..cbc2e0670 100644 --- a/doc/sdk_reference/table_descriptor.md +++ b/doc/sdk_reference/table_descriptor.md @@ -1,350 +1,217 @@ -# 表格描述 -tera中的表格由TableDescriptor、LocalityGroupDescriptor、ColumnFamilyDescriptor三个数据结构进行描述,C++接口。 - -同时也支持更简单的字符串描述,参见本文最后。 - -## TableDescriptor - -表格描述符主体,LocalityGroupDescriptor、ColumnFamilyDescriptor由其管理。 - -描述表格全局属性,如key拼装方式、分片分裂合并阈值、ACL等信息。 - -### 创建与析构 - -此结构由用户自己创建并析构。 +# table_descriptor接口说明 +tera中的表格由ColumnFamilyDescriptor、LocalityGroupDescriptor、TableDescriptor三个数据结构进行描述。 +### 1. 
ColumnFamilyDescriptor +描述一个列族的属性。 +属性支持动态更新。更新状态为最终一致,过程中存在分片之前属性不一致情况,使用时需要注意。 +##### (1) TTL +设定列族内cell的TTL(time-to-live),单位秒,默认无穷大。 +当列族内某cell的更新时间超过此值后,读取时被屏蔽,并在垃圾回收时物理删除。 +``` +void SetTimeToLive(int32_t ttl) = 0; +int32_t TimeToLive() const = 0; +``` -### 使用场景 +##### (2) 最大版本数MaxVersions +设定列族内cell的最大版本数,默认为1。 +当某cell的版本数超过此限制后,会将最旧的版本进行屏蔽,并在垃圾回收时物理删除。 +此值不做最大值限制,但随着版本数大量增加,相应的随机读、扫描性能会下降,存储使用上升,用户可按实际情况调整。 +``` +void SetMaxVersions(int32_t max_versions) = 0; +int32_t MaxVersions() const = 0; +``` - * 表格创建,通过`tera::Client::CreateTable` - * 表格Schema更新,通过`tera::Client::UpdateTable` - * 获取表格属性,通过`tera::Client::GetTableDescriptor` - -### API +##### (3) 获取LG的名字 +``` +const std::string& LocalityGroup() const = 0; +``` +##### (4) 获取Id +``` +int32_t Id() const = 0; +``` -#### TableDescriptor +### 2. LocalityGroupDescriptor +描述一个locality group的属性。 +##### (1) 获取此LG名字 ``` -TableDescriptor(const std::string& name); +const std::string& Name() const; ``` -构造表格名为“name”的表格描述符。 +##### (2) 设定、获取存储介质,默认kInDisk +``` +void SetStore(StoreType type) = 0; +StoreType Store() const = 0; +enum StoreType { + kInDisk = 0, + kInFlash = 1, + kInMemory = 2, +}; +``` -其中表格名长度需要小于256字节,字符只支持{[a-z],[A-Z],[0-9],'_','-'}。 +##### (3) 设定、获取物理文件内部block大小 +``` +void SetBlockSize(int block_size) = 0;//设定、获取物理文件内部block大小,单位KB,默认值:4。 +int BlockSize() const = 0; +``` +##### (4) 设定、获取物理文件基础大小 +``` +int32_t SstSize() const = 0;//设定、获取物理文件内部block大小,单位KB,默认值:4。 +void SetSstSize(int32_t sst_size) = 0; +``` +##### (5) 获取/得到compress type +``` + void SetCompress(CompressType type) = 0; + CompressType Compress() const = 0; +``` +##### (6) 设定、获取是否使用bloom filter +设定、获取是否使用bloom filter,默认不使用。 +``` +void SetUseBloomfilter(bool use_bloomfilter) = 0; +bool UseBloomfilter() const = 0; +``` +##### (7) 内存内compact +是否使用内存内compact。 +``` +bool UseMemtableOnLeveldb() const = 0; +void SetUseMemtableOnLeveldb(bool use_mem_ldb) = 0; +``` +##### (8) 设定、获取内存compact中写缓存大小 +设定、获取内存compact中写缓存大小,单位KB。 +``` +int32_t 
MemtableLdbWriteBufferSize() const = 0; +void SetMemtableLdbWriteBufferSize(int32_t buffer_size) = 0; +``` +##### (9) 设定、获取内存compact中对应block大小 +设定、获取内存compact中对应block大小,单位KB。 +``` +int32_t MemtableLdbBlockSize() const = 0; +void SetMemtableLdbBlockSize(int32_t block_size) = 0; +``` + +### 3. TableDescriptor +表格描述符主体,LocalityGroupDescriptor、ColumnFamilyDescriptor由其管理。 +描述表格全局属性,如key拼装方式、分片分裂合并阈值、ACL等信息。 +使用场景 +
    +
  • 表格创建,通过tera::Client::CreateTable
  • +
  • 表格Schema更新,通过tera::Client::UpdateTable
  • +
  • 获取表格属性,通过tera::Client::GetTableDescriptor
  • +
-#### TableName +#### 3.1 TableDescriptor +##### (1) 获取表名 +设置、返回表格名。 ``` void SetTableName(const std::string& name); std::string TableName() const; ``` -设置、返回表格名。 - -#### LocalityGroup - +##### (2) 新增一个名为‘lg_name’的LG +其中,LocalityGroup名长度需要小于256字节,字符只支持{[a-z],[A-Z],[0-9],'_','-'} ``` LocalityGroupDescriptor* AddLocalityGroup(const std::string& lg_name); ``` -新增一个名为‘lg_name’的LG。 - -其中的LocalityGroup名长度需要小于256字节,字符只支持{[a-z],[A-Z],[0-9],'_','-'}。 - +##### (3) 删除名为‘lg_name’的LG ``` -bool RemoveLocalityGroup(const std::string& lg_name); +bool RemoveLocalityGroup(const std::string& lg_name);//如果此LG中还有列族存在,删除失败。 ``` - -删除名为‘lg_name’的LG。 - -如果此LG中还有列族存在,删除失败。 - +##### (4) 通过id/名称访问对应LG +LG在表格内部以vector形式保存,id为其对应的下标。 ``` const LocalityGroupDescriptor* LocalityGroup(int32_t id) const; const LocalityGroupDescriptor* LocalityGroup(const std::string& lg_name) const; ``` - -通过id/名称访问对应LG。 - -LG在表格内部以vector形式保存,id为其对应的下标。 - +##### (5) 获取/得到compress type ``` -int32_t LocalityGroupNum() const; + void SetCompress(CompressType type) = 0; + CompressType Compress() const = 0; ``` - -返回当前表格中LG数量。 - -#### ColumnFamily - +##### (6) 返回当前表格中LG数量 ``` -ColumnFamilyDescriptor* AddColumnFamily(const std::string& cf_name,const std::string& lg_name); +int32_t LocalityGroupNum() const; ``` + +#### 3.2 ColumnFamily -在‘lg_name’下新增一个名为‘cf_name’的列族。 - -若‘lg_name’不存在,返回NULL。 - -其中列族名长度需要小于256字节,字符只支持{[a-z],[A-Z],[0-9],'_','-'}。 - +##### (1) 在‘lg_name’下新增一个名为‘cf_name’的列族 +若‘lg_name’不存在,返回NULL。其中列族名长度需要小于256字节,字符只支持{[a-z],[A-Z],[0-9],'_','-'}。 +``` +ColumnFamilyDescriptor* AddColumnFamily(const std::string& cf_name, const std::string& lg_name = "lg0"); ``` +##### (2) 删除名为‘cf_name’的列族 +``` void RemoveColumnFamily(const std::string& cf_name); ``` - -删除名为‘cf_name’的列族。 - +##### (3) 通过id/名称访问对应列族 +列族在表格内部以vector形式保存,id为其对应的下标。 ``` const ColumnFamilyDescriptor* ColumnFamily(int32_t id) const; const ColumnFamilyDescriptor* ColumnFamily(const std::string& cf_name) const; ``` - -通过id/名称访问对应列族。 - 
-列族在表格内部以vector形式保存,id为其对应的下标。 - +##### (4) 返回当前表格中列族数量 ``` int32_t ColumnFamilyNum() const; ``` -返回当前表格中列族数量。 +#### 3.3 RawKey -#### RawKey - -``` +##### (1) 表格内部key的拼装格式 +决定了表格的存储及访问格式,推荐kBinary。 +``` +void SetRawKey(RawKeyType type); +RawKeyType RawKey() const; enum RawKeyType { kReadable = 0, - kBinary = 1, + kBinary = 1, kTTLKv = 2, kGeneralKv = 3, -}; -void SetRawKey(RawKeyType type); -RawKeyType RawKey() const; -``` - -表格内部key的拼装格式。 - -决定了表格的存储及访问格式,推荐kBinary。 - -#### SplitSize - -``` -void SetSplitSize(int64_t size); -int64_t SplitSize() const; +}; ``` - -分片分裂阈值。 - +#### 3.4 SplitSize +##### (1) 分片分裂阈值 当分片数据量(物理存储)超过此阈值时,会被一分为二,并可能被两个不同服务器加载。 - 此分裂阈值是一个基础参考值,系统会根据实际动态负载在此值基础上进行调整。 - -#### MergeSize - ``` -void SetMergeSize(int64_t size); -int64_t MergeSize() const; +void SetSplitSize(int64_t size); +int64_t SplitSize() const; ``` -分片合并阈值。 - +#### 3.5 MergeSize +##### (1) 分片合并阈值 当分片数据量(物理存储)低于此阈值时,会被合并至相临分片中。 - 此值是一个基础参考值,系统会根据实际动态负载在此值基础上进行调整。 - 需要小于分裂阈值的1/3,防止出现合并、分裂的循环出现。 -#### Write Ahead Log - -``` -void DisableWal(); -bool IsWalDisabled() const; +``` +void SetMergeSize(int64_t size); +int64_t MergeSize() const; ``` - -配置日志开关,默认打开。 - +#### 3.6 Write Ahead Log +##### (1) 配置日志开关,默认打开 当此表格数据没有强特久化需求时,可以选择关闭日志。 - 会大幅提升写性能、降低系统IO消耗。 - 当有服务器宕机时,内存中数据将丢失,谨慎关闭。 -#### Admin - -``` -void SetAdmin(const std::string& name); -std::string Admin() const; -void SetAdminGroup(const std::string& name); -std::string AdminGroup() const; -``` - -设置表格ACL信息。 - -## LocalityGroupDescriptor - -描述一个locality group的属性。 - -### 创建与析构 - -通过`TableDescriptor::AddLocalityGroup`进行创建。 - -无须用户析构。 - -### API - -#### Name - -``` -const std::string& Name() const; -``` - -获取此LG名字。 - -#### Store - -``` -enum StoreType { - kInDisk = 0, - kInFlash = 1, - kInMemory = 2, -}; -void SetStore(StoreType type); -StoreType Store() const; -``` - -设定、获取存储介质,默认kInDisk。 - -#### BlockSize、SstSize、BloomFilter - -``` -void SetBlockSize(int block_size); -int BlockSize() const; -``` - 
-设定、获取物理文件内部block大小,单位KB,默认值:4。 - -物理存储基于leveldb开发,此概念与leveldb中的block相似。 - -``` -void SetSstSize(int sst_size); -int SstSize() const; -``` - -设定、获取物理文件基础大小,单位MB,默认值:8。 - -物理存储基于leveldb开发,此概念与leveldb中的level1文件大小相同。 - -``` -void SetUseBloomfilter(bool use_bloomfilter); -bool UseBloomfilter() const; -``` - -设定、获取是否使用bloom filter,默认不使用。 - -物理存储基于leveldb开发,此概念与leveldb中的bloom filter。 - -#### 内存内compact - -``` -bool UseMemtableOnLeveldb() const; -void SetUseMemtableOnLeveldb(bool use_mem_ldb); -``` - -是否使用内存内compact。 - -``` -int32_t MemtableLdbWriteBufferSize() const; -void SetMemtableLdbWriteBufferSize(int32_t buffer_size); -``` - -设定、获取内存compact中写缓存大小,单位KB。 - -``` -int32_t MemtableLdbBlockSize() const; -void SetMemtableLdbBlockSize(int32_t block_size); +``` +void DisableWal(); +bool IsWalDisabled() const; ``` - -设定、获取内存compact中对应block大小,单位KB。 - -## ColumnFamilyDescriptor - -描述一个列族的属性。 - -属性支持动态更新。更新状态为最终一致,过程中存在分片之前属性不一致情况,使用时需要注意。 - -### 创建与析构 - -通过`TableDescriptor::AddColumnFamily`进行创建。 - -无须用户析构。 - -### API - -#### TTL +#### 3.7 事务 +##### (1) 事务处理 ``` -void SetTimeToLive(int32_t ttl); -int32_t TimeToLive() const; +void EnableTxn(); +bool IsTxnEnabled() const; ``` - -设定列族内cell的TTL(time-to-live),单位秒,默认无穷大。 - -当列族内某cell的更新时间超过此值后,读取时被屏蔽,并在垃圾回收时物理删除。 - -#### MaxVersion +#### 3.8 Admin +##### (1) 设置表格的admin ``` -void SetMaxVersions(int32_t max_versions); -int32_t MaxVersions() const; +void SetAdmin(const std::string& name); +std::string Admin() const; +void SetAdminGroup(const std::string& name); +std::string AdminGroup() const; ``` - -设定列族内cell的最大版本数,默认为1。 - -当某cell的版本数超过此限制后,会将最旧的版本进行屏蔽,并在垃圾回收时物理删除。 - -此值不做最大值限制,但随着版本数大量增加,相应的随机读、扫描性能会下降,存储使用上升,用户可按实际情况调整。 - -## 字符串描述 - -描述表格的字符串是一个支持描述节点属性的树结构,语法详见[PropTree](https://github.com/BaiduPS/tera/blob/master/doc/prop_tree.md) - -### 描述表格存储 - -表格结构中包含表名、locality groups定义、column families定义,一个典型的表格定义如下(可写入文件): - - # tablet分裂阈值为4096M,合并阈值为512M - # 三个lg,分别配置为flash、flash、磁盘存储 - table_hello { - lg_index { - update_flag - }, - 
lg_props { - level, - weight - }, - lg_raw { - data - } - } - -如果无需配置LG,指定表名和所需列名即可(所有的属性可配): - - table_hello {cf0, cf1, cf2} - -### 描述key-value存储 - -只需指定表名即可,若需要指定存储介质等属性,可选择性添加: - - kv_hello # 简单key-value - kv_hello # 配置若干属性 - -### 属性及含义 - -span | 属性名 | 意义 | 有效取值 | 单位 | 默认值 | 其它说明 ---- | --- | --- | --- | --- | --- | --- -table | splitsize | 某个tablet增大到此阈值时分裂为2个子tablets| >=0,等于0时关闭split | MB | 512 | -table | mergesize | 某个tablet减小到此阈值时和相邻的1个tablet合并 | >=0,等于0时关闭merge | MB | 0 | splitsize至少要为mergesize的5倍 -lg | storage | 存储类型 | "disk" / "flash" / "memory" | - | "disk" | -lg | blocksize | LevelDB中block的大小 | >0 | KB | 4 | -lg | use_memtable_on_leveldb | 是否启用内存compact | "true" / "false" | - | false | -lg | sst_size | 第一层sst文件大小 | >0 | MB | 8 | -cf | maxversions | 保存的最大版本数 | >0 | - | 1 | -cf | ttl | 数据有效时间 | >=0,等于0时此数据永远有效 | second | 0 | diff --git a/doc/sdk_reference/transaction.md b/doc/sdk_reference/transaction.md new file mode 100644 index 000000000..7a9ba1ae1 --- /dev/null +++ b/doc/sdk_reference/transaction.md @@ -0,0 +1,60 @@ + +# 单行事务transaction接口说明 + +## 主要功能 + + +##### (1) 提交一个修改操作 Transaction::ApplyMutation +``` +void ApplyMutation(RowMutation* row_mu) = 0 +``` + +##### (2) 读取操作 Transaction::Get +``` +ErrorCode Get(RowReader* row_reader) = 0 +``` +##### (3) 回调函数原型 Transaction::Callback +``` +typedef void (*Callback)(Transaction* transaction) +``` + +##### (4) 设置提交回调, 提交操作会异步返回 Transaction::SetCommitCallback + +``` +void SetCommitCallback(Callback callback) = 0; +``` + +##### (5) 获取提交回调 Transaction::GetCommit + +``` +Callback GetCommitCallback() = 0; +``` + +##### (6) 设置用户上下文,可在回调函数中获取 Transaction::SetContext + +``` +void SetContext(void* context) = 0; +``` + +##### (7) 获取用户上下文 Transaction::GetContext + +``` +void* GetContext() = 0 +``` + +##### (8) 获得结果错误码 Transaction::GetError + +``` +const ErrorCode& GetError() = 0; // 异步模式下,通过GetError()获取提交结果 +``` + +##### (9) 同步模式下,获得提交的结果 Transaction::Commit +``` +ErrorCode Commit() = 0 // 
同步模式下,Commit()的返回值代表了提交操作的结果(成功 或者 失败及其原因) +``` + +##### (10) 获取事务开始时间戳 Transaction::GetStartTimestamp +``` +int64_t GetStartTimestamp() = 0 //仅在全局事务场景下有效 +``` + diff --git a/doc/sdk_reference/utils.md b/doc/sdk_reference/utils.md new file mode 100644 index 000000000..0ad5ba27a --- /dev/null +++ b/doc/sdk_reference/utils.md @@ -0,0 +1,14 @@ + +# utils接口说明 +tera中utils操作主要用来编码和解码counter cell +##### (1) 编码 +``` +static std::string EncodeCounter(int64_t counter); +``` + +##### (2) 解码 + +``` +static bool DecodeCounter(const std::string& buf, int64_t* counter); +``` + diff --git a/doc/tools/benchmark.md b/doc/tools/benchmark.md new file mode 100644 index 000000000..5f8ce2941 --- /dev/null +++ b/doc/tools/benchmark.md @@ -0,0 +1,38 @@ + +## 1. tera_bench +造数据的工具 +### (1) 用法 +``` +./tera_bench --compression_ratio=1 --key_seed=1 --value_seed=20 --value_size=1000 --num=200000 --benchmarks=random --key_size=24 --key_step=1 +``` + +## 2. tera_mark +读写数据,支持异步读写scan + +### (1) 用法 +``` +#示例: +./tera_mark --mode=w --tablename=test --type=async --verify=false --entry_limit=1000 +``` + +### (2) 参数列表 + +参数名 | 意义 | 有效取值 | 单位 | 默认值 | 其它说明 +--- | --- | --- | --- | --- | --- +table | 表名 | - | - | "" | +mode | 模式 | "w"/"r"/"s"/"m" | - | "w" | - +type | 类型 | "sync"/"async" | - | "async" | - +pend_size | 最大pending大小 | - | - | 100 | - +pend_count | 最大pending数 | - | - | 100000 | - +start_key | scan的开始key | - | - | "" | - +end_key | scan的结束key | - | - | "" | - +cf_list | scan的列簇 | - | - | "" | - +print | scan的结果是否需要打印 | true/false | - | false | - +buf_size | scan的buffer_size | >0 | - | 65536 | - +verify | md5 verify(writer&read) | true/false | - | true | - +max_outflow | max_outflow | - | - | -1 | - +max_rate | max_rate | - | - | -1 | - +scan_streaming | enable streaming scan | true/false | - | false | - +batch_count | batch_count(sync) | - | - | 1 | - +entry_limit | writing/reading speed limit | - | - | 0 | - + diff --git a/doc/tools/readme.md b/doc/tools/readme.md new file mode 100644 index 
000000000..401fad9e9 --- /dev/null +++ b/doc/tools/readme.md @@ -0,0 +1,10 @@ + +# Tera 主要工具说明 + +## 主要工具 +* 操作tera的工具: [teracli](../tools/teracli.md) +* 集群间数据迁移的dump工具: [terautil](../tools/terautil.md) +* 造数据 & 读写数据的工具: [tera_bench & tera_mark](../tools/benchmark.md) +* 业界通用NoSQL测试的基准测试工具: [YCSB](../tools/ycsb.md) + + diff --git a/doc/tools/teracli.md b/doc/tools/teracli.md new file mode 100644 index 000000000..1ca78c460 --- /dev/null +++ b/doc/tools/teracli.md @@ -0,0 +1,448 @@ + +# teracli使用说明 +./teracli help即可看到相关的命令和使用方法 + +### 1. create 创建表格 +#### 1.1 基本命令 + +```c +./teracli create [] +./teracli createbyfile [] +``` +说明: +* table-schema是一个描述表格结构的字符串。 +* 表名规范:首字符为字母(大小写均可), +* 有效字符包括大小写的英文字母(a-zA-Z)、数字(0-9)、下划线(_)、连字符(-)、点(.)。 1 <= 有效长度 <= +* 512 +* Tera支持在建立表格时预分配若干tablet,tablet分隔的key写在tablet-delimiter-file中,按“\n”分隔。 +* 如果表格schema比较复杂,可以将其写入文件中,通过createbyfile命令进行创建。 + +#### 1.2 创建table模式存储 +表格结构中包含表名、locality groups定义、column families定义,一个典型的表格定义如下(可写入文件) +```c +# tablet分裂阈值为4096M,合并阈值为512M +# 三个lg,分别配置为flash、flash、磁盘存储 +table_hello { + lg_index { + update_flag + }, + lg_props { + level, + weight + }, + lg_raw { + data + } +} +``` +如果只希望简单的使用tera,对性能没有很高要求,那么schema只需指定表名和所需列名即可(如需要,所有的属性也是可配的): +```c +table_hello {cf0, cf1, cf2} +``` + +#### 1.3 创建key-value表 +tera支持高性能的key-value存储,其schema只需指定表名即可,若需要指定存储介质等属性,可选择性添加: +```c + # 表名为key-value,默认storage为disk, splitsize为512M, mergesize为0 +./teracli create kv_hello + # 配置若干属性 +./teracli create "kv_hello " +``` +#### 1.4 表格各级属性 + +span | 属性名 | 意义 | 有效取值 | 单位 | 默认值 | 其它说明 +--- | --- | --- | --- | --- | --- | --- +table | splitsize | 某个tablet增大到此阈值时分裂为2个子tablets| >=0,等于0时关闭split | MB | 512 | +table | mergesize | 某个tablet减小到此阈值时和相邻的1个tablet合并 | >=0,等于0时关闭merge | MB | 0 | +splitsize至少要为mergesize的3倍,建议为mergesize的10倍,避免merge后又分裂 +lg | storage | 存储类型 | "disk" / "flash" / "memory" | - | "disk" | +lg | blocksize | LevelDB中block的大小 | >0 | KB | 4 | +lg | use_memtable_on_leveldb | 是否启用内存compact | "true" / "false" | - | false | 
+lg | sst_size | 第一层sst文件大小 | >0 | MB | 8 | +cf | maxversions | 保存的最大版本数 | >0 | - | 1 | +cf | ttl | 数据有效时间 | >=0,等于0时此数据永远有效 | second | 0 | +和minversions冲突时以minversions为准 + + +### 2 update 更新表格schema +更新时使用schema语法和建表时的语法基本一致, +不同主要在于更新时只需指定要更新的属性,不需要改动的属性无需列出。 +#### 2.1 基本语法 +```c +./teracli update +``` +#### 2.2 分类 +主要分为两大类更新: +* 更新table模式schema +* 更新kv模式schema + +#### 2.3 更新table模式schema + +支持表格、cf属性热更新 +##### 2.3.1 更新table的属性(不更新lg、cf属性) +```c +./teracli update "table_hello" //更新mergesize +./teracli update "table_hello" //更新mergesize和splitsize +``` +##### 2.3.2 更新lg属性时,***需要disable表格*** +```c +./teracli disable table_hello +./teracli update "table_hello{lg0}" +./teracli update "table_hello{lg0}" //也可以同时修改table属性 +``` +##### 2.3.3 更新cf属性 +```c +./teracli update "table_hello{lg0{cf0}}" +#也可以同时修改table或者lg属性 +./teracli update "table_hello{lg0{cf0}}" +``` +##### 2.3.4 增加、删除cf + +```c +# 在lg0下增加cf1,并设置属性ttl值为123. +# op意为操作,op=add需要放在cf属性的最前面 +./teracli update "table_hello{lg0{cf1}}" + +# 从lg0中删除cf1 +./teracli update "table_hello{lg0{cf1}}" +``` + +#### 2.4 更新kv模式schema +```c +# 更新部分属性时需要disable表格,程序会在运行时给出提示 +./teracli update "kv_hello" +``` + +### 3. update-check + +### 4. enable +将处于disable状态的表格重新enable,恢复读、写服务。 +```c +./teracli enable +``` + +### 5. disable +将处于表格置于disable状态,不再提供读、写服务。 +```c +./teracli enable +``` + +### 6. drop +删除处于disable状态的表格,此操作不可回滚。 +```c +./teracli drop +``` +### 7. rename 重命名表格 +```c +#语法: +./teracli rename +``` +示例: +```c +./teracli rename tb1 tb2 +``` + +### 8. put 向表中写入一个value +向表中写入以rowkey为key,列为columnfamily:qualifier的值value.对于kv模式的表来说,无需columnfamily:qualifier. +```c +#语法: +./teracli put [] +``` +示例: +```c +./teracli put mytable rowkey cf0:qu0 value +``` + +### 9. put-ttl 新增的ttl字段表示这个value的有效时间 +```c +#语法: +./teracli put-ttl [] +``` +示例: +```c +#这个value在20秒内有效,超时就读不到了。 +./teracli put-ttl mytable rowkey cf0:qu0 value 20 +``` + +### 10. putif 原子操作,如果不存在才能put成功 + +```c +#语法: +./teracli putif [] +``` + +### 11. 
get 读取一个value +```c +#语法: +./teracli get [] +``` +示例: +```c +#这个value在20秒内有效,超时就读不到了。 +./teracli get mytable rowkey cf0:qu0 +``` + +### 12. scan 扫描一个表 +将表中key从[startkey, endkey)范围的所有数据扫描出来。 +每个value可以有多个版本(versions),scan命令默认只输出每个value的最新版本, +想要获取全部版本可以使用scanallv命令。 +```c +#语法: +./teracli scan[allv] +``` +示例: +```c +#扫描整个表 +./teracli scan mytable "" "" +``` + + +### 13. delete 删除一个value +如果只想删除某列最新的一个版本可以用delete1v命令。 +```c +#语法: +./teracli delete[1v] [] +``` + +### 14. put_counter 写入一个counter(计数器) +```c +#语法: +./teracli put_counter [] +``` +示例: +```c +#写入一个初始值为3的计数器: +./teracli put_counter mytable rowkey cf0:qu0 3 +``` +### 15. get_counter 读取一个counter +``` +#语法: +./teracli get_counter [] +``` +示例: +```c +#读取之前写入的那个counter: +./teracli get_counter mytable rowkey cf0:qu0 +``` + +### 16. add 给某个counter加上一个delta值 +``` +#语法: +./teracli add delta +``` +示例: +```c +#读取之前写入的那个counter: +./teracli get_counter mytable rowkey cf0:qu0 +``` + +### 17. putint64 写入一个int64类型counter(计数器) + +``` +#语法: +./teracli putint64 [] +``` +示例: +```c +#写入一个初始值为67的计数器: +./teracli putint64 mytable row1 cf0:qu0 67 +``` + +### 18. getint64 读取一个int64类型的counter + +``` +#语法: +./teracli getint64 [] +``` +示例: +```c +./teracli getint64 mytable row1 cf0:qu0 +``` + +### 19. addint64 对int64类型的counter执行原子加操作 +``` +#语法: +./teracli addint64 delta +``` +示例: +```c +#对之前写入的counter执行-3的操作: +# addint64操作执行完以后,该counter的值为 64 +./teracli addint64 mytable row1 cf0:qu0 -3 +``` +### 20. append 原子操作:追加内容到一个Cell +``` +#语法: +./teracli append [] +``` +示例: +```c +./teracli put mytalbe rowkey cf0:qu0 hello +./teracli append mytable rowkey cf0:qu0 world +#此时再去get会得到helloworld +./teracli get mytable rowkey cf0:qu0 +``` +### 20. batchput 批量写数据 +``` +#语法: +./teracli batchput +``` +### 21. batchget 批量读数据 +``` +#语法: +./teracli batchget +``` +### 22. show 显示表格信息 +``` +#语法: +./teracli show[x] [] +``` +示例: +```c +#查看某个table的信息: +./teracli show mytable +#查看集群内所有table的信息: +./teracli show +``` + +### 23. 
showx 显示表格详细信息 +``` +#语法: +./teracli show[x] [] +``` +示例: +```c +#查看某个table的信息: +./teracli showx mytable +``` + +### 24. showschema 显示表格schema +表格schema里含有很多属性(例如某个cf保留的最小版本数),创建表格时,没有显示指定的属性都取默认值, +这些属性在showschema时不会显示出来;想要显示全部属性,可以使用showschemax命令。 +``` +#语法: +./teracli showschema[x] +``` + + +### 25. showts 显示tabletnode的信息 +带上后缀'x'得到的信息会更详细(showtsx)。 +``` +#语法: +./teracli showts [] +``` +示例: +```c +#显示某个tabletnode的信息: +./teracli showts "example.company.com:7770" +#显示集群内所有tabletnode的信息: +./teracli showts +``` + +### 26. range 显示表的范围 +``` +#语法: +./teracli range +``` +### 27. txn 事务(仅支持单事务行操作) +``` +#语法: +./teracli txn +operation包括start和commit +./teracli txn start +./teracli txn commit +``` + +### 28. user用户管理 +``` +#语法: +./teracli user +operation包括create、changepwd、show、delete、addtogroup和deletefromgroup +user + create + changepwd + show + delete + addtogroup + deletefromgroup +``` +### 29. tablet +``` +#语法: +./teracli tablet +operation包括move、reload、compact、split、merge和scan +tablet + move + reload + force to unload and load on the same ts + compact + split + merge + scan +``` + +### 30. compact +``` +#语法: +./teracli compact +``` + +### 31. safemode +``` +#语法: +./teracli safemode [get|enter|leave] +``` + +### 32. meta +meta for master memory, meta2 for meta table. +``` +#语法: +./teracli meta[2] [backup|check|repair|show] +``` +### 33. findmaster master的位置 +``` +#语法: +./teracli findmaster +``` +### 34. reload +``` +#语法: +./teracli reload config hostname:port +``` + +### 35. kick +``` +#语法: +./teracli kick +``` + +### 36. findtablet +``` +#语法: +./teracli findtablet +./teracli findtablet +``` + +### 37. cookie +``` +#语法: +./teracli cookie +cookie + dump cookie-file -- dump contents of specified files + findkey cookie-file key -- find the info of a key +``` + +### 38. 
version版本 +``` +#语法: +./teracli version +``` + diff --git a/doc/tools/terautil.md b/doc/tools/terautil.md new file mode 100644 index 000000000..842b572eb --- /dev/null +++ b/doc/tools/terautil.md @@ -0,0 +1,78 @@ + + +# terautil + +集群间数据迁移的dump工具 +### 1. 用法 +``` +./terautil dump help +``` +#### (1)建表 +``` +./terautil --flagfile=../conf/terautil.flag dump prepare_safe +``` +#### (2) 将扫表操作run起来 +``` +./terautil --flagfile=../conf/terautil.flag dump run +``` + +### 2. flag配置 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
flag名称flag默认值或格式flag介绍
dump_tera_src_conf ../conf/src_tera.flag(格式)tera的源集群
dump_tera_dest_conf../conf/dest_tera.flag(格式)tera的目的集群
dump_tera_src_root_path/xxx_(路径格式)tera的源路径
dump_tera_dest_root_path/xxx_(路径格式)tera的目的路径
ins_cluster_addrterautil_ins(格式)锁服务器的地址
ins_cluster_root_path/terautil/dump/xxxx(格式)锁服务器路径
dump_tera_src_meta_addr“”源meta表的地址
dump_tera_dest_meta_addr“”目的meta表的地址
dump_manual_split_interval1000手动分裂时间间隔,单位为ms
dump_enable_manual_splitfalse是否允许手动分裂
+ + diff --git a/doc/tools/ycsb.md b/doc/tools/ycsb.md new file mode 100644 index 000000000..b6f922bc7 --- /dev/null +++ b/doc/tools/ycsb.md @@ -0,0 +1,294 @@ + +# YCSB工具使用说明 + +### 1. 属性 + +#### 1.1 核心YCSB属性 +所有工作量文件可以指定以下属性: + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
参数名意义默认值
workload要使用的工作量类,如com.yahoo.ycsb.workloads.CoreWorkload
db要使用的数据库类。可选地,这在命令行可以指定com.yahoo.ycsb.BasicDB
exporter要使用的测量结果的输出类com.yahoo.ycsb.measurements.exporter.TextMeasurementsExporter
exportfile用于替代stdout的输出文件路径未定义/输出到stdout
threadcountYCSB客户端的线程数。可选地,这可以在命令行指定1
measurementtype支持的测量结果类型有直方图和时间序列直方图
+ + + + + + +#### 1.2 核心工作量包属性 +和核心工作量构造器一起使用的属性文件可以指定以下属性及值 +#####1.2.1 重要参数 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
参数名意义默认值有效取值
recordcount数据行数,装载进数据库的初始记录数0
operationcount要进行的操作数量
fieldcount每行的qualifier个数10
fieldlength每个qualifier的value长度(字节)100
requestdistribution随机读的数据分布uniformuniform、zipfian、latest
insertorder写入顺序,ordered是顺序写,hashed是随机写hashedordered、hashed
readallfields读取所有qualifier还是只读一个qualifiertruetrue、false
readproportion随机读占所有操作的比例0.95
updateproportion更新(写入)占所有操作的比例0.05
target每秒总共操作的次数unthrottled
threadcount客户端线程数1
+ +##### 1.2.2 非必需参数(对tera测试意义不大,用默认值即可) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
参数名意义默认值有效取值
insertproportion插入(写入)占所有操作的比例0
scanproportionscan占所有操作的比例,tera_mark不支持0
readmodifywriteproportionreadmodifywrite占所有操作的比例,tera不支持该操作0
maxscanlength每次scan需要读取的行数,tera不支持指定行数的scan1000
scanlengthdistributionscan的行数选择策略uniform
maxexecutiontime最大执行时间,超过此时间会强行结束测试(单位为秒)
table表名,tera_mark不支持usertable
+ +#### 1.3 测量结果属性 +每一个测量结果类型可以为如下属性形式: + + + + + + + + + + + + + + + + + + + + + + + + + +
类型参数名意义默认值有效取值
直方图histogram.buckets直方图输出的区间数1000
时间序列timeseries.granularity时间序列输出的粒度1000
+ +### 2 运行时参数 +即使工作负载类和参数文件定义了一个特定的工作负载,在运行基准测试时你还是想指定一些额外的设置。当你运行YCSB客户端时命令行提供了这些设置。这些设置包括: +* -threads :客户端的线程。默认地,YCSB客户端使用一个工作者线程,但是额外的线程可以被指定。当需要增加对数据库的装载数量时这是经常使用的。 +* -target:每秒的目标操作数。默认地,YCSB客户端将试图尽可能地执行最多操作。例如,如果每个操作平均使用了100ms,客户端每个工作者线程每秒将执行10个操作。然而,你可以限制每秒的目标操作数。比如,为了生成一条延迟-吞吐量曲线,你可以指定不同的目标吞吐量,以测试每种吞吐量下的延迟。 +* -s:状态。对于一个运行时间长的工作负载,让客户端报告状态是有用的,这可以让你知道它并没有挂掉,并且给你某些对它的执行过程的想法。通过在命令行指定“-s”,客户端将每10秒输出状态到stderr。 + + + + +### 3 用法 + +#### 3.1 相关命令 +* load: 执行加载命令 +* run: 执行工作负载 +* shell: 交互式模式 +``` +#basic参数告诉客户端使用哑BasicDB层。你也可以在你的参数文件中使用“db”属性指定它(例如,“db=com.yahoo.ycsb.BasicDB”) +./bin/ycsb shell basic +> help +Commands: +read key [field1 field2 ...] // Read a record +scan key recordcount [field1 field2 ...] // Scan starting at key +insert key name1=value1 [name2=value2 ...] // Insert a new record +update key name1=value1 [name2=value2 ...] // Update a record +delete key // Delete a record +table [tablename] // Get or [set] the name of the table +quit // Quit +``` + +#### 3.2 使用方法 +使用时,先建表,再加载数据,最后执行相关事务。 + +##### 3.2.1 建表 +ycsb的生成的row都是“user”+19位数字的格式,如 user9105318085603802964。 因此,如果需要预分表,必须以“user”+N个数字作为分隔,建议选择2个数字。 例如要预分4个tablet,分隔字符串为:user25、user50、user75 +``` +create 'usertable','f1','f2','f3' +``` + +##### 3.2.2 向tera中加载测试数据 +``` +bin/ycsb load tera -p workload=com.yahoo.ycsb.workloads.CoreWorkload \ //load参数告诉客户端执行工作负载的装载阶段。 + -p recordcount=$(ROW_NUM) \ //-p参数被用于设置参数,-P参数用于装载属性文件。 + -p fieldlength=$(QUALIFIER_NUM) \ + -p fieldcount=$(VALUE_SIZE) +``` + +##### 3.2.3 执行测试 +``` +bin/ycsb run tera -p workload=com.yahoo.ycsb.workloads.CoreWorkload \ + -p recordcount=$(ROW_NUM) \ + -p operationcount=$(ROW_NUM) \ + -p requestdistribution=$(DIST) \ + -p fieldlength=$(QUALIFIER_NUM) \ + -p fieldcount=$(VALUE_SIZE) \ + -p updateproportion=$(WRITE_PROP) \ + -p readproportion=$(READ_PROP) +``` + + diff --git a/example/onebox/conf/tera.flag b/example/onebox/conf/tera.flag index 99f62b45e..37329893d 100644 --- a/example/onebox/conf/tera.flag +++ 
b/example/onebox/conf/tera.flag @@ -7,10 +7,10 @@ --tera_leveldb_env_type=local ## 是否使用zk -# 指定使用非zk模式, 但只能本机访问tera ---tera_zk_enabled=false +# 指定使用fake_zk模式, 只能本机访问tera +--tera_coord_type=fake_zk # 指定使用zk, 可以跨服务使用, 配置相应地址和路径即可 -#--tera_zk_enabled=true +--tera_zk_enabled=false #--tera_zk_addr_list=localhost:2181 #--tera_zk_root_path=/tera --tera_master_query_tabletnode_period=1000 @@ -18,3 +18,18 @@ # sdk --tera_sdk_timeout=20000 + +# balancer +#--tera_info_log_clean_enable=false +#--logbugsecs=0 +#--v=5 +#--tera_master_load_balance_ts_load_threshold=1000000000 +#--tera_master_load_balance_ts_size_threshold=10000000000000 +#--tera_master_meta_isolate_enabled=true +#--tera_lb_load_balance_period_s=60 +#--tera_lb_tablet_max_move_num=10 +#--tera_lb_min_cost_need_balance=0.05 +#--tera_lb_move_cost_weight=10 +#--tera_lb_size_cost_weight=90 +#--tera_lb_debug_mode_enabled=false +--online_schema_update_enabled=true diff --git a/include/tera/client.h b/include/tera/client.h index 2ef68638e..80308a911 100644 --- a/include/tera/client.h +++ b/include/tera/client.h @@ -12,6 +12,7 @@ #include "error_code.h" #include "table.h" #include "table_descriptor.h" +#include "transaction.h" #pragma GCC visibility push(default) namespace tera { @@ -101,6 +102,10 @@ class Client { // Rename a table. 
virtual bool Rename(const std::string& old_table_name, const std::string& new_table_name, ErrorCode* err) = 0 ; + + /// New a global transaction + virtual Transaction* NewGlobalTransaction() = 0; + Client() {} virtual ~Client() {} diff --git a/include/tera/error_code.h b/include/tera/error_code.h index a03df0905..ad6ab2b64 100644 --- a/include/tera/error_code.h +++ b/include/tera/error_code.h @@ -26,7 +26,22 @@ class ErrorCode { kNoAuth = 7, kUnknown = 8, kNotImpl = 9, - kTxnFail = 10 + kTxnFail = 10, + + // only for global transaction error + kGTxnDataTooLarge = 101, + kGTxnNotSupport = 102, + kGTxnSchemaError = 103, + kGTxnOpAfterCommit = 104, + kGTxnPrimaryLost = 105, + kGTxnWriteConflict = 106, + kGTxnLockConflict = 107, + kGTxnOKButAckFailed = 108, + kGTxnOKButNotifyFailed = 109, + kGTxnPrewriteTimeout = 110, + kGTxnPrimaryCommitTimeout = 111, + kGTxnTimestampLost = 112 + // end of global transaction error }; public: diff --git a/include/tera/reader.h b/include/tera/reader.h index cc916c14d..08615f4d8 100644 --- a/include/tera/reader.h +++ b/include/tera/reader.h @@ -31,6 +31,12 @@ class RowReader { virtual void AddColumn(const std::string& family, const std::string& qualifier) = 0; // Set the maximum number of versions of each column. virtual void SetMaxVersions(uint32_t max_version) = 0; + + // Set the the max qualifiers of each column family when read this row + // This is useful when a column family contains too many qualifiers + // If this value is not set, the default value is std::numeric_limits::max() + virtual void SetMaxQualifiers(uint64_t max_qualifiers) = 0; + // If set, only returns cells of which update timestamp is within [ts_start, ts_end]. virtual void SetTimeRange(int64_t ts_start, int64_t ts_end) = 0; diff --git a/include/tera/scan.h b/include/tera/scan.h index 45646ec9d..c9023f9b6 100644 --- a/include/tera/scan.h +++ b/include/tera/scan.h @@ -79,6 +79,11 @@ class ScanDescriptor { // Set max version number per column. 
void SetMaxVersions(int32_t versions); + // Set the the max qualifiers of each column family when read this row + // This is useful when a column family contains too many qualifiers + // If this value is not set, the default value is std::numeric_limits::max() + void SetMaxQualifiers(uint64_t max_qualifiers); + // Set time range for the scan result, // which likes the SQL statement (SELECT * from Table WHERE timestamp in [ts_start, ts_end]). // Return the newest value first. diff --git a/include/tera/table_descriptor.h b/include/tera/table_descriptor.h index 8865d5a9d..4b464070f 100644 --- a/include/tera/table_descriptor.h +++ b/include/tera/table_descriptor.h @@ -54,6 +54,12 @@ class ColumnFamilyDescriptor { virtual int64_t DiskQuota() const = 0; virtual void SetAcl(ACL acl) = 0; virtual ACL Acl() const = 0; + virtual void EnableGlobalTransaction() = 0; + virtual void DisableGlobalTransaction() = 0; + virtual bool GlobalTransaction() const = 0; + virtual void EnableNotify() = 0; + virtual void DisableNotify() = 0; + virtual bool IsNotifyEnabled() const = 0; ColumnFamilyDescriptor() {} virtual ~ColumnFamilyDescriptor() {} diff --git a/include/tera/transaction.h b/include/tera/transaction.h index dc63a7842..81722f35b 100644 --- a/include/tera/transaction.h +++ b/include/tera/transaction.h @@ -15,9 +15,15 @@ #pragma GCC visibility push(default) namespace tera { - class RowReader; class RowMutation; +class Table; + +/// 事务隔离级别 +enum class IsolationLevel { + kReadCommitedSnapshot = 0, + kSnapshot = 1 +}; /// 事务操作接口 class Transaction { @@ -47,9 +53,36 @@ class Transaction { /// 异步模式下,通过GetError()获取提交结果 virtual ErrorCode Commit() = 0; - /// 获取事务开始时间戳,仅在多行事务场景下有效 + /// 获取事务开始时间戳 virtual int64_t GetStartTimestamp() = 0; + /// 获取事务提交时间戳 + virtual int64_t GetCommitTimestamp() = 0; + + /// 仅全局事务支持 + virtual void Ack(Table* t, + const std::string& row_key, + const std::string& column_family, + const std::string& qualifier) = 0; + + /// 仅全局事务支持 + virtual void Notify(Table* t, + 
const std::string& row_key, + const std::string& column_family, + const std::string& qualifier) = 0; + + /// 设置隔离级别 + virtual void SetIsolation(const IsolationLevel& isolation_level) = 0; + + /// 获取隔离级别 + virtual IsolationLevel Isolation() = 0; + + // Set timeout(ms). + virtual void SetTimeout(int64_t timeout_ms) = 0; + + // Get timeout(ms). + virtual int64_t Timeout() = 0; + Transaction() {} virtual ~Transaction() {} @@ -58,10 +91,6 @@ class Transaction { void operator=(const Transaction&); }; -/// cross-row, cross-table transaction -/// 跨行,跨表事务 -Transaction* NewTransaction(); - } // namespace tera #pragma GCC visibility pop diff --git a/readme-cn.md b/readme-cn.md index 7e136a26c..7edc5362f 100644 --- a/readme-cn.md +++ b/readme-cn.md @@ -1,63 +1,48 @@ [高性能、可伸缩的结构化数据库](http://github.com/baidu/tera) ==== Tera是一个高性能、可伸缩的结构化数据存储系统,被设计用来管理搜索引擎万亿量级的超链与网页信息。为实现数据的实时分析与高效访问,我们使用按行键、列名和时间戳全局排序的三维数据模型组织数据,使用多级Cache系统,充分利用新一代服务器硬件大内存、SSD盘和万兆网卡的性能优势,做到模型灵活的同时,实现了高吞吐与水平扩展。([English](README.md)) - # 特性 - * 全局有序 - * 热点自动分片 - * 数据强一致 - * 多版本,自动垃圾收集 - * 按列存储,支持内存表 - * 动态schema - * 支持表格快照 - * 高效随机读写 - +* 全局有序 +* 热点自动分片 +* 数据强一致 +* 多版本,自动垃圾收集 +* 按列存储,支持内存表 +* 动态schema +* 支持表格快照 +* 高效随机读写 # 数据模型 Tera使用了Bigtable的数据模型,可以将一张表格理解为这样一种数据结构: ``` map > > ``` 其中RowKey、ColumnFamily、Qualifier和Value是字符串,Timestamp是一个64位整形。ColumnFamliy需要建表时指定,是访问控制、版本保留等策略的基本单位。 - # 系统架构 系统主要由Tabletserver、Master和ClientSDK三部分构成。其中Tabletserver是核心服务器,承载着所有的数据管理与访问;Master是系统的仲裁者,负责表格的创建、schema更新与负载均衡;ClientSDK包含供管理员使用的命令行工具teracli和给用户使用的SDK。 表格被按RowKey全局排序,并横向切分成多个Tablet,每个Tablet负责服务RowKey的一个区间,表格又被纵向切分为多个LocalityGroup,一个Tablet的多个Localitygroup在物理上单独存储,可以选择不同的存储介质,以优化访问效率。 - ![架构图](resources/images/arch.png) - # 系统依赖 - * 使用分布式文件系统([BFS](https://github.com/baidu/bfs)、HDFS等)持久化数据与元信息 - * 使用分布式协调服务([Nexus](https://github.com/baidu/ins/)或者Zookeeper)选主与协调 - * 使用[Sofa-pbrpc](https://github.com/baidu/sofa-pbrpc/)实现跨进程通信 - +* 使用分布式文件系统([BFS](https://github.com/baidu/bfs)、HDFS等)持久化数据与元信息 +* 
使用分布式协调服务([Nexus](https://github.com/baidu/ins/)或者Zookeeper)选主与协调 +* 使用[Sofa-pbrpc](https://github.com/baidu/sofa-pbrpc/)实现跨进程通信 # 系统构建 -sh ./build.sh +sh ./build.sh 参考[BUILD](BUILD-cn) - # 使用示例 - [体验单机Tera](doc/cn/onebox.md) - [通过docker体验Tera](example/docker) - -[主要api使用方法](doc/cn/sdk_guide.md) - -[客户端teracli使用方法](doc/cn/teracli.md) - +[主要api使用方法](doc/sdk_reference/README.md) +[客户端teracli使用方法](doc/tools/teracli.md) +[集群间数据迁移的dump工具terautil使用方法](doc/tools/terautil.md) +[造数据 & 读写数据的工具使用方法](doc/tools/benchmark.md) +[性能测试工具ycsb使用方法](doc/tools/ycsb.md) [其它文档](doc/cn/README.md) - #反馈与技术支持 tera_dev at baidu.com - # 成为贡献者 阅读[RoadMap](doc/cn/roadmap.md)文件或者源代码,了解我们当前的开发方向。 - 完成[5个小任务](doc/to_be_a_contributor.md),帮你一步步成为tera贡献者。 - # Become a Committer - 成为tera的committer,你需要知道的一些[规则](doc/cn/to_be_a_committer.md)。 - # 欢迎加入 如果你热爱开源,热爱分布式技术,请将简历发送至: opensearch at baidu.com diff --git a/resources/images/global_txn.png b/resources/images/global_txn.png new file mode 100644 index 0000000000000000000000000000000000000000..0e6e8f950e1c6fb8f8d5883e701d3639f75496cf GIT binary patch literal 197896 zcmeFZbyU<}*FG#b#E?UabR#XOpmYgHs(>(bgCHo4w16O>^w1!!(%s#m5=u&k(%lH} znVIkXyWjhM-rt|^f6rRDmWwrf=A3=@+56hpzV_#xhMEE)-c7t~*RByNDat*$cI`U+ z+BK}3xY*z~7hIS<*RC;MQ<9U`bT`~cg*K>aop;AGm2)OiELf^V_0itE6BR2HqY@)6>;X#ZFtgzY7|_>w0d_UV6C- zZ{1A$T|fGIW7e?!8@qedHE862{)jwcOwP(F6(D8A_^%)7j7~KvOxFMWAtL)Y=y-!O zjOD*SjmLZR-@hV#nQ$!#(JrM+``=%xVO>i6U+akUYlm=F&|>#3DEdME^G7Dk`@eq$ zp=JNbRiP%T&yQTmNw8GKtDfEN85!hqd)Z*TY6@i^c13G9yCwsq8E1A^AJA#F-{91D zz|)2qV-9sObsPDK;uvsXkP5t!st{4lu#giP3h^Tiar3JCp!`!{U3&Y#Mq;~mhPi-I z08(b5%;1CiBjeXtbZp>{-boR>dZ3)q`I{pK9#?e|#D#o_e#h?m$;aQXpO(D& zND{&%_3{F%&}q;J*p&sh1D1EIBHa4+O%hg2qHy)GSC6f0KO|5@O1Wd+hT!7yx-uip zuZZb?W8qGaHtA%r7Ire~v3}(6A{I+5R;i3m3Dj8_vd*;x&>F>hl+N3fbKr^yNEjT) z$n?8(6!-xPj^iMW*AfPyb&t#%+%s(-u~dh>H1?|7;s`>lhv;HJk;YF@n)@@9wa^pA zvQSX|jr4SHx!-un_v+jziu)Pgt6sKvbDM_ebpt^i$=_9?EBcE6d7AZa5s$rW5%-;s z;4g!F(j}V}8h5HEko&=SY~AU9gP6V2c8rpYT?H0z;HvwU^nvRJ=h|o?_XC$Ta)sWZ 
z@{e2GMKv-7`n7_x3moZQPsIKnjAUwL$PD3|;2~|qctTuB!n?x2a(TASzq{AZuUGB; z^Yc@MhSQZ~OvzW%&LgUsuNJ!&cDotA55M~9oQPk#Kkc>uHS}D2m~6_2CA`Jz>+c&}HzxzT-W|{S8dbcQx)-qgG*6A<#bib4rTxl4Vy0HUHnq>y zpe*sbU6P|*4Av*t z3a8e#D>J%KNtZAXJKrc@$cm6CllxI*P~WltUD5&iSzY3^eiH-X*l0PSiS> zfz?9!&x)(oJhvN<3-0{MhFJmYlwbOEgpeR!oCwL8uQ(FXiWrOgfx}fI=h4wf}Uyyclm4bvkJS3;WMW{Qy)c2&iRzVDaiD@6{d>?za^sMGJL(trmPXiqboQK z3%NO9J&(asKG?P$`8{pAEm^0dyv(8>zDT!=B?H~f(kV4vDE}@|E_aju12(}8_AOs6$2|)@TptSqpPcFoNYH8a@wUa14!|ZXh*yUBTJ*(| zT%C5JJ=2}jiM?VVWAWu(6-dbA2 z1WtXuQuFf@L`f@+SwpG3kC^pb#?p-EHbuU6VHrJN)7cs~H!osH-+4&hbZ@CQ^GVJl z>Y0^EmqlqzV%ei048o`YoLD2@zjtM+-G%g!C-DMa~ffL^6n3G<--fSUuhf~m?(MS ztoHSr_lB|*h}U_)gIiS)TyfWSbZ65Up4EwrAuhCwvAS(!egbX7lp(PNMT6A5xC!9n zVd|29e~?-gH=TWzV6ww;|N`_8!s_2 z38FOI`$!P$`R)Gi>}26r3v&7NV#cl59TXj^9jqaIVgqv)6o?1RZ%uNEntyLdDERZC zLO6oYuJa;l0g@)Z!;B-O!1gE>#^12>h9Vw@U$Tbt3X|UsW+rhm7Z6gq3gT}1VJ7Z( zD`K%7zfU@)nH07%{GL^K-(OVSXx}d7sdrrN({9wOvMWvdo94a%9V#)AwbkAj)pO7~ zi0;)yW&o`(Lyu%LC~%qz0x1#SrLLLW!(D)0ohJr>^IT<=BM2nSttj2d^}V`WsQsN*E=TxA!;4X--;BY`*M@IyODWx`Dc z9?iF1$2(G;kinNM<)Ap1+OGXg&rL|rxbp`46`s)XiO|B$vpWbuvjMYv^!$zjEJ6J~ zECeKXd@sGYM=cDKZmeS9c`z02d<^7v&SfZl{yj|`)0E&)fs3VfNw@wP6r^F6+Mbim zmy5wPi$|9(9h^M7?L1;>!X+xN*GCIS`!fbbf~*o0OQHQ2J}2`y>sX|L)O-bV(ib*k zMX84aluFh?5)7FV179Yn3`I<|!{VJFAQ^n4Cd**OD-?k#z?*`DF?defW$`X|yJ-l; zvZV%%@8joY0iSzr>o-ZmwYdgv&+|qLkB!pq0%3-usmXyAu-kG~D*c!KZ0?S~zr9%Y zk8HKTVX7zi5nRocK_j^9b|Mr=xl!he!@zj9b1iULx}=v6$5Yf;lHReXcID>!dm;Ly z9V^_zLE)h)!Xr{fiHnqpAyZt~Meu_UuljiAp^WTIGsbUm;~ORMiAmi>!(ysrf?&gD zwJLvjUjG?*FvA?YrCe!7ZhGBtYmShwuj@LFDf5vN_#0PcE$ddb^JT&fY9ULNGsn~c z&)yrhL~Zz{e9?n#VfmTzVes$rA-e9;74EyHyXjXK-F@Bd-YJ``AbnU7;yt|?q)i*L zi31(L3rgb)nV{!^c9h+>``DpJqCYO`Cw)2~^j*h6r$n2I_`VL@tXaP5YsgU~%x2a2 z6HQ`&&+~`)ZLpE`BSD%eVb6!$nqnC#d1 z@{IQD+$j$Vgnh5Bx=%FjSy2*BO$^DPLX6D$vw|EDPI=E~D(_{`H)|6c<3Hii;#on# z_ycgJrtyA@8IRv|$|ap#6+4)?%B8z=ZyJeVf$kH5VKV(GP0I?(AdZv6p=;NoEGuAn zFEw;(0@!611OKLw@!ey>Hr2yIh)lJ8b;aK$n7Y4n0ldp(mJ0E3k!_#K$jhX0smkhn 
zw;p5+-Es>Zj1J{=fC!(?hH$cJSa-g=LCg(V>n88q*wU8IxH8-{Uw51edmc(guPye8zB_l!QS=bm)?%DQAhX^6Jh$WonA+z|taX=t{JaEEose8|A z;JGyI{Qa*pRu<_Ga38;oUvnS&aQzM}e@7+ociw<00e|?*J}x5sHhf#2k(C^U7XRJP zR(>+}c)VDO^U>xK)7Um8v@n`d<#@gqgbzYE2{_No)G0gCcT3fHZU^?X1z}wEaTkaD z`0<11SkU5-kiS0d<#|Ta=?aZ)af0_e;|ClDuR*x2oqEQin@)Fl^@RHk84c+4GpuyR zE`h`M_zpfha{-Me?y~Qz#g7QIEonruTrBxRnWVt+-d9WH6~hR(7|@>&@iv}yD^u(1l!aRfbkH~4AbwA#+sU4i5N^e~py0fV1Kb$)ala?qYW#4%tRrpO?v;88*I<6cZh@ zc;ermyJ8-D)^JjBQM*pY?kTEziH7N}>jY{_9c2pHttOP?Iv-TC=lRA7j5f&{=NQzF zL%&Hnh2Fq~TxEQq<9FhOm6&06In|y zzC~36#9!k_qQfS%mY$2e>!_GIsi&%JtKuel{!{0{(omla zOASGm!O>Ev?}!ULGKp(tM}7yLu?i^mY;`c8kQh+*T0?ma(KWgf3#faoZdF8E-sNWZ zH-;gLSv7^90up~}XUGc}TOgV5Wj{1ub0ex#!(=sv_*;YT?+>xeB*9>VZG-2=e=#`< zNPGTzjL-^;b?!Rmm&HeSQ;8!7v5CrsYKY;bHu|mXV~Eqn68rq7^haKGOKQ?C0zC z#j?W0P@gQEb9waIjtKR^Z?xa7p-7hDBmlL}ZQ{^_H;`XUsIPX+o7d$+E)x0O|GUWl zwdDW$J<~FCvf6D$=o&AB`w7eug+N7TaHDKlt9K z)bbJ?>e@YFU-$2#1K3yzfoqX#qM!w~!)#Nd((Xd%u~vc3Zbv7TQA#tQ8){W6J&w1g zO()7M>V+LV&;2x1e?L`Osng0mq{WR#N!FkM?nx8(w7Yvhtg}Q# z1SDFiaK{J~sX99YHS+v8;Odf!5}) z$H~t8=q%fLTW}~-2_6NPdE#)ro5OB9v1AAqK4l6Mq~2xS)o!N2L$*7dTIlZAEiy-h zy(g&qTsfwQN7XT*Xv1&}YH7=3rlBFqo_=~}s|54Mn~a(xr?~G+iw@!x(vQ6_Pk#$_ zeB;%t3ITj<*c&YT(9`|jVUv}%dPF@j&_Zg~`}mf;Q&aVB#gW}@dTsd609O@sK^f1f zS5LtU%X z;}EuJKVRe#|DuF1o9k7@9`o)s$;|{UH!95ZFN;5&DPL;rl-yKRS7c+#kNQz~DNjH40n1ljKz5mdqI2=pfOk5Kb8*wSd|U zDIBIr+%LLFrr^G#xPNS`WylKSL2sbs4_o&XPY^=ik^|Xc7!}kDH+i_enwMTROh9u_ z%&cI@X}2<0wPOs67Qg-eSf@HR;a%$P{+tLe@wP6w39LNwu(G zox6<;>B51Oa-&`zX4uMyZ3NHAJj|-XsGyJ_RD7c%x9>d=QqG{@%Cutv)Eu!g`Elr{-W`osJlWS!j~f;JG@ z5_>)qhB_$-$F)oqj7iVChV*;fY-mptf*=i#PU+r?mDDxrb!%&)Y4(rFe9i+|Y&c-cPQ|*DbfyYy9!DA`Y^E zzA4={_sXNB7?CHELJ9wCUVik`8qUKcN8O;vh>HrIb&LW|?Fx3`b&qEpdPOI9mjQJe ze)EpC##wX=ve%R1HsfKsGuJx$cspUWEn0?Wq&>Y%+;^vCKn_MpZ7ho?#H&#DdXzTs9f$JMz$dfa`pbLfia>IBREIV5S1SY7xqIk1`r=AQ0_u)TDE&$ql~PrjX49k5vk|A; zfJS;1bD`aD=#wuW&%RHC4zEJL4Ac|dERyD)$FbKi)|GH5DOr2;jo@}Yu|Cb9SPi)T z&-(R^Nm%Ff(CepEBjCxNigmo-?^9c_!gcz1kk4_u#pkr}20c>&e%z@l+aUi}fpaTX 
zQLT~0wY>Pvwz9SdHpfh-PR=#Nl%5lwxZqZs$I;!L2K7a}W6 zr>_PoU){wJit?*PAAytjVVcEZb}w;XeNIEE9qgEYZ)Lc(qAGiE&oD2y^A-BeEea+j zDl#(u<7rrBk1LO@m%pc91%H<~FHlY(Y9ltsHva+XQAL)nd{Te(ohV9i@`&kUj}<-C z2Qm>oTrhX5*$94<}EB7LNb#3#gp zo@Z*9L;pN}nf3kGp%A%q?_$G7`?|wXy{GLtqFyIEN9#rP_Er^wC!VA`xM(EGVozgb zDR1-G3B<8=bMkf03eJk~^_Dw5TIi}i|6A!x(j5^1;NoLs@L&8?pE>IKn=nwP@^I7%>0!uz%FV%PhzZhZlvNWVn1m4zA);Ayj&YPVE-RbsNW_dsLPOVJE^ql zHOPshUlWD$Uc`EFEMafNl_I_#uo`os7pNiZZ9$1NX7PhNsqY@4j2ZEqkt|Zm4~gi0 z!Qep_0sJBIcGBjAH}8CMDuh>N`Ur(_7+|@JoPKvfp3XPlz=8b`8Gwx&43)xup+j8# zDosB-Mll&Wrfxi3=eXin6Ku2Y$CCB*yFiTg=Bg$?oXp-j{3dfApn%3H^sOD9Dj5L0 zoEUcBj6ol)dhT{XGDL*~kPnVetutQr^`0XdJoc-^UmsZiME0HmiaN(Q{hbbY#m2d} zH1-J$t8-y5dHcoTxyNpkR4k}%@^!5xiV0((j42ob@2hgC%qyZjt1xno=tatpPe19 ztLrzo@76XQ@O9ksXj?a|#hjnG{Cm!yF}UYxJJQU9CUxv+0T+Z<_{ZCLC{v!zg6r3= z;paWV=i}S9oqCM>QBu*+PnXvVlK6VJAem>OIgjGtO=jI6E_{x)`L-p-L9^8rMB{0- zjOF9eTwikd#&y;?CCn~K1~&7D{q}Wi$_Pya4ty!B@|M%efX64$!nQU0+6|mU1nnd* z-Mr1xe(^mQe&^uzpgqcNq6~WarofezxYl{F`Gxv-K^O`)P0#}JEDGs1q+AtCq;0gI zE35Pl={hd}`0)n7N3AR`%I`bcsuJyHxuO(YnkTPE3v>xVy=0l|Pox6`09-?LF6%jt zzN6R4;Y8OgNDSFgxydt;?ga-%^*@ido4n1#vg9aTUC-IC&W=9h#3sn+tx2sfnz|*g z57#h#TaP3F+z%)j`!9r+)867P#`;F9>oZg|y662`9a9&1 zw*ZZgA0xGgwrKA@>&LrhmiYkx^5&|i8-kjvYWT=}Gd>@`>D|4Upxl-2#p{#v0jpI= z`jJ6p?H~#A0~ymeD}LOJ{;b$Sh(dy?H22B|{y?7hUl)99|Es1{x;L7QI6FH#B{q}W z0o2k?^52tE$m?ARF6{DBl^kz}sDoNPE@Y&Yw!qlXs*O#EfjGw$F3lt;A zijQkl>DP9Zv$xIVOH*+u5+`da`fbnwbmX!`jC6+#T>+=p(RlCPMEk7m<@vXn?dGc{ zfWL#eL{`VekAICUBb6Bo9q+PId7kC*)ONfyh+$VH`V-sQz&b~QexQQDkP4|H9_oSG zPtLHo*)YOTS5~0_8>Un9(j@d^+(9}|<+iG|((KV4yYZJXPcKVpNd;0+)v$vIeUbvE z5*vyvef0dwco~BORRm$t=234gj!=N+^{1`ayX;WjlzS&YsZunbC!F%!0I`UH{(cKYkF7mQ^@c2)(Ut8*Tjw*MY`VxOb^3Q{+*aRGYL}!*PnW*aeo5s(~q@{J~ z90fU1taJQm3p85~(~!PIpy`$gY%3d97(V)fBkB@IMS(7>VMM#)hc6meU6JG{u+Gt< z9l+`R*JwpXZFE6Xa46I$jH_Yok(FwlBZ+pv%j`$RiY?S=%E;M%QParP5c{bc$#jxt;>(edd#nXi!r zLmd6+Mr=!a(m^KFhgZkj8dgX@4hmMkuui=CXB`|W#7c}ls1FyO(8S+Yl8Q%pS)MKN zezFpH@_=iGRx2Ya^5FysHnRGkpl{8Bt3vHQu#kLlYt<5TW@*h-bx>lwhhE6|&DhUMElTJp5K~pk 
z;{?o?I6_&$>W(tVfZF5lx9@p`PQf`w&tg|F*8KD6=)B*ouvFBkbH8T6W|Eg*HAm8a zzZgXd{w&%j+6Od@<^9zh$W2i}GDA;`5}@zd{Yq=JX1a$926!qQ`4b9{pzH9>0H5D#+#hz2TvNNVo4MXvJs!j>hjF@IQ zyt93tDGjngl-Tw!JlyW~XeY^F`uyX~iM-w{7zXIWn`S@K@EI^iCOSHKgin^V9SEjQ zLU*mIP>=KO)_HRa@?1%_EBoCSjf_hD%jw2FLzB>qih%zM!MFZ-;NSG3xJ6}amm$zS zd5#tv`BvV!JtMA%GIDE$HJlbAkQL+&AM2$lef^;Ur@!8Z*5WokG*Ze7?KUR`mVBhR z*ywO6@E|i1=f0P??Pvk^ub5^10JHDf`zv36_p=46F7jw0tr|8$U?~*aGCf>myVM9` zxK^ZObL6p{>}HNG>zo2Q05K#EbCA9aMF*hIx%Zk=M)m|}A4TXU@Fw34)a-t2xsBR) z#86f(9^@rw7mbZ~LGXn~l?**QD8OERKxKmM6b_zukP8KHI5}s2)(tF|>!KfCYnB2L zSijrGc+~9YC+cZpw}8lw(Q#$q*52=D+(#Yn$V@@T%aroR%rt3#%RHH$T6Wh-TrDol zk#+1kkYzQvw)kP>^Ve^bHQq)PA}P*J9q|}Xzn1>tF69xRSNbHejFD`^3_U?}*V`P3 zgCGc?SdeW)$GYCHJlp>Qoux&Ys;ds&yFVgpfovtl%Bph+k8cOzrK>= z?p+s80QEMAzdkHCLYZ8f!FO96(;}x=oaPyJkSIt7WmP*ncC0ao7 zY@`D{BOkk33a47?1GW^He8yp>K1W}L_r_@9R4I^*64D7-4*aNS7G<(Jts2XJ=1`4k zhiZ&wJYwHv^rsuPJn$E!uSjK&@f$-&X~W+N=SnTA=u04+JeyK>-j~&LE^&3S|7DYI zbnpMgW5R`=RN5{&NyzlX5b*A@T750Z@$X0Qk^z-x_Q^`JL%~E53CqI}AUK>(YJEZi z2bh~2Sh~>tU`U~`ZYemfP;yZwalWr>yXwUh`;u^52jBn;y!e|=GFu)WB?QSfTJ$3* zgsi@iBl$y+_{;i|CA{kZ59;uX_h+$D^N2(9-y`c=a}R^b9T$M^RWDzlJO|0Xc5dPb z-BkVPp5)R@^MdU?3x+Q#3a$TDiZhJz6GG;RFT^ulq6;dXr&Z(%fs_zp2$^MNc1zE- z^?z}f;1VqW?PCRm(z0+#B_aXn3w4^jYCmayj*cFL1J=4`yXjmGh%x2z)KUvxSJ_WL z|C%Pxb^ZpQ54Lym+V=k9c&QmyUkky^VuYmq*A{7OAVlsi)8jiP^S#&&J^^~qAhAq? 
z{2>-xUVn=~M~2B<&Y=7c+&`V+O#71%ym@P>K(Xgy5;Imb5%&Hr(N|r%3d+z$T1#;itqKXjBZRU^~UhFjuq$%Atk8r z1$?SY`_?yDNRjm93o0e+4C7k_V|Q(am!v4;P;|oNWyv)sTyj4B6W!DMZ#kA`9<5Kg zftvBJ?dD{qERYBbwa{|gK%pmTX!rMQagK!CK_~Y+@G)6kItn=GP>=(8r8D;NZCT9< z>&Lba*Vpoks}R3~3>}(Ib8D_H&*K6qkE|O3`#9QqQ)iMReoj}W@Nw2dlK!{uZw6(= z9cMM))@`zdRSmw0j3F)_m8PwG2Ix;jD%T6OoWbE7+J*~&L{GB8!U&hkrerc^`o$wX ztp*ZyEA^9JJMDZLG()1`X`K7h7fP>^;-dW&{z2`-F?YT?%UMr&Rxmzi)O_;f2~YXP z@*ADeVBP$9!`DhZ!P2kR3#;Kc<_8-SBXAk_F$1?-P~KY2M?Kfs3$hkBFA|2%K)qg= z<2&Cc0ffeop!b$idG!VOIYP1BCOAU;DB*59mY(m~Do5d8-5qi643~?O zU4ycsLUMRgqp|0GQq_laK||w5-pA9uLQj; zQ(5OI&>m9k%qA{MM*3WES>I%4iL8Zu9nV8P^+o4K!G#YEPnYkdAz6PI7)BsO=O`M9QWer2d^GGe zs)LXEh_F{^eBGER=fxm9qbp>ReH#KINvM&}sr)bZ#rLLwd>=LZL@arsaj-S5$74|U z}**)_M}YzrO@7jrtDGxqvzG%W8Pv7Y(Sv?V2*1v?AYjL0EG_WBAD^c1yDW=|CUwkf?G&%?r?crDLObGlPl zptJt8dLunHDsVKvxaoBk`P=ILL>#Y%6JC!9;-^i>zAaS*{y>a5Ccu9TY|Ztq^gp*y?F_I?)wT!t<>QcfWMkK#2rE8Yxw8<4zl4r{=ENJGwSor2%-2i^GPitCUAT?8cP%l;v01AU2q(ENBWjh>m!oIe ziyQX(LYpto@{77*gZiWbsp#_Y0mGyMW-lwce6&1P)So7Y^OaTwY5QYh!BZc&a7E}} zRz3iMxZ7+=6D28vdN(C+Rw zy$b76qi-i&6&Hh1eY&{NlX$$1? 
zCh{b3?~#Gm5}_VnR`vIyJL{xArYp;=gNd3^w`$}pln9zEZe~#{fsyXqLVKpVry+Ya zXNCUr-vY`w4l&?gd^sGSzji5lHA^8PEmHm`3h5!2U5%T~OIN@bPh|NkhW*;F6zyV8NP#FailI21^i43M1Va+_VKUq|rsKiNT|YSA=&~(t zbQLFRk5q7B!JQKbzipy|>RX^ztt53UK;SXF(gbE}eC|6g^-#%f{9;#cxM)|c*S!aY z1fq_C0XEYQ4b%iHuZ9D*)1E16Pg{olSH^#X=V=8N*n-_K z4#>>`glF+hVg`u@Db7&P(x$(-*#906>ZPJ*JRxd;d{(3%E=x1zrhE=AIVC!y*kL32 zOfcov4l3^2A5y@_{R!Pmjl2)FCT1EvC7INV8@561Rb^SVr1a*Th+a(O6&OreTxe~# z2;9{$qX*@*DFY)$0QANcbAS*W`N~1EO?2n z+3M`AaG~({n*DSg9e{W?k20FGy@$RZm$}Q&cm&M0hADP}N&n%$I zTi=X7j@vU#4WfIxwB15i)%8TdIx23N;gWm1Z11vPI^B$q3ltrH0Vn#Z=;GegiH=~p z$g73yf%^9h{2zjDjzqUmqbtjMKmjOa{*NV`XV=%qiX~qG^6xix`*erL1eil=1e}jA zN|E5jS5d-`yRfW^v)c7!<*;lfW3PyPo9y;DOqnjD6A|OvA}Eeql#R!$>ucm;l`QFd zIZk`uk^Jed!BKD4-VCe!s7c&`iY&OxQRtv_j}XWYgDB7a<{?}NF0JRMALvB)XzcTT z4{#gQ9ke0)5f?KLUQ6!+DaH(ar7 zGws>)Z&&T{@oqm`n&v8`jSa4t@4#z13>w+k(B;nVy!@E8_ttoV#ewJ0^U;h>om+lG zbCj^*xO4RJpNa>${prNL=WZSf+#{3@ia4T9J@wTMr7A;BTs>D4j1B{q^KH=*UST}3 z#Fm~rx2<E`P}!%~sW`?m?8H<|OJP40Lh zE47z+p1UH)ismQBtS~AJgN*ECNxrPacMwb@hu2A6=yxEun(K90c5e1E#dYebM~|Nl zz=NBLht@oFfjWPfw{!PqqO~PPeI$4s2Dqf{rnwz(>2M3CmZw!_f|lNktHWeqP=>y9 zZV$72pJE#%V}$!I8N~kKI0UIZNqO!V;Yd(6G4F*~T zan{pyhfiuH*ZmcjZE$(THWsBB#t;?hFbUPA_qw@`d-k^+vd^Vw5-iSGdFeWEhsR~` zGuQ8XIK;YTTeu}qw?ChZd|@nv-Z@f2UP|>P*$YQU;otlYSu#?XtElN=%4I-ya?GyQp~Z%oI~Fr#Krn`p;+<)_PW5yNg8D1rtBr^Ci zeJe4#)+Sv9c@ztMb{riv9TuFjCx^8S?U#xQuX!z~x2CBf`?LC#7A}@{W`3y=HLgR`2L2XY@Eok2J2FTiFJLDP`$l);c zgq$)foee3_OtND#!Dk&fGPZ}^xncHBi?`mF*yH2 zLf>MEFbao>8ccsY#8)O`irq*QsMn7ZEZrbJrR=W{YS`4RRaw!Mm=J>3E?ouhNl%_H z5oVApS-1KuL9IA$$OMAXluCqeSMlW~N~&?Fg?kA_%1fSxD9?&)g9eb{Nfpp(XhFqXe@a-f+kr39PfZxPp96B$8vl5mK;$X>AH=PVoudt8VHs%3NMU-O?xq)rr+5tBAH zE{fUTAuRuw6d2B&7A{8N3Ii3)3$rAx^D%F zq+t3o!1EfN1>`Tq)z|`WoIguy)VUV|2!jfNe8E!Z9V-vBD=O2Wej5qRcu64!?=K|$6aIUBGGA+Ly#lWEGRSV%7z+K4dDPg!LePIxN^!v_O^j`ae83W5t zDJB2X0?ho4ej>`v=5I`fiZcv$X9sDJGBo{!XHQkcOWbp#4$MVk1Bym0GCJDvNoY|6 zs1>@)cnqUkkn_8t6x>JllNGnd05aRx{-SB~`B%(co_ulLjJXdCl1)gxj|h~1n1WYI 
z>V|RnWaf1oRby)775oLp;sz$cmnOM{92?T+6aCn?mET@TW)9XI*|_SAQuPOUl^h!< zsO%yVwf^2?nBw4xAn4|BqSl>wr_LxP(zRDuq;sP)1|h>u|9n6e^BXLLGB4|AX0jyw z^j``{fP;Icg}xrId`%t`cU)531miUOK$KIC2yF>p zbxi4h%;k8jvQNh#E$;jWM)XLr&!wBrO$=UQ-1?)`ta~Bw!K}XHyZHyFs#%KNaqa2= zvqTfk`aNfL2w#=o>w3J?(4cg!|MqP%yrFPzFd3#o|6!*4i5hYOV~bSdU9+z4$K+Xq zZJh^D_hd8|J4H8ed!33;KY>}Y$(Dg-OQ)6#tI~RC0Zeq~f1|%+J%|~1V5rEVyJa(8 z!gQp3f0wvq*S`b*#R3yeDOF)X!>dT#B5*H){fr%GEcrY@(Nd#~syW@pL=kG3Gw5CI zx#BntoPoCHGS}lw2oaOwiOXi?n3_FM3pZ~~*B2|hFY{@Jj9>#6@cUil=(`mw^Uq8q z)mA%47^Z@Wu@TeZPn%GS$5Bz@OVIpuA_4)~*JXpApvOM8xf7hj_*~VryV5vliw*3r8m^SClnI} z>3Wpk6es*4k2rOx?OkN$^7K!|d>eA;Nd%f(lFU2^fux`nnT|!J-j7KEpzaN@Agc_2 z3ATep@39o6PQ{}$IKF(UHkl178nn6T|Mx{RdO6@FUtHm({7xBqq#g4tmLji*M)JEVf<&T-%Ir0&?{-j{ z@63A*od$8P-8NwLuR%kz1$#Y9XTNLrjvQj#m~i^`Oi54`Jry-;-^#yxZF{1x&bhP; z+!q$Y_`ex}yz=XvAS37D#!RDL&-UUb&=xywA&1+2fd+%p;a3ew@U(FRH9zSNTG=#f z+hENPWsOExNm0}OE*9*5;vCz(^_EgYDApCDGof-yK9oEW(4|Akl3*D84TuUXSr9T5 zPW<`hC;hFK%SPL=q9bTg+RrFIKGX>sjrpu_U`V2+6sNW4iQ9VkHl_+5i+XVB24s_` zj!|%6*y?^*B@k3d#ML{m%G^_8-vQ&|bfBje?-NxXsugXAm(Ss^gYKu6j`vlt!tS68 zr;jd5WVk+XcsrXF>2*;t$nQA^6T>!aorUc=*GnJ znTAuZ+Q5>}9-~9dq$221u*t$VPkPLg069Q6a?a>WAf!9|XimykG<`J<>n{xugKZrSuIET4*0}F35HPU# ztbNYw=;v?d0Tr6(V<2$(R4F?!kS0F$h&xF7$AKDq!X2CF8X)rP00CQWy3VCAbNPwr z?ne3dP$1LEe)hA}Okww6L><3YbGUbHk!u}jv`Oe5IKyVW_TPVcR-DF$&9Z9?_|F+Y z(@IV?`(Cj*y4tEgf4+8f@5Ll-SiUI`ZM#ALNc60({C7s$FFU-Oy&JRi9(ceZ+5`G- zm(LRSt`mA#oScKagzvGI3SQdsIkKGdv99!ZitpuV-)a`+4Oz_7ul%phH>>l;Ko5!X zag&!vK4jpfw6SXH11-DBirzD!BXO`&kJ~@yyoTF@iSY-jfGkUKX&`Zg^ljA1f6CpG z*f`A7HlKVnK_pW_6XZ}iK=>Wj3nq*abbml)U9h$Ex8>?{E7T=wL|vkfjsqKRMjnCg zvq148A?vk1QJ!~0!pptR-TzNpxH3Q}fZbu~06e-SGy&*!LM1OxWcA(WgS2bG8)Ybj zXT8cJ6*|M-k#U5A_mzyDdEURyV(9zV#kQ)S58w9~w5D(e(vW&{(^KRW$NBMg^;&cl zd^uwa^!6Wvw|nTDQ^JqzXuWO-nuk={!Gs=y84WT=ZACB8ep^83*_W<)p3Dc2VBHu7 zq`7v_M3yrRnAfmd)&sq*LpLxLLJl~GioF(wA0+B$wNAg`P2n`cYD-^}j|qJCXyD8* z94OZ$!Fx6wA`oHD>WOMjF z(Jqq0t9E28Lp~wjtN}k_x6KYt7u{3%k|1QJg$K2Z1? 
z*&x{+H;d5C?4&e?2|IRmk-EnHPe&&ZE z`=?;2L$|=7{$9Ia7Z{9zzrhz$d{Pr~zTIpDgi;DU5qI=lWG@z3Sz$7t!9*5v z;zFCw$P3UYGPYpcYm~IL{Cn3?SNIsHHhFHdTLtoXg4ey=(vbL2-MujZP<;pRPW!lb zy2aqlC_|)?$T!d+yMo|u|OtH_+2K7Us>+z{f(_@#qNtulxI0*;+b53U}^DLu@R$Z6YIL zVg8#~e&DieB9NR|#0TA}(s-vSfgm(x=Q(Ke;l^zO@4Z+HualLJP5lk}shFNw$QN8d zr9-%vB!8zkK{}`cDTk##0V--O{(z+rR@J|`x1sb*GP-4bOlTUbT zio6v8*7rAn-Dz zY6R)RHRyRsFi^JYo_~Jnj)gd#hrI(kVPem?>vQ35TLm3MPTByg7K)pKp{EL><;d<- zD^3S6Hn9M@*d@cVJJLT)5O|pQ<;I^F+F&>)b8aQCz&mYB0kga6S&>4iHq@6a=HZC# z^UTo0;`N9HTg6gZ`QN>h<3qg70WdO;i%x^gUgmVHb6RLJq}Y|$Vah-wDdMlLmHoZl zDx*?(F{)<|Z#EhUrhq3lXT^D}(h?=N5X3Yp*n$^JU@lz4c{pb-v60-CLA}RPPbrK` z8&aBIRJRsx@#e(4Vh^ZQZBpGs@GVS>DFf-&+Gb@3g81Y+ltM&?1|}f{e;ybq-qVyfbh& z$!D|hpx+sNp;x`E8KA6a0M4B_%~IpCPXqU@ZbV_6U-WT9`z4@I6%qMK++FdzH-v`J2cj`S>Cn7)GGUU2+`QAPXGD;ouJXB(tsTg^e-Mr z6UQ(iHsClmToC*}2uQbuzqkMHwf=MV9R|NSq=CP>RVb+x1vgD2^@brObny|?qjKBR8I~qmxSjn`9yy} zaP)f8Ny0)QXEkT45Pas%=u0@yF^df@Sa+ve$Ax#URZvcE4`RyRq)oB5k`4dCSjhpY zGBxm-@43Zry#O%-xXj<3+Fv{xVKj547(7z@cOR)`AHlBIDA_ta5Q7qzu%wUvgU)@6 z^`EW`zB;4&tAeh(sCE56-!KGZ@#sVV!jBG1hDYMgZdkGasQaD#pf9&ynwcUf8-SS7 zyZ8=@zAnrR6COii0)NzbL16 z1r{4RTdl{z9RSJhXXoJx{YaTX?Hho9As4h5OfFsugH9BURtbb#@Z&qA-ghesuT9+O z7oXnWXffyK_UDJeyww1@$X1qZe4mK9&DjmX3toOz#DP}YKhY*ssjT*4)tpu`GF6H0&h7P z`4Tw^+q%tMrx0x&onh`u^|JF&zD*6@hfg0p)NIxmKDj2MVQ^3d-3nA6ChTpz$Rrt`yXP`$NzO_5i3zfOo=3exR4EMXu;rw)OR#30OR|ImN zNDKrobT8qZ&i{er8CI2R~y0NN}Z-BK>pi!R^R zd5<@s+(u}w^MLKMu^9mwkuyJT^Zx(Vcz?F?9AN>qn#bIah^;>F~gjz zd-o;)Bqs>ot0Cx>*1_-kLZvX-Nzajf-{{z`CxKy}A zl)Ag(IG9jDMZ|aaO!e;3l0keD1aGM=<+pd=YKTPuQ~m*{xE=r&I0O2rYg8icu&-J- zSpcYzc7}BCh&}y_V<9!l7?rHH9!T3K( zc*8B(s-d5F45R90Ee!isc*>xB95^ zf6)BMqb~p6(%;u-uJToo#nlmZABc0&wAA%x)wFSkF#3z=p_z z-N&i^*2Ie{ZnoMNXL&~nup#XHLcjRv{wUg=cX2DUaoito?+xp}ms~2TAtI1EHw`G+ zC^(34P5}89cLyL`=*GLD_*|?el|?vob;`c3&lXNL6SAxl{vC3|{|e3oY;D~b6T%<4 zFL;gFy#A~6bl*6IaNyCw-DQzI@w+SjeeV8G@h=(_|5E7EV?Y*PD(G>#z4mt~!=q3` z)wPS%^QltikgMj6Fzbswz!{*ls#{DghGQDU>i%rJK$C_uzH&#x=0lX1F#R&BNI}8k 
zPy$>?qa7rH{bfEk<6UhO=yTAdR=zJv^_Z~|#EG}v?;he5UECkL7G6KNiTM|N&)=xo z#jn#LzWvffUT=V<1lOw#xi6&Qq66IX_xnN`*-uMPE8Nx`q89x-Ut|1%QYzh=W~XiJ z=N4vVum!Nq|9>S&8yyeH0(shDN=SLW|d{&-bXcUSQO7mjn!<`4-CD~J{ExHm%h^G^}=mtJn1V8VkE zV1$}RP$&~+bz;ttj5rhdK|!}p_P#7>6IuBhe;?t#R)XW6f?{~CI&&U<_mzZzROk0J z-;ZH+Zoo;(iVC#Gw(hsTahI9}eDnPselgKFKkMF5AtiHBo~){kEeUO0>Ek1(Q z%XREYj0w;uBND7J&WguGB~^T=>SLb5AO3uG-H)Z6{$3_xhV$`(Th_Nym!tuUljO6S z^r6^K)`I?%bo)2ig;lHl8FIg88mTTzC>8%QA}rjKCA^SgBcsKekX6HPT#{}tyPc9j z3&p71sNWK;Y+MtHrdT$(Q}d?t@xgbbvhD%n@>9(?#x9);oUhtZgb@9y!c=w|*-Kb# z=(R^^hm2BK8f90B#GQl`5aqaw+IGYd{TQQFL|lOA(})bAzQmW7bNf&o%|xzU34Qh>yL~GNGB{KKeBd( zgLM9Q?@xBcn9j~nb$cPrn_Bisi{HhL$WMg)Mx*AQJ~M*o^4dnl{f5`gqs3+a^N@i5 zK13}if8=Tp$6b?<$x!&Od*Z1cvKqhTIR-6&AE4T5f+sMj#uOHx4JEztX>;9hjn884RFzMr~ zJ&w1Zf=sO9V{mn~P$9F{zQS{LcrwH$)10_7%`pwf8dcr1MylQ9sYgKD332XfV@L}s zvx{K7Y6$k+CAr1d_2wnfTiTqAtGIhhb)p6K)d9$%G9P$$;UnlY< z8o(qCkez~J?Y!+rbHC9qassSTu%A1u?n|If%<3(X+lL*(!lC|z-B+h8JzY7m=3U0x ze>#l|R+an2qgVE6-pvx_ccor>geNqoUq8`{5@hHKzcr#kCt}OQ{25WMb+<>dLTSh= zQ&*Nb=MGTggTYITF&nCpf*9!ZioV9@(QgW0L$txIa{Y8?WA^xrU`fbN7(jT-q&iGY ztO4yCrh7GB6UxL+b>8s`Wr%}A^mgCi9ep1y{p`WY@?ncem%VdZSlROhlN5->5Ua*v z8b|k7OJ^Y9yO!6)PL zV8lKna+F%+cGVi;3-s^U=!Mx|{bTGHZ6q~Ll zAAgAFM1FaTpraz!dyD()WQ+fx!}g?mgrQZ>=c3{-Nw_LU6amT97b#_sEB!CHXIm8b z9aq0fDlZ9DphV~9aeVI|#w2(H96yA=j9ztMI#=zlJQtm1YwDa!^ZB%BI$v-)jbH{(q29mW>Xj|mNVJ!gpO+hEABSMMI1z5`ZrBlJ zxLB6dDcT+19*wb-IVFGUh#=}$wRQNWH@7j$vw7jTY^d?3>p`VHCHB{)A=O;fg<$D? 
zmjzhAP>N8Lc~5ybM>tgOQLE&Bnc;c;x#<<>!Fk}YY4Bs;CndKJN((R^aGR4nZ9PwB zGW?QPmvi)a`GAomG?UIUa%A_eSu&f+VAUwC{Y3;jDEQA zON330n=}&JvDq+79#*~jWjOe-%!415<3b!Y2bt^J=RU0E;6fw3garzkXDXyLHT_zb zHS@n@sjKsm>g5iyy|D0L5&P*X9q%q@skF%4Lm_+V zn$Nc(Un!PK6>gt;>o-d}xxS+pQY1yZTR7bxAJpHv2drFnJ0=J6orlXCeMi!vhs?`u znRBK$@uQ}Go}n5q*=Kh|O-q)BvFr*;l!^UC*8*t z-BgZl)Lq!fxPj5lgI}XP^_nhbHceG}=1rBtYiQS&0+yvSJstHIP~R;D@98W3Q~lG& zVh7cb8_c*$IKS>My&+aUT;Fs&q29}rH zInV9UM`tmYJ6*{$h=63TSgCV4qqE!(L*n-wnTTx2hPQbHEL+Ci~}`C0KA72mDN5De~CkmpaBF$Wi9&2Xs9H{({tOF{N`^| z11+c8UugN~9Fqhfc||9>HNW(IEG;XSD-os^Owf#*<@P?UlLt=iS|=$ky=+hY)L~RM zr5jI7xdzx?B+v3+UZ)pU>rS+APW_rbj!(*`

GtO8cV!)DjENrcEt(yhOUyL^3NmGw9Bq&oPPBHHfpb{T@A*-P*_4 zUKLDXsdZM&@j{iEdny!)ILjZ7RqKMQd%VF|z;-DWfBvC`kL8;>C8C4#W$CS`p455Z<#!t`7Fp)%`R^t?4LxY|hdiE^EHMA=}DN6PxuW2&l06zl#~-185aWEX`$ICGrBENgx>5D@mJUsg-4FhQ%+@pO zBU-;aFb+e}oq*ItTt~807BgEnA3D7hl6<9@a5-YMH>TNiI0>Pf>6NB?`7qwPLC@2(d4SC{#c-^DKp4Id zT_?8_iX+U=GW5782ujclwx_Vi&;KebJh{S8HIGSmiyx*i9iE>XqDMkS9Gqszj`_5W-8~g#t!NRHVcV2 zjVRS^gR5!B+BQ8NY7zW}RQlEoL@?ltGkz2oeUZjzy=oj<90AG^1Hi``KH4@r0GuEu z@1=+BDQr)ds=m|#j&FC+50?~LbT$-`A8z};z1~;J0?s<@Hq5VS*(XBtL!YkpeLT&# z?rBo^+%+|M8wW~pt-`)Hl7U!BU^jQ#qlZW&pnJH|>^WeU2nX zx7OSG2Prb&rNNbAG;C2xN6lH~a~(JGhWtsuiFht{9{*EWExEY_QIYQz>&N#SuUTRE zX^BcG1OsW`>Xksv@l{J?Vx~S+QM!N3qb1G=5CvO$b)Aj0G*_CLLrBoX&A$2AG2wJN#BhbHvI0SkuEVJkw)BzQ zUt^N}gQ?&b^CJAC7Ydq6!MgA)ZJ64yBI0Nsds?>LL=Jyw#u2gm=PsP!Qz15v(T205 za=V(Wk;2AbmEPZ)boE~==D#E||I-t#WK3^=aeK~rIBm7iee~!BL;-OZKItW^@(rIf zK7`O9lEy-U3zUAt@RHTyw4fQ&Uqv?K36XBD#Wd=mXjwH4Ee3aA*CJ!ZAU_$q@<$_P zWkTVC?a4LC6Mo+W7Tta|zXSr%Zm!X5{7_a0wD&10kenGxGQVP_F~aN~3kw(kT5C5T zD|%aShq3K&wIiB>g%BaJQa#Nd>Hk%C^)V4*sY|3@vN2)(Df8LSN~FJPZ-VMfVn-Ye zd^3g;Up*0H$tnz*Y8-YVYOT1CQ1*85FVZQ49AozpqCNczr+lC8ddP&9eCrnBh#V^s z8dcV%~+Bd@<vlwzVYv96UHvL?%=8+N3kw74jI7NB!UtUh{U=vIj*jd+ zht&Z?m*bf)8z_(8ab@A%yenUoI>6JSD{tHt+`2X1{&jG?oaNpBVI@H~b9cT(`yBh3 zC5A3BJB!ZLAGx9*lzd;pHe0-i7~JTWGQLc8`f#L+z!oRq`a%~a4ye1Jo*+WTz=C~dTetkky5isPbYJ| z=>qd|nAz+9;2*3e8HZW?NHu>Dm>&T2n7;O3T%Ro@99;vxVJ?t;F$FM+WuUC~?eHh) z-@^ykAkfw|=c4DPUUS&oy9|N*8P9bDu|u zs6Rx%I(nr%7+pN21!e$Ynzx@~u-8$)Z3a+^N+t-b$Q58o9wq~Y!&>56K=TQ!9uger ze-3!$?>e!bhk(gI3-~s0mg@RcnZfzcQ_B{e-4`l33BPm5B4(HYfo^EDDQ~1-9y-Hv zi|>X`NXXV8LV@)v+o$4L_RH~yL>9TK6uf?WEw6m?vOMeF%$4M2`k&AKJ;{~6AY@?u z6?Y=BP;T0dv%r#XNn4F-Xt>4w&f`{tHdDj^bmA?%sC5!KjM+isIHlzYNBW5WzT^xT z=)>$)&x|Y-h;9)Nrh({F^-nVxD;NERGpP9t09#ow21w78_5$jWLA5p*lLD88sY1@Q z^$_p?s5NM_a$CgX)A#1XnXjBSCluhol&OW_f9drJTu8y!=Fd*RC>(An0Q>zBrnkpZ z(L~FM^s$pKvR`E4JoE}z7uQ8iK)ASEBb zlgyy&0A&j~ee(g^qxo%CCmLG>UV=O4B}qR{jcf^tCC3+r*XDdcsCI_O$eszt9qVy3<(@lNT@e}&abA&m9O8NBx~a2+ 
z-o;9?I1V(jwNcmXFKumNl~;cE%){^^#c^nAQhH!aGonA&0dpAZQjhTo( z!@~kvGG9+Ai_sY=auOyZhy$m9TIrZ2jCMMJ!7JIk+A>7aKsa3-<66W5Mh$zpSs9f0 z1IM3Jvl-VU`5UCtH0T>pLRjjYa+F6E+@9_*svG2i;C?j}T%SAGBp+agd5*_JB7tEA z0mW*57e=)YZcuo_#Pa6Z;GMT#aFaQVl4LnebbMMbmhpgc2(aGel%TI`=o7cn>|5|aX}q%Pmnnm$H!GWzyT^qdAoz1Xi83me z40Cne?kNmUYL!p#Pn2@A03$?p<{E#_RPcS+*0Y&<$ix1NbH(8R0>MA`Ng`8AB3!{@ZFzO`3lzd$)k`DEVjLkBA<_E_7P z)V7!V>9l3pYH-R!NYR1bSRn-Gv@|Hue2)uQQ~-jWnVd-r(!zgNgqrx+8_?>eVr5w^ zyG-c!$NvtAKq@163!FcJzMl|{yF;%Brh0lvYNLZ z#N|SBogJRM2$Ak1Ly!;`7bhf?7MJli!=^`#eIK3vKAQdoiiAItJU$wFm@6V8?Gyd@ zbcThwo1;{pp=R?BwJw$}_8$sn_Q33g-OWv1ueqawl2W2KjSPlEA&IHy5+}W{4S}*4 zp}^<1H2*GQYE5fy^qvuk_2475({3YndB#|!URVI;>j>$`))J->Ktn?M*ONRPnhz^I zQH3*#Gw*dBL7Rre9gObkN|$XX#cgo`)I7_X=`Czbvr7bK$BB0R*8SJ*NFUWD;z3(A zd=RdAOqkU^p1&M&g98*tn%*oeEzOLF$C)5Q`IhbCBMkY&a&lDO2Vs8j|F~)4(n#D* zl3E4kPf#B`r2c1~dqVY(L@g^jv~2$Ky9KH!wK!y%g(_sl8Y6Yg85~9K(LO1UGMb_a%Fz2P0itj~Y3zSoGi<#!L=?EU9&zIA+`=&;;-O_o~pad%a zNahLPEJloS}dCZ|AFt@tjC5*0A(Z?y3gqv z6irJp|ICs>P!JL=V3uJs1KZ#^oORijJ{vcy)yA;D^uePcBuXXC8(4iGSa=3C`657< zK(7G0Ax~yW9K=znP>VlfpZ_Lk2Dr&W=AOAW;hi3h%xdogHy{qyEi4%t78Z8xvlvP1 z%t1=P=EC@A&nxHw0h=)m*>A^W=(S~QXJhSHK|Mjy&tkmI)o)!J;nX~1{uNnoKaG7q@QK6xDONw}ScpU}->WqQUmsCX1JEj-W z+PoB)1oEPe=$TxRaQr_u2CMHAQySq0+R>QvHHpG8q#NbWZD;6raV6K2QdP+VbAdW= z$CfWW;=UOXhf%t?YJqCT<1{RsDhXJ$ayG!))BXWIP+wAX27nipVL_t%u0_(A3qWyK zVLeElt@nXjr~cz!FkY#l^K)e;BKh#+1p66kxTU;9~uP0kZi@vF4C4rzId0A~a9Okr0CmVsA$=E9=QDPTZmt(&kB^tB+ zAP{ZI{aIg^Wt~l*279bzHqQEkvB?@+gHcL)EEBsr424o-*GKP0*@Yp`WS?aYIkc~3 z;<33umL*N?D%K4TP2{=}^(uKtz?Rj~*I`KACLLagg8Mr6WI=dQ2=?ce@oq0JGJj~t zh3YKsTrBQV{97rc@P8@7pzFbW%d*Oxbr{$BXWU0i^sGr8^Yq={QU5}8%4e>XfJ`X` zkGBMb{EvTv5r$0yX@NYl)Ko+)I)s@eK(iUGB3?0Z@uAc+YkC3kULw>-Aw;NH@{i=h z6xSE^5kn<45j|o*9?92ihF(Mnc!Oi<a#oDEb@)Bi{mcuug`r-x&&~}!Gj;4E~ z{=%+txM9K7>if`oMgApu*=Tw3lURvhpb$0=^qx>HM;5(0>zQz630-T%OB^iYyj#tSiJU zI!|0_WEVM_>4&IzLob#hh==U_P}>QDzP_MOeV`%b7u>6Vo+gsxyY?uP0hc^IMG{Ig zq}6WosJ=HcL|sNLg!rGFCMJE^`dEJW2zk{~S%9HyJHQSx96u5D6abU~j>MNsJbVca 
zn4^dqUDwK__5eXC6N_beO-mNcIy$y8s<}IvTO_R@atLjo`pWR#7qKIRo=#rg-~Yt} zOg}1l-lq4cDS=P%S9)+-Cubzs|(IwVSVbl?L1vaI#lOW?HzqK^v|iVwDK15 zI-7!kP0Y8SmJfb*36M}ZN8JT+a^t0D%g`ALgJPoGOo8Sx+*K$zS@k~y8)>V)0Dk)W4f^tj$JDq>M|Aw8!Yn#A<7o{|)%Uim^G|2XUolxkO} zqny zmB_`T$5%+>FY6T!eH)EBiu~d9z05Zb)aGhYiQV*#$62~~U9-lb1ZA!P^NtieqgCmB zd?QyK74ZP+=jF9jArKM)1RjG8mI7UH0l&Qzc zAHx=JH+Wyzes=-!g}C>d+J@kj#p~bc&F|*C#Cpoi}BC2pShIu&=fmv%-s^W zJ$*+1XZKU-@!pTE_^tdxWFn3Q#V412IxiLWD!PKd)0hHcK0zJ|CF3tnFd6pUv6Qt> z1R8%w>%NX3Wb=}gItxPjcXcw^oD6a@dX_u zib@2v{o3kwzD1$gqFj?SKGu2vfX-T9P)@!Cwy{w^sBhJlB!>DzBkZiWD@)7 z+GYgry1hCWceFo&CH%53kp4GdqrOD z_583NeQ^&~!;?^_>mets60{{2^AzX>xGY;!y@D5$M^e*O+uC>^w6UuQI;4a6Q$y3 zYwZanW#4^JVUR~%>hiK0K~d0*HqkhNTH&$DEe9>cIOGHAFxm)CR@T9Mgpt;2(RX%( zDdp6Di7d)6R?9@-<9Z(O>RE+qdxXUH~+PM zt}l2r5diF6FfyX9L2)v1dq#-aD|w#P*!8shpHqHm;Xh<&FGh%jZA-RhW~h#O%7A@8 z_ozEM+d|IjG+`XvA_#BI34cogt4;jRYOiEqW|wR3k0gCV6a6Ag9JQ07DZkrdJKyAX zyFe9G69gvjcFEp}Q;HlM!WvrOiDu<=gcu=J^7`471Z)_j)6`PH;o?elRXYB$|QxHeQwJGGojI^C`%gSEN4lS&5+X& z7;yarPa_=qd=TXw19)Ul3{beU-<+-G$XCsnS@RM^@A7(GSM|W^LLx9*X(;{0oRPq% zYI6YhW%zwD2}a-kTMWPnTnDj$CLlyv0Td_V8(cPeqLkPWwaWSR?;7AO;#$z-u;~Tc`doCy*A63C$5B@1RJ5a-x4Q9s^q0?=m2l~Kt{=2Si#;$|?DyyML{ z2>;#B=3Kg6xVX3`0EzX$4T$|nZ@S{b0ji-ByTFIwbs;Ue*0t4vf1ouNHUlpqPBET| zrehbKgk71M242o!<*ggWaE#HUOhKU`Zvft3D=%9uj`7n7-aH*)Ua{P|F<@E+w(RrH zwfba%g|qE$g5Q=%Y5?-#z;ciYeoCkXW59EcTD&d;Nvwa@y_5V4%JId3!%pkf{Kj}h zg#jXVdsV$3C~*Ut9MWJtbk4Say9M$kwq?2k>W!M$wSJeY$m=hv2`tJ=jgc=F7RSH1S{uANF zF%Z1e{PdWw2*eg1yf?OK|N|DPa$p{OgVyqdcFjGB&Eo+CS_^~xX>kP zJyLP|Z2)Gw6iDLuB+?x|q8U*cMn?>9O%zMP#WazT%~*kauRpX5aMp&%p2CpoCG{4- z%IM72hO@HZg36!l0NBE)Kx%(lP2$J5!V^T`Cfp}c4-w5Epa^8;^n~zd351n z%0~|(I|j7)zEZAH+Q&L)d?gE(n5#bdO?_oMjZ1y-45ek9wT_+(9IoaX_H;$7a=Ed1 zX03=aCYv>Uzf%gK(QDpm+sTeu>nV>XiL}yN;km7QHAE`rbnYvkEL(K@MlX)mn8AGN zjz+`^LSYqZk^N0yXqyx=cOv_%hF|R9k8}!;B{3yKy3%7FiwJ>XWc>G=6U_afK}2mv zt;)2Yx}>ZL_|!o?s77U9zjD5_*Z?7mKPymsln=!2n0?6okb|rxKn`^+^&=)3t*~OW z*Z!h}#9N@f%V8$<6R6XMayEfZ6&emczKU`(rv$D{;O(m>R}I5DOp$|t%3P6unyb^s 
zb?LlTnmIVV)FHX0EugN$c00+|J(cs0WyW!<25``(Ox=-on)Q0iVe?-X8Q5bNzB&2Z>LwHn9%^sRKF_q|| za4(V}Exi*?%gY?~x5VngF!4v_3slAxCDi5@)}{7FZ3kpRqbJH>-W9z(@%N9_M*9}+ zbZ|M`&GAAUc+teQiMkbJjk)E|hGU?9OkgI5X0(E?mrACv{&6=Y+l>@I5q!bssud`2 zoO6lFn(*H0b276DUv$}Emi?_(>xIa|J^+a$B78Ep#M zd}7mz6c(j{psq=wQVm)7Df>FL1q6I;4hcbxaYdw8{1M`Ed1@=c3&un|?bsC0jy^|N z@)Pjj^E`Jrc+sS}8$v7$0frLyn#L3F8oF@}gV{-`}tf==nph;G$q)K>ZfG0<1d?%J+14u3I>SX}2F{+56-Xp-zh zu{6vmP4)sd&R`04%oC)XAW}L>>I^(K*YW6cr1=uSARPglkZ~GR{WdZufI4n}h6WF{ zgHgz7ow|de+N_V3bJdKr{qhV-_9F$V|9yfT#o8GjFSwn#+~sI)g);lKrn%;x;u{@+NrQ>K(JNsj zUN_-?1lgp}ILDMmn?83)L`O!4i(=4DfCH7lW}uUvCFx2#yZk zc$8?vDb+0_6(XaWdy|sHa+ioAOJo5_5uwDl8RV28LjU+&L!1yMIJztZYf7`Y%gS9K z$UZC}AgWzT>BNIssFMql9+Vy=m}6y0Wb1{-PB^rg8SK(_dr88CgGfz+m?2Y`?>mFK0q^jGIVMa4T(uZ&$MK^(no!Uqa{8O9^5V4JUN)-ASgSp@in{+ zzxeD;AI!AUuuyK(ez!an**8 zD^v!F2lFl!u%9F`0h@?c(yY5b zC?%_naNEBsB`7nY(#dvjmeugnyzOpE`#IA>o^lsS3H|U_!Y3$OtRfLM(2bNCsp<|t zN0`no=uG({tJ2B*-p`cZC6g7ChLN5|hCy-jKm_uLLbP=CJ`-F1zsl8LRAGWiKV$=@ zZltYvEV1mQ9_e}9EwXKsyZ{8iAGv~Hv;x)N-t6klMxK;PI{q1AsTw5e&5GjA^Du%u z(mkd?{8?dKC>AL<#=OcsUtOrC+3jL8TT-(wSw3VoeMXr<9iJ-|$2oeKp%B3a^4-dv zH;g+V0PjnkkLvE1W|4iNex-5K1zUd^fA#@0O;B2t7xiril)A2*fAY-!j3<#){L zb|$NCXNW4PY=6}IT<0TJjiGc2U*uAzptkz=#~z#8#~zi>&-b*^Im`d>f!E&jap)yh znKFz$OGxHTXX#krtEVdFlFl}@ju`Pzzp>GbFDRO4uGm=|I`Ss(R#`T<65+0#5x^cm%1D+%1(YnNA zv-72@NFyN%IuFGEvWFx?4_F@{>M%m&&fXTQVqHX@heuh;Q$UnO|N^IfhdXXi0V z4@pT8D&RMZ&#Ax~YM%S;{i?9w#jM(d^`N2FG%s#I%$%?J) z7PwW{7b91zHwI%+t+^Z%HC~#FnBj(mRtCw;B^CEMV+Q*6@{^`UFi9Yee~jLE<#ST= zJi1<=NS1mZBrfQ}e`U8v`|o^%w`{*%Th&R^Px^C6rpO`rkFD|4kG3HpQm}v8J}EO1 z#?sRmL>Mg+`i3jWTpBeaMZv^!G_D_UV)N_uMQYbjD*lh$jV1JS_}|N(S+Gb z5iXls*M_$L_~=m;Z(!R;94r1iW%b!XP9f^>XNXk2HZIq`_XKBcL=iFiQwni~0jBT&(Gg78pkzyfw4=DH@u&$ zG=mtoMa`wFNsXRPW9ijoE}?9rJbag>)4JI$zogIWc(m6q^L_ey1S8xGnG~UvHv8@j zmed{9ks#-bjv>=~cvuRQjqua_O%=qTif^y`xC`ZDrfGR$3-}>GvpDTUkKJ|l zF!Qw~u#oLSF+M!dap-9XoP}+al;}48GtjP4M$iKc7QS%jgFW_G@OlC8Ca#9lg z#O+Wsv+1JfbxTwH%>^toWT`<^d=X6WbUN-*5cJ}X9m*6`NU)x&*0!Exq5rAG;6*Ls 
zDkEIy5zL+?Ci4P=6g*SN;d`jbgMAn$xHrIr0 zZ^PcR9!BQa<{gw~bJ(I52AM{oAqvOF*)E_2#tO0}-U-LxZjp{Uy>i z7PSt~4k1MDWgo^}rNvwu6X9pRkG8VTn7qN0)jyHLNID3EB+>T!31VOyNVSVi#x3HD z)fLdGxLCxSA6KOHhBPm0ZpIj3v;5>^aFuPZZ-!`M2fcoYW)C1ocN+|E0Jl@c=Ty$# z9;1y`?JZDPWNtpw5)b zx>#L=?84r6Kj}BP(snP@em>LaP4+FT${(MZ#$E`NDvmBiGotcsJmr@H3dcG_{l4(waZ9`6ad}CnqzScoBz2qVu`wSXSFQCwjt3|9$j|%ZYnbt_#FT z`=6?EBzFtxq*6W4i`2(B$;X)W9>9JPvxj(oYToGfV{nZ7-(-!bZ6#^V4C)tK1mD0c#@Z}HrrH5BwIc1O zX1GSR%mSW4Ywpf~g=QgHDYL>M;qlL`V5`1G)t9-OC{9#M!RUBihwC&%rFt2e1MxwJ z2~3xm7SO^V`^{srtd!|;2gjfak&nh|RK*co2HOL*EIt`IpLlvSt4+o?Jj_|o_I~FY z=(3O1$czV;iLLWS>wMafLTO!KdVAWJmmkk4}UpXliXE2!-BH6+VJRkBW>*NUbxIG+FH&1RKkyAb&)4q z^LYDZ>w{b%a-`V(U%vj#A^1=`qN5~J-e4JHp zzL^FG^b<&v42=dCZ0xO`oZ?*7Uo7a6y10s?mmhtPB?psz(Y%Zl(ZlA67 z=uT-;Cdk{EpJ?1-9k^A!w1QeU;#l1l@bOx2;q|AsewCyb`-~AICsW7&eZr!Hw!aX! z-k#7g?$wLJQ=K?9!lr`n?QaMCnN!atIpjg63UH!a`@B%zm#&?#1EK`1%%cgMcU~)LEwhUKCV+k z3r`G=)htu6NSk}7-sj^j-{|64O9j(8-X{qe~+`@X=YjcS7Rv>^4RBC7!B1zqQopsD_h<8z+@!DXWU+5J!e!of7AidD}{UT$X0CtdoVXsnD{V#$;OAsf2NjLb(q;m!1CPGliJc6 zweSt;Qa(=%Z~m(}z1$P&_U6gtdPJ1hkD@FBTHbW;9T8So)F5)8#Xedt30h0uiyV!w zaYojt75>ga1uRMdOH22maOIbgk9iIu(^o#wSRSNQR)Cw`ik>b2e-T zg`vHEIs9WEt96vLTgdOw*e*SFN-qFEkE(LJagtB1vDwa$hb`v}zq7TTt87=^)Rm z#@C8%N;@}Vea>Unx;VwzyB#z8X=RzwXfte6Z9HgUozvd*)b%YXBZN}QqW_T&J;aB} z-Ush*04FZ{Jk6pVl1_dfy_MDnaQ;VCL|e1!7QD5*O@ zu*==U`Wdp^3I8;C&phY@jDkE>EWxWa2iIf|yp-%~m*?z8*(m*FUs>IZ3F#l2_*Oid zXUEMgf8QE;(lou5>;89JM5|GkgV3_X5RWZEd8SrMV6g*Xhc}dX=oK5)e1;>DmrUTp z#4nm!xU7MKlyi-*syTEi*<5qemrD6W+0w@>2N;AQkaIzISTsVV`n2N9;?Z#)m;L#r z?I%sKEwm5?tvr0IgEzUKHxBEyz?>!-dwx4|^78d9F^A@@ibnWFpD~?^Ci&vw-!lcw zG?!Vy+t)I=-!2CpI$b4E6`j;w6u_*gAFbLUor0eK?4lK0=_nb4mjKByo`BmSJ%y-e zGN>%70cqxa`L;|E4>+bb7l4SY0x`=RkN&Di&T+096+KnUR0nnM_4>6NrRxkHHPKDs zGaCb0hCEhAdJ}cG8gXRyOi@*ef(raT+<#1z>?aXZAn)j&E-_N7(VS7%w_5m^aJ+i% z9f(`GP*Yauto2dzHlZ=$$Y7Y!{_9gxV`dI7Z}kwhR_#U^#?~K4jH7nlbb&k&;8odL zI;TqlL-9uiR+Vu8{6jYyp@J}^y#=eZ%Pn_wHd2$g`^_JDY21%zAF3l?mg^A2gdkF0 
zn-Z0VzGI^p(;xr*5N9w9Ped9qWNL*ZEV>(wZDA6qM%_dUe3r}ibXuDsdR)5_h-#lc zU+Fi3WyFT7shgSIY|IuOkm6pWh+*7=tMGp)d&{UQzpw9Ckl1vmu;~Wr?rsSYWYZ-r zCEY0vBA`e}r-VvMgLJ2avh6htgK5zvEWp9D8K-Cu60qp2v;Nm8toa!e(b^rtOx4BGR*U6S z$#S5;MSsL6zvdKNoD?zg+gTf7l~gQlJMiq0lD;g^@%ZIAGz8W&5=c@dEb};8vEi5V zzeZtMXT;YoAHJ_{uNR2Jt|_;b=vuhe7F}|@@XLw+<%^TeC#>4}5TgdyaG+cH2s9o# zIvX|W-vg1&I5J+wN0~shOZ(>fsstn@Rt;6C2LQ_dJ1;>3Uk+%mWwnForzU9n`5Q#+ zol8`J_>C=SJDd9`M<#p&;N3+)5;`V~9_ZucUtOL}0Nz=qC!UgEsmy9`u~`QQ(abEd z>6I%KY}V8g1ztHuE90tYp>-(zxsSIDOshTVlP8x#-#8&vYd~nR|c*J>K+>Ur^}UX!btQcJ0SuT z?U&j)&)vXX{gaZH8RvZuBjGQe4v5iJ^vY|oUahOm9e=0^H}opMF}qwtWL$TT)T%Aw!rOnT>YX9-oKXjD_#Z0i~rjZYpY8EnJ& zH8!v>g(e<+nHg}Q$P>?MQ>eK=N8`JqNcSvyJK>u7(^mE87NZ)^IsyB>7lsMP@!mj> zcQRj@z}3+0xF1W$S7X}3Q^dCX+YA*@^iqCYB!R`P;0?4%Y?s@DkGmf@)f2GPn8+z8 z!0a@fG2Zzj19R7IHP;W!-RCcYc^y9&qs9H6!`3cbOg}xScM!KOUBy&+x7D9>%<`D8 z3@G(PZ+@%fY#GQD6{Hk)4NGE-yG;D8NScKQ&vm@xmPO-#+)LEk0~i`xIF<>8^1!oR zO70r~{Cjb8?XQw4!XM7clFw;aF9qH_hC#U^`S)ZALAhZ=Qoz-TUjJKeI)tr4^_xb| zM+~qr6;eVdY0LR4#zFH%18bvy&!ZQT>K+C)`?WVW%wf4Q1feg;vK%t?Al+*(agrx@ zt-^L&{TgBPn)mQzAf)KfL+)$(zNDUD0yJh#MFICZUST_`bHR`JtQ(HO5=vosh4d~` z(&z6)P&|347F!jgI`CB!x-!MAm+Z}cp_3>I7jI!Q=?jyxIiG%0#aqTcMBRR3;1%M+m%K4w_+1CmO9MM-yW60`pA3%bFwh7VrJ&3fA0znxYkncBt{Z7oAr$Fg|uk3-6 zJjkwwGnE73J0uNV012a|Q!YZGVqS^SM85B&5ahOY>Xr%t?LP`~47UFR7zLPg6odKs z^YMITnMz^~7=(=42{dR)Ni?akdy*r%_%!!ncf=~QAm|3Wy7Tit*r*?zN}hk@kjjdn za~WQzx=;^c#s2UPW z1p7ZB$QWU$$XVR|&)KCHL^zE>FmF=4ZBoNRGdr)jF~np@B0eM1S1E(0rz|1r z_NJ0pY99CBl&`C)>&amtEdzuq9~`Ubv^rI$($e=pKQvyNM-1ZB^83Bn!>u>hsxPrA z@sWW2wnrkHNuEq>H3G#&Lk3ti_wYaumOL*EA?sm^kJ3XRsVwp52Z8i+cy?I8n9v~% zq!fH3Y=RS$9~!}MAK(^%7AnzkILHq~=iZB#~-OP~wrhppT~uAs{u zoq$x>b_qSfd(g7`imXA1H(Vn7z7mOm?j*9ZFOWgs0K%Z}t~z*U0B5i=rp=Amu98F_ zWs|_AyMW+sWt|p(D zU|uB=r_^qs<^t7f=-t%sAFdH(9i71iR{_D|gDp?pH4?8?t#a?-=wkkh1z^BRDPe=f z*{qnA=}>9}_dQ<*QBE3|m%+5qZ>O3At@JM#No?~!^eZU*L)c`paBok zlM-$Dj1d{i$)VRwI3R1KAtsg0Zqj0y$a=COg~cG(D#VMt5O}_VDxF)Ew&utvBVI*D zhlgw#s%DNU#2JoCl2`^6_klFS%K~!h{3CPMLLp*ypwDdZemwv=+H6~?wSe|qyll13 
z1K}e>?_Xm+tUFPwQ=b?h6-Z>)*YoCoVuLE-aYn!~RkePBc~X9ZoOpWLb4=F(S`i>9CM2MMhhek#3K)0;M+W2x}WNVoH>+QF_BN{5Je zPLm|#kN2|qUe3H1*#B$bd8uKo6 zyt!bkrzgiVThDJIbgw9oFmoQdZ%*Qwk!%*`_xGSgs#9Kw5IcB4Qrgo^y#}0@y@9V2 z(rYH$%@`X;=tQ-`aewR+j=_sl)B6oxTrN{i)RDR`I=Hk0F!!bohfJ_k&5#gdKzQ@p zg%rpemU$(=Z6oH z37Oz?Jl;VAQQgL6vKqlMJdPIG0YP4^TdTt|CKE~3qyt!h$f2yT(CK}h66>7Y+}tol zG~@?-!}h^pCF$?PSya+^3ii=gcJkH=&07C-!eQc81X*a4GNeoPuc#QTVli8!zegBp zO0}@1p(|Y}L%MVRaR$LXGr9H$I*F`3T=)(bu=Gr}+;Cp}iVJzF)11)KIEtl?()Ye= zW6`=DsCl8(exrhc{OlkC&5j7pV5dl8=#R|u#(13FifLFPrn|p(AEGg%8OJ0oY14LKIt`UC7ve>03Qg_*^-+V9A^Pn$GY z2;%kS+E|1@8P(Gvzh8Y$tJswjF!cjj**4{kPv7i_)9+5hK6nb{A$NrH-Rm2Yj{Gu; zxUyHO%}!2Lxk8GjKd|Nt<|7Y%Qq$gEWJ1atY4GY z)RM_XAeBU*PAW%Z2nI2a`22|4mN}k=+>ZIBO7|%CdM9p3WJp~+HT`~7i8B8_F@6qH zg@F4du4EH(w=6Pf58c6wI;+&1lf?Rj>9{znwaV(gLQ7fn$xRv9?)22|HW0DcE@Uq=B~wj%L|m0%Bf_J@(d<9 zPGNiBLJaGu=;Z~^+Fg+fEgpLgo&}mqwx&y=qUe)({kL^x?vqpOV=&Q!Rcy{7+~-Ba ze;45ZsLUe|BabQ0X%~0#Zq-&G4ju;pE=Dn!(Bs$xbB%VdkJIxnBSd#q!4MzE;Chw= z`AOF*Q0SYBqxW`p>*W=qFGF_)@gl+Z58`UoUM|EK8^ZI{KkbOBuvNvj2Y1IZjQwho zTo0^G_jq~c9bUsNqi|=>lWA#vHN1B()p@-S>(3}wa^=G=&F*`?Nl_`|bkGC^1q=l5 zE74zcDg07Ri~q8+$Yx1by(e2{l466vL{aFW*F*=)$1NvJed_ytrDus3u|+{@9<*tH0#-T2EfvT=)M!C9dGOvd3QTZc`iN<4JSmPV8IV&km6Jcn zE^xU`?0sXUzTm3Z+z33T$H>;XzQ0qVoJl+)D_`tU3=|1cq?_G9jJRP^xDKWDzdei4S zmr!;A*f?H)uv5b<6-yo$5nIQ6HOxDJ(1-S(l3Jm}p;bK(4Wqrp4T-$F2!4$Kc=;BrNUAor>8KiZFNFb_zrq_ z8LJHcVoSsG?%=_#r^nWNsk1s)XtV>*kU+L@@8ZD&k*8&7@(uLVKU)5z+%k|03&|{-tqizej|MQi9}WDD!LRa5wEdJILm+`0U?TJ;byH? 
zhG2&rucXCHtPV)MY#R?e0q6O+oqWk4>h_dLpZMpeT0X~((d$2Z%ul}mA>k(;usO|s zr}S4X_Mh>g=V%N{Z%5ZKVmNSw?o)>cS)C#=!nBXtk5fL#Rf962C3S1K6$LS&rP*$q zB=*lG=|5k5^nd?i;B4}Yefmpyh;c7Nchxe}7jcGwc>8o~R!1YDlySoZ5Hv14m$E0uqk>fL z%|z`M^*t{mUex`3*<;2cvntEE`x=i+6z-Dffh3o}<@UkS`R!smT%t#3?9)B4xvKKP zqrCKf>H2cI!;~k%%>C966@IXhlbOEpPY4bx2AS;_d2s)eGOZGya>p){!1O3$4PpiA zpnqnlMmQ*fPhao$yVD$g`3O^h?{JP4(ed zqau_aU>!Zeg4Vys+xNKg|GOVdtrKdON=)98?*+hki4j;f#A?VS>>bW~numP9YGy|i z_;S1kwt>M`6Y^~)O?nbJ-HVb^toug#c(shESt=@Bh?e>`_z}MUclgBdu_7iW3(($u zK--He=!mE=x8%j*$N7fq7x;3Wll!=EIvY6ryhp~ z@>-5?AT=1YdOQ9iTW3MMeUi6`_@Mq0s~pCI?xfEY0l&)hw?PPW%_ zcF1su#B`ODieedbmWH96-SvPtlCBlw_ym7y7Oj2rMe27jp88I?_Hi@la$6(*jkZHy z5hk5}J{0(pfSY)mnlBBnEBXgbiyU$2%O$S&iEMS6{POI2ie86%Y;44GmlvJ} z+g#N@NQc}vsES-xpcY|O zd98Wnd-R1dML-SyKmTPtK=OMoo8`z}pu?pV4wJDu& zk0fL?>=Os(1yCSo;&$KaeJ9pBU}7Zz#MpqD}hi+7D(XuI@V@C?**qwd3ILYwdNwa zoIMZ_)yaN=>~qzb@E0M6!RijaLgo(Um?OUU5nMOaU?fJjPtV6Pi%)NvOEluKyn7Ye zR)ew5*Q`U9OhRC19_@f^jDPc9SOeibTMpoNCbGUgd!IgmkEmY5RHZn8?>4vREt7@6sOniO^d?v z>D0ixFxD=&5qqB!{D_DP7_z!Iuusjvo?=I)sZT`xhM#7IY3W~3%#m}a%4ME!e)bc! 
z>P=N8(fxid34A30F@33uy9Lp{F@!uoB}RDV$i+vd6@s)qgwB5aJslfqcd4OVxfq{b ztysA@iC!ZoPw+||tR$a#Htx$Nfi1y22Ku+I!0uT^HEek#f{nA%fIpf$F-C~rG4kR$ zQT_o$2^GU`*fg7_p`#60Rf1czc3L`>~P<3lRTd;7m0iYZcW9<0?{7ss=a zT(PbfLl+y%l#8_s)z`h55NzOMHxHHlQ6qdW0{ifxS|i*mIZXRacQW({eoj-V6QXk8 zE|B*&B~9X3aRG7^3L6A`SG00r5%ik7M`jwSCi_-7S#JV-HRsYY;f}?1n)2;~Ozfg$ zXJ}MXl|OZ9Mh+GJWnNS>@o4*$j|UoiMYE+j(4c+I)XzAVAs4CGut2KFki5doLHy`L zvlo+m0)M>V;|f2CUGadgx4@q#1%Ktx0WVJMWLW`x))e0P92CXO-ZFFV8yMj5+EGF( zdkUbqK18$)4cyq^m-3PENXiyVeHf(BuDE5x?A#K11|VOhgTNUGp*Ho%J()^2BcwbD z+kEk$ALR@Xknk-;Rob0PilESj2>IUV&zq`wb+SIuSiGEru9~FtT64t>oFMQa(CthI zKgKn-D!h-0_>2kW=3A5RSD9QSxLISmb%_gZmw;Krd19djXWeL#ppt&szjHd)?TSrm zLp1hsJNEh>OIqiSZ9Ry-WzPKP-0T&od+|&YbQ2$pDsHA{+Qiu%6T>M0qK9XN3Ah&A zckEhEF%LlTYVfL9)>$ZDYcHPxddAMDIBVcMqz2(NY4h!`p%FEp)0rA00s8e@ZC32y zwp`iemUr>lxMMPn?{f{)l`qflj3@Up{Pif29%e@Q{gNmRd<31K*J<+bmE^pI^L#6X z>nxGf013xIKstsh%OyI7K9ZS8oaL8m`TEiF{pzP-(Lrz&F5#6GfbfoJoYz*7hc1i9 zt@;iI{dM#YiFl~)9f;B1j@W^i-)sdgU|Ck(HO8qw3q(6?JQ{0AX8A|{&>Sozc%RO4cn@a{0gGXz)E*q~Q{ptQ2w zLV6^~OHjVn`d7*%eoygAZwH$qoA|dfoc6QEoAG)ZE2T-dzrWZDECQp*1vqz-9@DF) zh!poc*zN~r79gJz@kP7!yY6R%}xd2ny%RI9to zhYKe{%F~v)uE-EAPzHlG)NYD`2lt_xcS0eox+!@3;^1}s5^T9t z_;}%1%wIt|E4WpOk*D^xiQu0Oz zer(_P7gRO-ne@eS3g7`skw0xl(-axGi<`I+hW_^eV#>)G>#eO=5t3nW9@L@{+(|65*8tNt{Nzgv#d zAzAth9VOqA%9C02B%Hb;JpeN{O|OH7&p1i116Ic9l!>2!!V8&rT83g3FJITcH)5P` za#Z3ZPSbo&5B(YWvXE|VNJ5dBTU~4rH{e#SFh#%C5rcUdIt&{R0Vp31)X*>Hb|sGx z+`4HyZryVp{>|WA&uU6$a*|+u%gI>~C{Yg1^@nuSacw|wY-1zp^@|Hj`b=^6B-W`9 z)@75WdP)^eu6qc7K?pyO2MxMe_M>oDWBFd(j$mxQwe2UU^tl2gJC+F{V(f z^aWY*dWU-QV;ZZ99}VN3Eu52=<~p3wQBj56_Zg8F{9|ut`@VQ7amkvL46B0)tU`+V#cxKPP2Z;a(l zR@w6Q>lD+6C%pZseLAxAAzy!?vt)Mkx6Vgtwm0rK@qa@fQb^+WJa*sNb9?8hRWOt> zRb+u4R>g57nFoO4NTtxXZ%N6GD-oYKEy#u;JwNxu+Al&co$Gwg)%Zr~Pu|d2@67R+51*{KTH)|cNBP}QmS7W%^!9`6g9W6$l2ermf zIPii@207$JW|4%&KA6@;?1_&oW}JC*PXcs&8K-~LcjrQT_KX}-<6Po5xu!@&1&M>~~9N7DJ=bxaDOe!`cAw0;pKnen`37tOMVVt*hkn zHr02BKFrX{9QS_VsrzzlZdoD6%S)m!wu>ky-Nv5%T>ZI>**FQOv7M3|dVoDT0L2{T 
zUcn2_nePCttY4Rmy|*ai$i%@-@#~qU(k=9iP2lxuv&A^+t!0C3bSn-YBDDb=g`DxN z6&T87mRZJqp1S!5b0nicn6`7+q=pzBN5TO6r(=5bC08V8EEhKHcwUUm%qz4#?uJi9ZLLh z5I(8Y9V~&CzQH_{*x(BS=Pr3Q;~4xisH)pOuEqv&g3b98gD})2LO+Vw{OUcklggF{ z74csMo@L0bKUNVD$*W(6T9YG-*Gi6yrW=Jv%?ev3x^UiL??@}x4cFFOwd`ZL=p*>g zah-xRwLSB`zhy}ilL}gl+OPllc-9pyL`PA-QnmT9fnS~PiOhxj5$uhe{aDcHi@jlv z3(LcCA!zw^SztbLG>Mw3abt582YulE;vHxrBJ4fZ)d~q?UCrWI(15J zuCNhHvKKs6urOYO9Z5<%dwwe`rB*vmHZlaKD5{JH{d%7=&f9$mQ-z`m#yS`dC7F@X z*Q@XuRpj7>;Ym;I#(j4riOj;C1h9bGr-THsAv2AJZxj+3-zBO+c^!d%a{4%gZbaU&3OYktL2P?!2P`TQf@?g zF;>;#Ye9__TLiPV5#_&S^iSHz*i*cKGecbP=>3@XD2<=nvSk{Jipz6hLRFu4I_WK~prg&C(l|DUMT&@`QlIQKMmvw2{5$$e;c&i&gC4H!p=OcK3o7P(@^_p99~iHglHpGN+Lsd>XOHLM!+ zD%k}thLSD<26lLh%H!{DBVD>fX-rh^S(h~&78QR936eCAs-|Z1P|f@FidV<}c>R?z z!Qw9d5#*dhuPbxBje&1YjTf=`CnhRBwIyq9<;pjinf;^ENd>XzVa9Jf9n&wGy6{EV ztnd88g$vP`26*OdGiu*@Kip<2_jGP+ON1v*M~Bs4n0D!0JU^;)DFtcOm!R&NVEyxd z!B_|ytX1$D&5gW-uc1QFHfbpVhZ!kQHJwjl?A2btUELjm0;C4=0ekW|o-Y=axq&Qj}pWAC3tA|BhME50?w7Mg*T zhBf3VrQhas|DM#bFoJ6|DA>bfR5D%MCT`)u9yoY#=R{oza1jqeknV$p8`7?F{cS8( zZmYYXRleya`90@3l~l|rHT>{%L!sO4Rlbvl!!J?+$}6^DK#*L+UoLHZs|2<6^Kz7l zSs)97JjsIt6{Gg@(k^BW~!d`p{D2hl%Air4v+Qx=zgVtWJ z4>H7kb-h0S3x0cAI0waAX}I4G1N~g78P6%LQ^pJ20?c;rU?0iYuCCcVR2(QFw9ck0 z4O)$#N)yMBQt2O?7zlEHzsnh1;x{o zlXL-qI^r3&4IYQ4^Qt8de3re5LO5k!LRp}rRte4z8=Tw|pve0e(WgePG=hGlV}KDV24W8~D82Kb7@pyWUO6lUly1s{ z%4Z^1DSSXMc)Jkz@4eSdFWqg;?-E)eYn3O zTWZ+wX+N>u?GVVt+HJ<2dhRV2fYy#N*IH5m3bCX#O80+YQi>!4^*MPUSb)En4|4oH z0OUaB5ThoMn|8?ndIk7davNeUt8&sI)E1zRTJC#*<%}6PHUHG1($9sD={s@X+Wm)S zVx%U~ge->fL>8!M9p|9R$fjnGEvH}7VJRlAFG4d7eUz6MnDII$X$Be z*ms8?QF?6i49Fl%0+2>}bDL_z1qz|Zd7mld5(0GVHmb!Vk&nCI9{w6q1q@yh7*aHI zK~VQC-Nps=i?324oXSH+TvMwA)W=scYtRWzjtYQSc==x?=(SHy+tObqidtXXBOpsi zRe9I)_fR$10@y`cP)YpID)eb*R-#FxhohNO^3nRp1Q3n*x-O)6)`=qtae_JnWCGXy z_c%kU&B`DV(6Y(MFKLWpo;n=S{;F+6eOvXF71s%QN)AekQw3T}qNXow z={62JQIE|MRvauPX>RxUQL0TlM!1_aP`f2Crc3@g@FwP^!_-lB1pX=l`66c6>WbL| 
zdh{$TuV)^6iyXap_%v-6j{yq>MD1Sh7gN7N>?MjsjU0oQLqq3wWCch1l2<#p0N~rcQ+Uyjo)=k0R%L`)FMDtL;`dyR`k>TXfFT7 zW9QwdsQ<+=K;;-qjhnwv!xDEeae9$iYN(j@*&Y|d+vmxKK?+q;R32Mcf z=N?fe^NyrgL(7ZCvzU!J3=nsa$O0}1tUkuxyS*7%6L2A>Dgc+IV93cXm%mcO{uO?` zd8qDxkKSxu=Y9Srmi?w>1Q-xE?!@PomI-KO;L#qpB^qnm-q@GfRRU{ z)Fevr_ngQek^VEeWr_8)^3jlx)ej)0o5PvNuo09f?KQnj=96z}?luM0N0LU|!5CJR zZuMOBehJ5U#5Ad?_7#FnOXUVq3{z}`S5dvs+1-8(el(H*O#z%AVOdW~8P^;KV^e?N z0hEP=G>Vz1Uxc&F?8mvaoH@lSOo1!V4m~-t1qp!$Hip)z^vA3+_5Cy*aar8m9laKV z&eRs%f%WJ97Ba%}(sgn?=T8f1E1x{0yF|MS-qgtraDPzUT7i|{cY+MF%=&sQ|A_*+ znNWfZMY!wKZ13o{>49JPFCA^kvOqC5Hf2NVXle1 ziA-MQQcBBlvgod{57rB)H5g!k5CKvSJ|$t}Pz!!p+F?s9QZ6Z6BEqm`xCWhW@K>AP z_j99b^ zfkigk_vlLx5_GHaB}j)>LIYrNPe1V96!>O?{n1 zwi>quPL8bL*)M#lQTnvSU>b%7A>!98c zsj@z<T*H5Tu9qV7xd$V~zi%ufD?C9<+%1+0BSrpx) zrYM`m_c*JDhJDkg9RD7qO?_!4Dw%=mXp0T%O=?UWdUQ!I`mZE5U@LW!JSnLt~BNBW-8N)G>{<6z8KYMaybeyQinD;=38701C@)b*uj z!yj=N8uNQiAx;m5+j4@GXw#{A5Wgg0@n9sOh*LP0o#qXbBnsu*D0UDM`sNlq3x1LD z7_ufetpb>*S4sX{?7|?6`%spB2e5fOPh?T?j5pZZUp6By;&Et4>Y_8kcp}zC zBxFHW7kITJC>KS!SHW`V!A=4g3Pe6<;(Wb{5yJ4Y;PPcat{ zug?QE@Z>5eQYVLgz@fv{1Xc!_@Oh`lGlATblVd>pUyENMtQ z9EA}!UE7MMSa}<)j(l77C|0TsUS!QxjJ5D;HgdYDh3YYoeJIxMoQ~)gpGmo`OywR! zKP79w0O3JlQ@*<${s0SS`XwHylmqY`dYzdl?}IV&iDSpo0~ZZe?VM;cZoyFV+fX+n z?AcqQX(&+2vY%gy%=>m5Pmoi4%A`PT3Oo{Y(#eS3UI)1{>w>=^Afo`v{@&-f;CTG} z=dDBh6{@ZQ1_XLr-?=ghouMayGmHkIh@cL~nc+c&>UZlHc@2Tz^G4;OFk>3X2>*F! 
zp3V0xt<)q(ioMSJ$Ds|A;&x3JUE;;q4>-51FUh03h@9 z731UgFuZ;8yXi7!8^$6-;DyWjVd_pwqVVFCXE_mPKX+@*0ioY5)Ch)pxx0G#51;=_ zC+U+sDS|AWW`i9+>yyfw^7Abx-Z7r)f)9Bax4DNc>LEHSBtEckyCx7JZ=-DdlO?%_ zgk9grbau?t2PoYAqT0fMwZ=I7vOU+nd6~>7H5BoywvJWrSU1|CpAL zx!d~*5$)!r!OQTy?>sm34VE5mv+%|B6-tvqQsMPsLM4s4H~HP2#V{BX5Cm~MpaUy; zaAG1Z%zL$BBC06_2O*XWVjlDDh`!<3x6i?~o%` zXzZiA7|qjv#c1-KS2|G0#LL16UXitFabx3*{Y88Z|5rN^nZ%SsDe&G z1&Du+v?5-N_|isf@gR;v3|j*-uZ5t>MUS%b|F?b&M#3Xpm9ZRUu=VJMiTs1_+(eVu zU@I;}X|NH?v-vYMhy+zPsDVt!gzbv%+uLl8pRXEsBsXx5i=gL4WVn)g$DJJQig-m$22N5XQJ-Z>mfP$RnXQjUz z7AU#yTbm4D-QFw%vCqIr%*l%~&#j(-8>a9XtH#;LD0} zW~mkVsVssX_huxD(=UNr^_b#rlC$gwIYyQQkI=G#KKPBt*%}AC`#X{tr7K!ZI*Zx1)!9Q%rqnqg;5BoJ0ffH!qEBZ6#XVfqb zbow0q)}=zZC<4^aEFmhM@H50%6HB5uc|k0Y$EeXc{vBpUzH&7&&ZbhD8%Z|nkOSZA|G!z&6VPC>`h%Z!h;MkZ`;`qukYy$^roh5R|xzF41tO(IS|IQ@S~ zmcei^f!p)BVLCGO79Tj^O}I6X2fXO^$K)sH+%*rAl87_Bz{|Y*iAJ=cWkM z_zKV0*7*H2pX-1b!*4UhiP#ua+64IPygr4Cbq(~;+Wc9S*Qs|l^JZu$EWv|=UHlGn zu{=bq$}4ow(tJLQV8%zK)@<)MRJX$+iSqNb9{5gcJ|_!z_sN`ZB>KsrmC8EpM8@>O z`QHr#Y(HXgMXKE_FEv*C$$8E-m}CFsxMpphD1ESF)EYadp`YV*@OpHZ^Vj&_7OEJ$ zl!^nIaGWd^oV|qtDA7^W2#@86dS|P+5~!Gt<+TfY;m=)4a`nY3^PEE8psa3J8ckOA z_ttMZ7RO%>$FQBk)yDtQ%Sg#f;I5WGNU9*6<*lV0?xoEI$WZ@pPl(>sIKTTvEN#Ds8yfi zAV82r;r_>}HFN-;e_ypGoc?{X{c0)9(_$tfQb?}Mu)3SV?KAvnxBw3$BWv+hXtimE+y89X*|oD zn?`|FbDgiBoSh_nrDt3Ih4lqnx^{Xe9`Z5PIwneYbV#d}nPOYczH{ATV=*&br}fOH zb-7cf1}||oXiU2ny0#sN6fw!F8kgb*Yu%e;PckF)Z?=Op;otwZeGs_*JrwMXfa64z z76a`Afw)5alqCWlxEJ}48ih>v)Hl1A6X|htnlD}u8tFjrw#95mghkw)sT@ssWO4aG z-<1@b;Hs(LBFQ{Idg;XU?GVQLwepdkpgD0$$jGFy>}-y=)kuzw&RPBbZ*7VgxD?1b zk2PHF#^q0DY6U@{n5vr(AIXm~EfP;3ALP<-a^OBL)byl%m;&r!PNWG(7%R>boPXM2?0H-uq3ox z_mjWB6aNvcL%aBx=6;VF=3how2!v(2eJEFq$blXSdCQ6*d7f8@=5_pEI^qy|mB{6w zCz7LGyi6JR96H^9rYno$aE&?tTSfsbA(eQHyvhd$w!JX|zB6=D*&A{yU9U@JAMsW|I~uGZ4!)6E$7` zY$|#5&2T>@Y_Avd&DJ>t!4s%$b5J4dSaAPG_R2GsWidWcSaJ8f9+h}q6-noC++ikm z#uxF7#Vjla)g2dFJ{HIJc<9l6^Jdjk|GX^T1mevuImH+x=JPY4esHXx4(l)2bCj=N zK90vK`IlINAfCaun9q6(#18rG=w|;?a5&%X?!q{h5++EMFnGA)@AfB?z{u$L>ZYiP 
zxub$Uyf*t|qkY2O!FU7)-7*>?O@C&n>M+c75!iA|*xS12RRraWPR&Z0)|GiAhlG5U zuP}vpZa?Zixw_Wxn6YMM;Yy&~&=Avt-$7H?UH!UQcmE-rOb}5sk%vp~NJPI>70(N| zWE9mMEc{<82z*0~0-S!4t+HUy;1y&05RSa-^n=@%|%{`s&5 zjl)he&2tMQx5j@~ZLaY-i+kGVEGS!6DUdNB^8DzYc9pU(26*Khq01C5En4!Tj2%|} z0>-J|0?94f!;!+5*k+5cFQdyw~joj7ohMOU)eddvD%AUub&X{HozAaLjy4 zn)q=)50956+;xSygMi6TCls)79CLd1Q<)y+?-YTWExtagivlJ+PJztAuYO9$*DU&_ zlSnujpZA=(Yk6;%OQY#WGr!*@o>%Tu*Z-B<8}Brzcc$?_9#aGz0CARl20mF06oYh3 zwdbBOt3mAtV;2Oc1QO#10uI?g;9(qtvnjjm=GRdA1V{vi#eyi+80dniYS9xPNB7xg zEO)XsFtF61?qPe`kBZ(65;8It{G9FY4X;o4mNY7x4$(nAP;Da{2!85Rz~WS<>>b&z zB~L5vOGK|?w5pnK$mi$1ApA|qMm-**@`gBZQRX$P3PIwc^lt`2TUyp;*sDcp5n2^R zgx|`$txI2rZd|_`{d!)xz1zI<#-w*4J?*R|ZGK_TIL-A5vjBZ;u55?0GQGNC&R4?e z`AfCx<67F_v7TPJt<#gT)f|`2EVf_-&PeN&=HP*TWbWhuLGe#&0<=F<1+2ck4|t!> z^nxllh6#HDUgbZ&YbC$Fu%#Q_A3(*i01Z4PgJ46WlNdE#nIa}W2t%DC{!&FG^%Q#8 zMomzW99`#xv$vE7Q~*T%m^(9Uyp~!RPP6F1JS4r%+RY=U5U(~4G&)(z*Lknz`glLQ zb#jUF4E6TOZVm@HyDm4kN)6Y0D_%%Cdf1;p`0EyB#>|wZz6{FOzO`Za(i0-n2F7bJ z*r?jZMv%Pe;#>ApHgPf*-}`S;CSE%}`-W_ynLYdRb=t!7yt%W__#J2R3BM+fUmvIx z8w+tz%*bb0KPmBM%6s?K2Sm)vL7~D`>pP(tm{3g$*p^AlE2L%?`}wgHWO`_%a>ao+ z*E&sJE(h6?&dXBoc&+2T&QbA#6pKJ&j}K@a!&(ExYo0H>gZJ|Rg%O1L4sAgQt4?9$ z=7ZoKhlYbLrhh7jfpey%rFB4`3tE%%nRSH8HOrSm6K{?uG)hcb115n6I(+vpYv;@n&%?9IVKw-tLA`!Jx0%nXpRg2G!+HWz-W4XTk4Q0UJvV=xyZL}dO#PXn zb#0KR$($xx@P18V{-D=oLlxCgyM2+V8p!v(h)PkZ2F)e~@d{tI=1c(9fD)Ur`ZdvP zCy;e12Fc;-XG7$0CG2x7U)<+jD;>x_pfjZ{XoRT)x{e`TOjQ`AgR=V1=bT1CCqN=! 
z1N>Vquw&7Gw$V~}X~n$uO}EA#Bov#~Aqc!j!Vt$+efDq!D78u&{eB@4_bO_-a_QmY zq-wp#>ntXl6jnziOz?@~rINX*-q=V;w;g`zvLsatC6Nzb{P00TUcU?plJWhBAoKfG z=y|G8V4CZ(cA6j=jRLh65?G+(G_cM45=~_LAA*Fgz}tlRm4eJI1Tj}4vFsBU63&_Z zw+g_NfKHORpywhaBu6Y6qhq{Siw^HF0{nqb+H{-hhjj09ADM(-J;ZEl=3`!>B=E3- zo}t)RQClG7Pn?f~brGx<%oU=R^3dqrtttRaN(U$qD#9B=eZ}+Xkl3gL4QQp+a0Tj| zmL#O#?!y+oxKj@MtaW2;j^>0WA_)7g#R(!wcY2Msf;@Z3*Py{5F}N!I#xf=ws8{_i zoh|iF3Y(6O4mpBVxRV?t^)!|-D7H^zCFfB>Qh+eH+5KsS2V>gk$3PE&y~Mi-rdEyL ztbiOI^eT$@eGXAQ2P=FKF)|q1d5o@!S9Zm!L zL2R0@zo=I4%inXYN@PP?)do!ud=yyTCJDPeTgmyzq_DXJ@$L*2hn|}R{IQaDmSUB1 zl~R&Q=CSOh7USf%n{-yHhHHGgX2w%VJ_3M19yMHE0ezO>-erpJiKgtP6t&a~cz#(- zM*ou~o=14q|@iRn*B~!508g4YcOZ)}C3Y z7*6B9d(9+!?uY^VyCwNJ0@Q@KP*%OC!Tauei$-DCbH3F|wsZ$uHAwy=2|w8kttVeBl-THW`ve3_R)`R1U4FQHJhKV538pX$xMWnV))AMg zmYS2Qm~1#82hvDA324^RG!*zyk+7v4+^*A|d1PrMEzooa6J4&pU*1hl&OsJ^g(!>( zXNX9*jVMb@QEq@Z2Hst!Y5+Vk3Rfz{rbKQ!%ck#FoA+y*uCR#LhZfkCV4J-*(h}zb z=SR%Q=%Mpi^QihM%0sqwcfm}aZ}IT?>SEkR2Z8ycX6QxxdlnM1_{%kAA${#U6CQ-gZIMP?!1)hHstrc%wdzk zT2YpfH6~Shfc~t$1HE48$VQkhZ9j`f%nc;f4xp_-+&p5@|IDZ;CvDil<=3CXuX#jy zVL07y8T=BXQllyg$kG$iI(wo*vSABBoAPKl$VxLFQ~MlT z_=Q?6qL%{|R%8wUROUiK+%*oY%#c(^t*b0h84}TY>xz=pC^wLrogI1KwVF}=9euP( z8*dnIwPh4>6Cb?6uP}^Sv)4#w828<03J$_gQXjDrvAse@QFFrfD}MH;4E)X&rbQ|) zjFcfAC0T2ahiBuR<#1UO72MS}qH-xWh2AO^{t%Nne<$F`DrUyq;K4#CTaTIyVz#+GlWEW>1m6bJE7xy-qd(1ml4$y*OgqIkM5j4HSrdEZ=9-s zYLNj`K?14wy^7QA_$oV_wrx~Oy1Tneq`N~vx{(GMX$cv+yO9#4QvqoZ2?dl8 z1%?z+Lh0^~eGRPVd)E8g@3(*Z@8z1gx#x=WJkBHjxK*4@DvqsrEm7=V)r-niS75H& zjNdeQ8hE(@8Un+OGB!2bcLHQqpdT-|9tTxgdC<}Hi-$&`rf+Md;F+^8%3D!^QC;L^ zSI+&>Fj@gn?iUjVepQ>2CaY1`yV3MmquTOD zEGi_Oke*W!8_b1pI$yfL=JSrT<(zIu)R~E=rr$ol@NdeBVO7AiPm;jVv`Sp@ItD09 z{nK{71i+-js{XVsI8FJ!>WO7F-_$0j@^M*-cg<*KoWx+8??+=~N%i!Y^%J$MVLP6* z9~4TTF3}*Z*V|euldn6SkyW<(cuR!WSuXd#ki&Ge6`#FGPA6g~$lF@RS*PQp^2*q)s%&BloA)e|g_GZcY|Zxp$C=;S>0W}8KW1uz z+TY1;!k?3h?zNbgLrbJ=h5i0w;_zV4E2F&i6>=Cu1f)b0BdWT3>4$ zf2G8>?-g(w4KD+62bHWid%ejL_W^)-ijNS16*Im5BI#nwrC}?nw%a4>BszcBc3`c# 
z*O{8eXmgt?X5a>J&Kg04y6^5hLg}NIk=blhB~``^ER)=;vlfjI{HyDWo$k2kUm`>w z+?GoZ1Vu7=%m@~u&`!czu+niF>;_4sxdBC)&2tCMx@ajvGfq2Z(%T}wEa;~7(as6w z5(*I%%^M${w08U8iO~j15fJWU9+<=mK;(HY9I9;{}7-T;Cw-w9W=r1NvYvTvTV`qBqk`L-GV8rb=D85hhI zlA3-{bG)4UJ*O~i@S~Y#z)Dr&YHbNoW!kT$UT~!M!jE)eDbHFB{+}&A*!nthsB=oI zmd%@i=+puvKdY+Y!=o(7z&5dNKY2PN!Pz*Bqb0tii1=-eHFf;c@Y^dAa=c+|vGwIa z_SoOWVc12-Uk>j=Aowf%F=Bt{B(wqlo)_>f;9~Lc<#OP-dHfG6xip=KTH{dIRZ8&} zOa~1xItB?YD?=}=)3sv^=2THCHb^VkOlLF096AoItKi?F!TaKsBChsqz&oAUtJ;gD z-<6G}_Ht|m)l~;NY8&_^jx|nR27V>>z4xU>(8S@Qx%#g0 zHRWTnu3eUsPf#vYLFtnr{M_sCUBIHg4h%yZz(vRQy7**mG2Qhy8xxE-z?8eGn6R!l z8uVxhj)&S!-2%=#69p!q=ivNL&oO4F^1Vo)K$F*jeBl)D&lzShS|AvEiFC?-h5f6M7wH7vCLH@iCkHYi5sTHQ~*kx^^BqwEHnz81l$C%b> zA5d?$rMD4P`m_Vz)*uM8xcP$+QGM3NK5}Oif}0(70Q+P z;yAy~a%a;&IP~wd-aK65G7E8NTwhMv18O^br(a+wONnSw12pcJ&3dwe_=MabU#w{% zLFM-Bbk)8+JLkC{w%-3rpHT=)TK%D!15uiL(f#AN_^D3H6sNDP$k&cy0(w8->XjtU zhZraemT6{8hD8SZDiD%>QQ)i)>cGFPZBPHHZQB*Q3T0KC1b%n;Yws3=d}3c)Pj!gM zLKG#~`Ky%h#%<=huC6ZJn%pqRFfeTnsJH7$G@MK6bNdyZ&NMV^;_FDw?4<>T`Zyuy4Cn$^3pqwY?rNcef5(_WcQb3 zDUhSeC&>)uH3d#*7{~-43i3?hi6}FePnwMjdT5znh;s_Fo*mUCl@p zIQ|AHI9yddmn(cOomU)>F5(ER?G`H7QQ`R#u6)To`0REYDt zcXMQ}4jb#9=EmYK@xkI~i#qSxB%fB*KQ%bcE$*e^_8vK`<^mb3!8Nex$=sZ&H$my4 z0?u=Syhg+F(Ac{(RS$Q2MGCQ5LG920SOCQYD3C;WPPb$w+q1E#v-?`5HoM9AeekuW z zj89#pT|!R9ZH^_?E{+_ zMw9o?xyXLJimaO6LfP;^)5KTVykC{b$+_}|=GSllavp=p4K_Z-LDp`uP&h_5JFTPt z7{(+H{$sAKpI`LN7RxMvBuI2j%yEHWy%id@8DRdg7c#OrxumA5V)4XiPB2G~=u{k- zqKAr1uRLDZA$pca+~Cl0%`zF}(xNoU-h*m#s)mUjM`dfaXQ!GO7*<8yHw<#53AwMF zFs3mdtUy|g_>1wY%^VF!w!Jvn+<&@XZGNU3XR3oE(^oDSz?GPyr;?cDpdVmRU!AJO-wP1I64_eee`+$d zd75s5?Pc~RRnp|#_z8=Nxe+FKA1L{rWlp>#C92=WeY$*Ux^?dgn!NR{t6$1=KcY#Z zgj+unTKYXxl<7r4KPkU_hCd|Fg7Azb8$P6i|pn73T zW6=O3Vk#44O4y(90Gl00rXgiBmG=4Wx9DMPO#}{xt%{`iWSbhso(r#Z_RFaaH8$sPc>L`pij6}0jD6~W)ZE5Q zpp>%O+{Rztt$4sa!{QFw!kRkhpAEKKI<(SvgW#(x46-p-48InY2PUkAQx-wF51)Qp z-BW50y6LPyB&DEG1+Z>9MPK0d=j#j#Jvo)h<<~y@+AY&_M*K^@__$V@lEGW%Evd^v zzqIL%QuNAaWMW?B##PwCoN>2~O1M6-Ft`6O22JemYqYW7QhtqI7ZgjY;lF&?U1!@5 
zN8`@eY|dBOA;h?)_Q)=??T`92^U4~&3T}0`^6M2Em^n@u{2tKP$f%a&qqkBmohbnt z+WP)_%q|*kLLd0w6X1Vt>-wqQ{k)b#q!Ep!XlYYx0vKh&Dzu9UxS%NvimQKzSc6`F2ohkmfe8`2<78EghC5(}7hV91%|CVk z0(lG!fXgL(sPgrTF?h_|vw%me(#Okv%#QQ*%CsMIF&vESmeNTLF3nLnMHo*#RYbH^ zqL;w1Ce*T24@c5PbkjpMou^~`+e16p^RhJO&a^f=SSit7STgt(Gp?tGb8F4Tz2!vl zE~&0AhPiJ73oe7XH5bSbmJT4_U-JvwX~(`75oaAFe!*9`P;!}N^RZSrPMSsLlL}3B zTZ@a%bLI4&f4+IYGZw5hJ2Ako# zCvx{^AK+El4Q9YvJyt%?q5Xc8KP!rE-0IFO2pORZI{T+78zAvn=x(lrARcQARdZ zR$r}FLEGbv2PMdeucFuVd-t1MUbr6o>fZ6;{eZh1Z*$99}I4r9(fb;?(+TdSUYBrxuef1$o=e-g@H*CD2NnC(}Vc-^Zm8;J7~iMgRa2VgPp&R%`ihkO%D7h{t{)#3dQt(>SVMmVvneVmVbqXPpveI|9DTMJZqS*EsHqDK zB)?psg~|W;4ca(lbc8~nCQ7GM2SI)XKr9lIEYsc$G%@A90sD(FSx92Ro?fdXvi+eY z9ecf$p2wTUQ3pW%mC2!3Ap=4$%Ym01w`>-@geKjS-Q0J1GO=gf=QlTVgV+OJI+RfdvEGR;BI57C{Z8@D}i_DFHesbOsTy zYMCGI-lU)E6?yHKV-w{GkVmkpY3v$Bl&j09NhExM6?T{m7H#b4ZoE>0FV72Irz$~- z^G^G(9yJAWGJ2F)YCPSck8QvuH^x=UA|J2NIQgAQt;O|8X)kR8%$DDKGBJ1)+sHtp zO-UjY-ql@eHqJ-L;f#T&44*=C1}9D^UPT^_VA?J)LuasGho@MQ1uJx6BJqymN?!uc zhFD{|6V9OP%QQF?d?gD*O|ArxEDZdK&j8L7YWX9Q?WbS*yf%PlPs01QK+v1J*p8<3 z6%*Q5ARAJMwggLx+64&;pQQ21Vac}7`m9iY@K{N#g$MHK08T>3U{h>uVNH_+o7|V%`T4|7V>| z)NV!yWIB@j1I8SuUPbblkA9KY(pO?W^9(NIKl*-`r+oA$m_8PC0F`RSJ~O40p$h@Z z4^t``kq}gKd6B~6x;=AXwu<)Qjl$rIz=@|vg7X5;MoW-qx!Ub1#egBuwNJ_r&8++i z?p9gHoGXJP)LkvakW&YN*$`7YL_}Gwh7|AiJ^X#HEn#ZKmlWNDcJx6Pu{_f`BkQ~4BL^ZNbsFLHvkiqISlH`y0nDyKin1y2lOQtaaoY7d`K&Ykd}OTB0|0g!Mm2?s=vg;jYD&eha-em|Cz7~V%2>~bgw4{a?7okPio)D)0pJmP5sDmKOmq(BH;O?& z+FgE&?FA{@Aek;kPWv|QQb_osMM;2!V*hAniSYh2ITZ#uEax0aSbYF)X~A&5l$~g& z2PO3l^LDvXo4H=gh2>S?y{n2>;L4l4J}xVgiDWhyn6B?lRYZ4eVmmWkcDFix{v2D3 zcxXVUbAX|eS>g37yXYD@p(aY1)19O3%~9zyp`=s zt{qXtZGvhrbmbJ~3(|VWP|E(;cg0>JSF|>+`z2H9CQ9t+bXXg^x=-@3c0rbDEQ7%2 z;dQQF$rF8yJvlpZ`!5g1--d1Dag&7>k@YjK&vvx+d&o(Vbnj>_;F>>ZI-jex>Vbf% z;a&+wz{LIBY7Y z4|$V*jyD?hKt%bwlqcT9BpOMEWVXwK5UOA(9Vxq*n8{_Yu@T1`xfBej-}nQ`MRrk& zscXSQzom3Th0;!tA+Nir-Nm>JF<%XN8%Y=P_5`aY7IYj5hhhs z!IP`VvjeZIuxje>F3~2@^ZiG#+e_~NH47DFvBd+5C}U0DiKj3I9BgTCS)L_(1TUUe 
zuydhUqR37$lWPGVM_vayKl|LvsJYZ@DL{4F27!1}w62jwaO4qA3$&-5jS8A95OW%z zaPr{k{-UZKZ|Q=o->c8@vO(cAWhyD%)pdNN9CW^+nT|z@7%zK|7NX%KV&%y_ zduYo59baa*|GV|1AsjURd9z#leAU?xv`!QTq2iz4Qvfon|JLoIJ$d4>J%E;V4`6pJ zMhd)HmPAEO_E#J~&%W!dpP#An8hw|C+->i2{d~)vk4}2hKRaHa9GTXq)BI|EjO0L6 zJgzE5EFmu)K&-g&G_kNVPKa}ng&z)<0>>N0mw_D$GbBM8Ii%{P*J{n$=PMpe!F)=4 z@m0Be**MM?%G9jfh2z6^1&_=8` zEFSH9aV_{#x)f=qlhC|A{jXG+u3sQQYbFdBf_^^sT0K7ERBsFoke~T6tT1@+z2NA) zvgzYWwuh$DCuE?gc-Y0@+l5>VY}FciVbW7Px-SuIIxGco3yBe?DQl}<8w7;5-uzX@ zeP&smI$pb@!lC`9IrtCh7x~6h-6c!2FKU7x6a~Kw63G(hG#bkF6+Ht0*W^y-_d0d0 z{Q`hSDw-CTVTL~SQ>ltw;fZLU?z~YzM?k`tYv8!Sak)x-QEjFHxF+&yP9Ekwx81>p{^9NmQ3XTf}DqmACnBV?l;3>>8wZu<`7m6s86gI;+MH3(0 zW!I0t7#97ZrHEC(Q1CIQ28Vb7cwGVnx9R3b;12pZmdm6qu?aM1{D%7hjo-! zY_~~m!?pDa;?rqr z|F;KQuaQuBp+j(C0eu5PjF#ye;ec{R`q*2|+7so9*(DFwH-n8!o;O3%lrqcjUqzYLl=bae5sNv^E(m5|(_?2q}%9ih5r zO8#N@?Gc*iF6pXYH;3QJ4UPA<68`mgf6O*pIY7N0^W2>4NKA0^fc){co<%rr5t~wm z#F^rwlZq}gc7C~l^c%(+P@silALo5EKfFPW;AMC_O8d32AKKx5FvPGj*ojzFKDf0> z#jV-*H}G0HSkffV=H36gT^aGJ+k)mwq=+qwhH-!GhJKL<7sk6?-9GZ=mNb8h!~;IkCZ~@LC#~0hxT-qn<8Fj%-17 z8ro>I00_-*p)u9B>sBNdRV=}mO&eLs*md<|u@7oSdVdF9- zKwzi%Sm1D`!@M>tYWMzau`KGkO7zX`&?tdH#T+?yn^_D9mdE-U+f`1R)rlBVAy12Ff_Q1vgF2^eo&#YeJu;E z-p^!L=1un*qsIZqp1U8}B!tBzk2m+WxXW`Kk|akUgzLVL<5p1!<7MEN6)#s~X4Ku= zGnS+oLILXr>PLnSPXD}`jQ`*NAE4CB1k0l)+-L(MpEC-8h5j#x5}Jaee~x!W_l5Ht zh0>q@^X+`L>8Hn2r>TB?P_Feer~KLe;(+^nZQ3^8D%EuQcKLUgY8qHqp-WPaZfp_H zuRJ9%05Nn7GAC4`s{mqE;_*RR*LM8qN#%)Ht-hGJwTR0IADCz8-8sVg6eWV9OQ8{G z%SaP7L!*$JauVgs&umz~s(z*JTBLLJmqC4C$GME?c>CC8AGn6lvf#6;WfM)!YCc4m zgDIQfvu3=Xwt=eygDPU+ECHyIzC3AavSro)zspN*M}D86{6 z@v-@E&HDl?v9WbL^A;Syqy|n~SPC1<7C|F>7R|7nBh!&zE(uk-k+_XSTt-@Rm_0uw zBR;|Lc(sqRlZk&<#1(;iJ|Xw@>I1|NKzx_Mllf&wbuAaNHN8uGwSQy{?hcNRHa&z6 zA432%Jje5p5q7?EPApdE(MD(3mp;d*NuwJw#i7$ycYuzXS3@zLhqL_`Y51sx(#%n> zQQ?up<$!|Yw%iwRE!0Bqj;(@+-d4!p!ayZ;P*qBzcBs)fIZUG&WO4Q~+h0N>?$xiU z!Mpd~UPXgzANDEzpqxFRnTm*nhR3{#)kKp^ra)cvfa1PzI8m71o5l6n+@Ok2 zm9FUSNA>&SFU_QeLHgaBz}D39cV^-}Kgc#?1Ep-ryuK$|yqVCNB4(T}F1^C!vuE5r 
z68|9`;q;LO%weB}4h!TSX*3qy_tvMmcc7h~ky6(63oG&#+4>e7WA6Opfj7hN_lM$w zlp4iM8`~Ff|`?OEE+HwmI_PN`ZQ@a9@=U%Fy zUl+rA+b6Fk6($$Sh5c=fDK+@Az6o|^G6=>;V*+~-F{U$cc@pR}_!|!!44nj1;J@hF zn5qYa_zI|GxYJ^nbA1<+Ax-^+EVG#C*b@m|NwktgpH+{H%%p-ihHQ=;UiWca3@S>D z66aMrvL3_!L|{gHG)shzl;!u5-vga7x9^T3K{^^lA!Y z;trv$QiHwVYe9drTJ8-$e$MP^-;2lH9zVa&jAtd0pPo3&`>3_65>qs3;EBBt7*H1r z@}zB|;Opr?=-{HJq`bJ%l8FF~=4LRR0iFJ?(Gtg#QAd6Td&i4S*yNUDhAD`Bzg6yK zntxL!ACm?)qfk20|6ce|TB>nrkM-My{KUg^L|NOm(MBEG0t- z0!qoje8`43PX{5ll)PHLpK^xG0Q_YgK$@`1U4@gR*%ax{9`eU#kDz^o0hadfHCG=eC7S-ABJPr9xTQKu21!U7Y zF28PNk4x2E*?i4q$Z<<2>)Gg47#tSQEVn^s0y~l!K?g5-^%VpR;e-Q>cyI%ruNvT0 zh1aR4!HEvN^|M(&ao!wo2G-FuclUA9%MC>AX z#otKYz$DhYb|rqpXz+Xj!GhyT>2;qfI_fuz<|jp00rYs*33ZW@KJ#&fKI+nRyG&f< z#`1En0EJ+`$K@|O8Gz>?3qY=lv6X-wqA_mIM$2fmE8LpQhh=jp zZCh536j37}#>CxA@F$>Uz^e?$R`U2!pca02hb`;*XH;Zx1eCh25`|Mti_K#z$xAX5h z^gEOk}1ccsmltX-a*>^+({2!gSx-3^e>kv(5!d#6j?WGm)D zi>U>}m_TXmHa1|_DSr&Ct&gg@Dv#Oos%yzgzez^nt8T@}pvAa#;VsQOAKO0as=590 zv{bIoXj#Nyb3ZjRKG_`Qhh>i(n{mw5!maHR{gmn&hwr+EvnN)0bTlFT52vGvEIy@~ z1*sP9%XTsmQP60xkCy0SK=fi*4WIGbW~{pk&=d97e&!H0RfU!+T9uGQK`g zl({WcS^1dbkkLXkH$FZpw6gNt_{#hN5Kiod(vv{kI_ZhH@+>f~Run=kq1OcFyoiZ# zj>yE%d_$1$;{f+zINI-{tg$bec;`lueK<&)#-;CI&Xr4lSIxijkY4(I)Fah6MC1{C8*;jny8mq!g~ z7>4QrCxYAmf4K4!8nPhxBZW zZ!V;52h2tRIne_sOrhcp1?vee(O~FOyV*GAVru zhWGdLMQ1X0;4cHaM*E@;lg%%rND6eAlk|}q zVc>agGS~3qpnqya`@$PRjJQAuc1*(Y*=Hp>lY6qs)wN(y>fgx-MQmWW`!M6@c5P$` z?BLgY)Me{SFG;|?Sa;~pNSYZixwrlD_QwCA_wfzM^{K^NYn@XS=$Y$Hjx2LL$j zjJmJzRQ|3WZde#Net_r#R~&P{SLdu!E;_o{3lpfELKSfOjWG7d#k1#jWhnLaP@Xp^ zbJE9d!uEUL{oxBGzgv}IeYXF9!>)yzv zdRoByTSRFW@;otv_Lx%B%J*(v3QK-+qGO!L07pu@^x-X|@@nVtEtGQ2EtC}b#A`&t zaUkOX;=BLsP+(Z{-yN!)^gq9F83q{4K9i~3R^Zj_(O4pT0*?{yT%P#+iBU#a_K$Zs z3lGLp^=A+GDI{{a5bKdu4K!YuXN3cd%GA zrnnjNQB4u6&WFvf+1{=9fuTY7ZAQ1H%V(|E4A+9-|BOCnnV_LHXxO@nB zPVQIK@?jSlem+V6b3WylN)R#@$b*ktbJIpFNS%$I{PH&nqEtg7 z;QzbpUO|`c*E~`yE+i9 zFsa-=N9Bzcyq%lEC1U&mD!GPl);^(oo?c)ZlxnBA>vO9qAC`qw1|H|&NZ`Ynvj3df 
zSN80~pV3zF!15M*l6J46L>OKUqgWC`AC%snOi0A<<8sh05A}(tub%PSeakb&tKJ|T zHpa-TRIQnMw!EyuMWc?!bSCtbLjCxBW>UE+$JAf2{C1Ib*2nePA!CJrwJXo{ayWe) za8LNbIk9N|>JF|UE`=s5;6qFwr0bYH_;+#is1X+vnHQ1H*m#(_;PA_-y1d0^C}k85 z>^q|}m-kcW{V%&EcjbKiA6asQVmJc8#<^rFd34JvsSV_6}(Wgh@_pNl+ zRED!Xa&!!dDerI#4T*kZWwzaiPd#AT_?wL$N5wy7&pAv@? zmE2x9!PzPL1YW2Hs5>j`#gIgA3hTd3Zjm&-y&5h^Bx!Q-M?}HZ;Mpk?sH3T=u<<9R z=TLEYS-|>=UfOtg3~+{O@i0*LUa$jii97vB^{TeYAK6i56x|BfggGEiB3`m1t z(|~9B5b!KtW`WNEe5xc;8d**L^CfhQkyb`X82&?q#NqdvuxSGpX=@~l2a z{DS80J9Y#$M0(B`FCZILu*sc|#x4UXth`GDIZ68~gKFe?YAde#P$^nQw~a||u@~!D zx#Aw#Jg+6?T{~t*&JA*;GM+@JokO4g#RWiji{olE#OI~cv)hZ_^bqQ)B(nm)=+6+( z9!W~#SAWSrE?KGXl$kI&*?oB=kx+hjT}e#zybQ!dZ7i<@ePU_^W`$)}XdYHq09O1h zIPfpB-lvRHb1xQ?&M!a6Vf!R^KmT0I>fUO6@DO}EAE4~N^z~&hdHHjybZSGWTt|Q7 zbig;S7tZJc5J%qng3HIf$DBsu3IcV23o`Wk9hd9Ff0OP|4aYs|n}?z&K^o~Z5CN@M zZ6XMddVThy7m$9Q0l(0|1~q0A;r+$B63zTPbKkX6WFv>~|2&BRWf-;gz?c}IuncWwag5eN3c`%wN-wEq-o;R zAUW(DgcHNb6o+8aI@T~OwXtVS3lizwm%lv%Wd36)RyDP=*KUScy%-p__bd%^13^nU zkXT7i=wseJhy@91h7GcM4EXhRc`9d=)?y%DY}tvuS8f?PC3}~>TSE7Kbur$(6B_mf zCDujTYAOJhY|U&a@seQQM~?l6TsJiB>)VuzU(F9X;fM*%Yd zTnuL#gekF2|*WY>B?8cyv%r}_4MmYxWHr+H7hl)L#?+RJbU;Mt1-8{`uiT6fFzc2{79v@i3?{H-qNE zIa1HY%Dlvq_|*$~${y!b0CQTo-0h=v*#f&%7~o^9!NaXVjOA!Lr(p#kwuJ85e$Up&D+S@fBY-9H#Sbo@ew-~3 z10GEg$Y0eTTmiFHcJO}S+E@ozQN(UuNCn(55y(@ee9x9LQPy687<6C~8U~alO28Vl z5NLd~K6tMExd>6Q$pf?!oiDM(gx?T`)2net&~3yiguy?u9wN2yPXUAq2G2ndAl~Bx zoNfOQy|mBsQ91HUJ40XV%E^;b8qTR%h&Z zPGG`*eq1vBxwpi%E_y*ZM?T2thm8OqcCHA1p~4D3s4HuJr?Ha0KRZN`IxqWKPF|iJ zurcNv%dazRW$Am{wEu@EfVF_3gXsBTk(bSxH~ zQT*JBa8|qmnhEF10m9*PQLubuwgE#?8P1Q|-F5Q?tf*RArb-Z*2#WaU_<#AxLu9~( z%86I8;i}Pr2M4g(PVPXpJHv3xhGLs{NF5YaE=SN_6Ic2SFN?$)T^hN$g)Hle}w( zGKsQJCrbB-&NkUYp>ZDJ0*Hc8jOCEdy65Oj`giKT^7@XwU90Y3k5#YcKQZWyz zzk&(H6=+}7f*c9Z5)S1WGq5g_^S-}Mc-yi@jz$L65WbTNxoGTUh^YAAO$x%9uDc^J z(>M)z_(*7mc4Z=>q!wSzyGm%$FQVhhuDASfdAunH1p-9tCr4J}21IiXn)y<>GVgYS zp};9Z+noZt1l3CK+6l<@@yvmF*Ql0}5n0IV&fRWI9IuwC!C8{_BrW#FmHZ&{#aX)Y 
zKGN?rHio|5|CK51=5vt5;;12x5|7)eMMCJG;D(=%6v{JP;$D<$70Uf-2!Fzb$JaR1 z;)?11(DD=L0q08*VQ}7^G7UH2deaBe?Y-^mpP1E&$#@%v1=AUzM$2yi+%A4~w}38E zQQ#NrZy@41LTZRe^AaBo4^%;j$n8UAU^`}y7t(Ybo9l9zRlgg;v-YB*r&BAiUGk6F zHzY(g#Vl_gO{SM8kPxVx%>bs+v0v?>be~%W!%%U{fZ{aPt$cMAA{(8IT5r{Je@_cd zP6$|I?`fg?_={Jk()#x#P!UaxA?d)XO@;TIrDl6xLr)u-5q7cY&uAhLv_$U3?1NM5 zk<7MC<1V%jSh$M)1Q-~fbc6Y%0e6#+r@NSH%^heswFU0k1h+3B;Q#$g=}Uwp!-Lfu zk_$QhTXcC0=$sKQP!T0G5(*VdkU9)k&No6}3{hnFO#MuE-7u5xOW5ubF*JT7k3{U~cUXQY6UMkj>c9Ek*Qc1RV`WnQOHA_pXM+Q#hxpO2HY+5CWG<92T?V~_u%tT%

5XU!jk;VJkJ)mkiE^&EwHH2V9H z&exHL+=HJZ_X;-LRKYo->5yJB$!M=VB**L-O;Bl@bf>gn?oI?*)yF_8tcLctOFrKj zjcQ+2reMB9n|890wx2AT1Pzhqos83Na&&tNU@pe(mM2n+W9TOT4RKSr#CF#LP6?+` z?I4Uj_7cB6@v??C=u+GNCHBLxWHvp`23FQx|3YM;XzXYLxt;kVIiyG3NKDa@jAnRm zBWjgb0hbtP1YIX5EMLtxt07=CVI8%pnsZi{@XdQkczYqQeaq|z~ zZJ9io{auQE*GXW7013jI1Qiwf6$pj3(ALLUB-kkSB^$q_I0~7_fQXJ#8om9_2P=C2 zpu4t2c4NX_sd1cv;1kVbGtEod_dU8XSMgT@FE)*2BPYrX?tQk$LPlbue7K7EX`f8hBV zYYJPE17y92u3eap?z)nO=?BgzA};oRjUF%FvP|9&IQum7KK8_r%v-&VaV_Yn|&K+`VSW%?Df3Wdtj;`O7*7;-*+1DPNU|# zOha_0(aII}5TJ`x-{VkK)3)wwBvNe3i?E`V6km^^l%R(bFV+CNm+)BU5$aim`S4u< z;I2U6R4E)w{zC{HOAj@PYBB_wL@;ed7)=r<2``AuIs9cq?z`5W2N4Z?n%jj=uaH^q z{3N8C+1vYqYP*R!(f@?M;HCP<&z>qyJoX3gyOC!F1YR9{0LCNDh?i3TMkZ*YCjoA) zGi6v0t19+J48EtihJK6S3=vY>Uc6^8LDNLEl8H|J36G=SSI4_9PQ?-gTKeqy8TW93 zE<)sP>p7Xm`O@ywL>#ZCqqIU-aHT4|a%8%y1WZu%WPUv1aIFQ|33J{F`4tsXJessw zrB%jmLQwEL0tYdvEk2ybEG$KfJ^S$DwSZrd-G#9G^LcVlp&Hf32c-K81xW2gyXsv% zk*IElSK8^=OI;~bMMBUkxTVDc3K9%aR>3HnGl?WRqq1GZ5Y&GHuha4i4jo$M8*QbN zxPTr?z42FHJit*9;0(Y|nKGmps~}WzZwCLo-A|XIZeb18z%2N)szk&;Tl;g&5`>-= z6L;Iej$fjm5#+aE%=OQZR0DS4E>8&4C&yB$&AC>OC~F&=FGgrK^+%A9-A`&^S`|P( z&=MUXEJL}6Enz3HRTE-yq_Q*;hu*r!eO}~ zMrI~)QAU%X%e^QO5h85d3pzbCMuiHB)U}khL(tKPF`M)YBFM_}VC=uu3P#AI2=D4b zLUP6I$26BuBy?BX6@!y5yZz}PS!!1ZXEM(*!@q(zZHaU)I)(yxpxu7}4^&}E*t90w zq|-bS5@K1@f3ICn1VXGR^!j_;FG@etc|oK#`3mOEsQBXRep+w^E@JsS?T3X<;VOy- z)3>{SB5HlW7931)2pNjb=$P$q(azya`mbL=YQC^OmW7xlJUU%zm%w$VdF#y$ zi5AVQxGqk{pW`=2QL*Z6InK7Xs{B+Qy6^sG{Dg|!p@!5JFLFhdbhX6bJgnt$6N*^n zLPBD%S2%(SM!ODMUBbFM-Rr~$TAboPe_ERcTY~(JqI<#o9Sf~;8rc+L(K@@pmf#KA zATr3OW^j_x7lk76v7lL6`}S0p`|_FLb2F5lTS9?D_h|lm5m~5nU5obnKj21WARWHG z5djXRR!MJd-j5AWoOY7wPSSWUWX?{{OAK5r1V;Z90jP|OaeHc|NYD+_tJC}Lr>^v$~ zQ+W-=B?GL?KkQd{CaV?wp^WlWYuKVw3UNe5>8UXp<3ny-wtiB?LlF(Ug*^O?V)r}g zlG*7NhO!d(+3e0Q)FNXtJ|-{wko~L8lfj7PH@53xHp{%mY9rp$e_1oZv;yyS`IpJabH zXj4dTfn&ng1bZ?9KN-#`VbWqD9Ycbx8%ane%)Us~k-T_4024~`xjim2`+?UT^M_?J z@jS>h)&6+FhO)uL2j23e1Xc>KHrzyElWEi2za`;d(I*ztW%>Hi{b7Q3Wf9?RU=a6* 
z&UtZg0aY1Irp*6rpW#L`15${~q2G-ZF?!waApzVM*i6DCc=5Q2v}Bi51*m+28-8hZ z5Zpv5W7z+pk0(V%WXvZ}_&SjVrCsU0?T)XGO~JO$*cK8bHlXY!f_gbDg-A?&Ck-;M4g$efkpb}e!Qs<_7E7KEDonyI@aA*}} zVPWLc>EY3^EHW1nloY$k8q!pQCRR&NFOAOGp^!kIe@240iR?bc)Tf*XYqpBbXESbx z_m+czxm1Mxx9Kc8R|s1pqE42eqrq=y2NshLrUVAj_$pMOXo;~Q!XgM^F$sDtp$gcT zWy3-===a}0wkf`~KVW_UkMnvb+WWLeQr*!VQL1Gz{gANLq)VNw41WO;yq%~;f;s7GNi zb2ZxrER1b@Zk6F2VNEKJ;BHcpIV}4gdRmMHUlCj!b6#&?Rq~QqjRc@eg>0%JT5@S8 zH{iWWfa}D{nnR6e73RV3xd^;@0OnoAWy)TUk><(3YfEu z;VWI3dJRt>%ZAJ7$s_}#GX@Ox0y>oLJuSK(*bQ}VR7Atf+|3UHQu zu@;TNtnF_*n8x57kV)^eMfQi2>1mOik{6I3!>N8@PDJ8@x3Y6yZ2+{Zd4A^;BIFLh z=l0m-*fH)>81ysRSBVdvgO7QHE4(DN3vLI18(Rx{qW=`UU`$0|<`MrL?z55S{g9Ab zvIYk@T8O~IbzQOP@F4Jk2)PIC*Sp^r9GCz6zUPc4@VF+BK%@=iItpl34}@7vy4ug3 z+HP|jf0Q1){S(4u0{7VCDSZgr{Cnd&!9w%x1g-z}lN`SN`;*v;LjF*|)4~c*QE=pj zUh$WUB#Zo@0JLb+=gm_@@WJnwhYmsv$qE~zZ)^mr)fms6#RkMb zXxRBWvQOv%;=k08B8`8ZLsjEKz8@HQ*`t#9Nfc00JL9tTzIp=OmRN38gEf|pVD|0& zW!hX9K2ISUteB3LA*c?UnH-%|%pZ&L_&DZ-iaT%d5B$?4{a^TJW$+o)35ikbje{4* z@7n4nXe)<$qZ6K+Qlr}}@lYiQ1`<2k{+$CLOhz0wmzienEfBC1AB+?{=eHyIT#6Xy zL9ZXAR4ucNod2m70KmQ=^ub-_%-UYWTtfglPP$@Us*=KR{i|wN>LMOwnlaAyk(&bj zh48M6hXsJ`w{3bQ$wGp`ZN@pTR)C3u_^45c_q9?`uBa?`q{ajaNmaPS-841u=?Qa< zxrn^3sbTqny++&ID?elu#Mb@AQW|~(WB!%?9tE{f2z}MgQl>@Oe^5po%%MC<@yEGh z=_ZX3S)ey0TzTsJ`-6U(agAT}41c z7xQrfDR#o+y<4&yu<>KF`-0>&e8|t3IO9^8Gog8YgVBnKFfi-^Zgclu+ z~F6I!kw?y)3vmyVd*ByFT+PB_4pIc=Vt)G9S5~6@+mQ!mJGB6?r>$&;zA9#NdyQEOS=V~ zIvTmBLL(eE2TC&ac^D$B+uj?T%6f10*u%CSY^^_pjV973fw?7|xEC1pB$M!dDg@iV zT%d{SSagYpz+YIy%;mmtS}ME@{oF-0O48(IJqJ z?JAQ(MbA3~C>GxNQioHq+YvqeyE;4zeffMjb-O7Qfl38rIjGlOy&v|{2-+0C=6<79 ztP&rr6qg<<|KaoS<5(F~qoiN=`)!qiV*L5ZPojyQNbDWZ+lvWJD;$DEYD-H>DuhZO zghM)F2|hICMh9b!R1oE(_u;HRpKZjs* zjXFQqr03~aP+x5&B z(2$}vuWWS1H34L(1FAIc3dMIrU@6crT1;F;%N- zJ(a1oX*5^*8UtO7Szi)JUEDI=+@9|-*OxlraGr1BVYFC@it(xQYAGzZAKZ6wYOkHr zP|DUxyrcgM1FheB+y0k~^Y$uRQ}9NBK;qt;*L9#~r!3X+&lkD@T=n7MeuqL~0LY=? 
z7i?tA!0bA{NISWP2UnRr?*q`lNIujcHJI_qbIFVB4UXJWi!!~FAG&Pu*26^EvHTa} zzh5}B`Gm3czPbD@2sEyHi?o!nsHPEY`|R!C&)zs!QR(;hy+I!k87xLLgKPnSYCW{M zpjBZ|8aOU*B#VR+t@+|)Q=E}qn31ni?SF{94s_)a^&6~fS1rCaJ36}5DgXPj`O+of zd3KklN0v@r=NYTOB{wZ0k@((pstI-pUgIvq$xbF$yOP`^0`4Y_a4EZ&STqNdN*mX%{ri6Q^Nou^lD^U zJ->X#H~;tX?|+d0djRt(IGIMS%ZfW6rGi8euz*cP#CFO)`b$H?L`np@qHa z5CLk+QsfW{szNDYd5>!4yw4Yl5K_8}H@JrWAJg^H&w4uo&(((W&bKpzj4v^|1pi8= zyU#bSa3PeQ4``EvwANZVVX9m4xI5J{KdCPqaSNO;l8H;lwN*;T>6sN&xRo_57n@%i zGswr2hK!6N{S|M>RtxFi5RXx8T~7Man});fU#Q&9Fv8kO#wZ?hb_FU)5cDbdFnHrvYKy zTqj)Hb#e7(54v$V=AJnDv6;0xd+aa)ub%nUj*(Bwqb){+UB{>6oJ*sfKWBXrX3qdK zAaRu$a5NQb-+s=7ZV^1hp9E0L`Q?a;Xfof7_6_}PHVjG@^QZa>r0_m1s!tUo zz`}~+S!r}Oj_zTN^Ph@y`ePM!Fd3cHV-lswF53_f-u8WEAnV44#9-ePzlOCi>|OI& zTH)C??lk@lq9=O=ya%`|-qfQ4@-(3I?9jOGgqH57oc$-7<7b2;L!AWAgs{6A%K_ep zbgU^Tcv)_OjmKf(#=wh zq?m0PD76QCy&*RbAgVlm`A+t{)!vX3x5Grdfu$uU_1muzCeHswJRw`%$lS7E83!5gfh2W z2q-Cbh?KA_Lq~69v-n{=J@(#TIdsbe8V7>tKLH?$%tkJ__yVg}`pNP)3Nn;n%@-GQ zk;1Q2`Bk-VZo9fgw%Tt#{!PN9Xq-$B0M#)dEcor`xp;u#+)hE|bo_%vz~vC6b6RgfeTUxrNvXLzrqLdUj}H5#Lv2Br?qXLwXp&1 zg2Y0Fl{g)o&!KwY!Fd0BFhXYiOdN=|XF;K@{GLZ1SVtX4pMi9xNIuKJ*vWq=mtb$F zeVnGoxa&vJd85+~y7&Fb&l;c`xxNBf%YT5D(}qc3kf%b?`5L^9EC^V1^BSOZG|K)tHip2;B-t-jz z6CBc%Sz(b8hk}C+SH>>}Ot`$MMrF!Pk|Vf6$!rDeW1BSo;PGW1nFK01*_@I8a4G9- zZUq7rN%WdxTf>-te`r|)dir=$z0N_4w~1d-VFloA|;-|PxFCq{G0DVNfh|By!y9j zkYRCXf5Ew7fNg78WTL_FCf$tJ=h}}O%QNQBy^AhKjyIcqMXj9$Ei&#qa=~k0TJ&Hx zS*$P#u;9<;%EW?jjc<6|n2>?PEs!|K*tR{e%VcoI0ry<$eF$7UJ}*v?C->7-R#E9F z70DK+vY3v8`yWx?C_odJ`r&!?ZK9HSCRx`<5|G340Sraq*-G{ni&2NCqZwdQ3hXy} z$H3At8_wWT2I>p9Ma=hQ;IOp%l{RNF;x?oHp#jsMHR3w<`L^X-KI7 z?HcTHXmC3JW&}m&_nd>6z~2KQ6!EHU-o;ib25X}*FX&=yu)%6a8}qn6CizUSnKYcu zFW2^PiDwK_H62~IEP;m3&d%;sO+cSU|NRyah`{KdF(UAQgcGJ(u0gRmrYJe_jMU5q zBAe%Xd+iur!(OCaK%vHI0~^sKaHGs!0ePM|pr5Wg{|(pL$upa$yiuL~gp;%jczQjPX zq9sFR{ilp3Vil;)jq)Rdp3U#MgM_a?Qf9q_q5(jBkVl@x&aW61@w|~ zo-yK=wEV)>=|X92z`a+5w8dI>6s<83>XN*%-<|sVrRG1v}W zR=gT+)*$yi@5Qqn-I*xrX@(OP*(P>qJ@yU4nP*QHk%lLVvPa|lE5)4{GGWRk6}G@C 
zjJ-Y2Lm^!kf;m|Dzi{YJ@a3inz_d>E?@s)*Dw;+AEVDWx1S(Qr&zf_W5G*}#)PKC* zKpDv5{dNz`jtgJ9L--4oxB{ctPXcbEpIF#DFRe6yb14Wz5kT}Im&~Z^B3T2N4y2ek zh+NvmW?=!CePcp5Zm9I8hJgZtFg+~ta8NfI#UXGg*bdb>w0%Uqs=lZ(DO`?!u3L^G z%xJbO6hLcNkodC}xrV~x&F8XH!e@Qz`CsAZ3GDbCoEDFL3XC%2+fMmNpbv6@d~G1O zIk>imqHvk4J?NpUL>LFy-d_IyB+;!E;h@5V6!^mP-%dNZA|h>TYSu`q;dTV%GUq!* z+2e^cO6xNazhMZ!;d42R0?*?hHALVl+wuRxl04zz0#GYa^^$jHjVx07w3--hM| z=z~hK`Q!bK0n%@a+%&!U8guQK7WXp~!SNi}H+`?e-Z<1xx?dhH(F&mnuBUO>%EraT zkp{^RO*x3xte)|?pK=UN7wU%Zuu}dz)oolqK zW@Scf1}RLt}YYJ{L-rKl0xl@5gn<*vlzYq``+7W^b(h3J7TPcsLx899?)y-e8 z(nUwRlv9g++JbJhkhq?A62yN2DLNR-auCi97wjZDFq1i5Hwj=TIse4{+CAw{mRAmK zO-AsmT`5T-YD4Lo$3~A-7nQ2+kR!xa{wRT!UNaSN;Ljq<|=L2y%)37IF6KYUu()Hh%9 zV0W~*oXRQqBXFcS^YqQx{)s}P*&47;#Y)G(b9{TyBpaR-kou`)f%mY!nv%BYE3&?T z@vak*NL=B&(NhX(k#JvI)}SJG<>8?8s;eIyAsfI&P^^6f&Wu7zc)$07shqSHX0%`MGaKs!vGVZxN z+O8_x$n%5~ENQ$EE3hb8T=_FfII6M7)*^w9D1 z>0#&+s-eQGN}t|`-6TrC1Q|VOr0{>iAp|=5vk7$bv*y~C#C+c# zj1^|aJva?HL&QBYc?#3)Lk&JgIUu`=ZtF!UP7?-wbt{}tJP8J5Yo}VzOxlfp|DOD| zIZ3WlYzJ=Y1o}WazOVgo4JW}VEEOPTJI+?s_Jwz?+2d3I^ z@5UTtYVhfk{^!w+2A(^kn!%LOj0uuYpZqh?v(yWa4IJl6+i!VTKny#fT8d zDp7I%%VDCXcxcITFhJPeF(QF13uy~W(is_ry#I>=zBb%G+u6E^$myJ88SA!(MXls`OJ(vS4QZ6kq?mRs}Fi;@k- zyuuk4Kwjo0RVHwnoE#MtKl+=RL)JV5n#_+QAjHv zt-8e1>E8v%8xZ|W0{*Y@rww-igBjA}`Cg4st>=mRCejSxn8~bD-uvIu&93^AVWh_K zf+gluPt#KVX-evUQHBl9|A#Wnit(2+9E#!rP2_3-eIM{I7V1X*mc)T*d5(qOX*YCR z?nP_r=(7adi1>3daxa2?krPv*!;>1rup$H&f84`vzEw;cFBsW&g3b7UGoD06d+=XI zNbaXRMJ|H2*v#d&gowniQY5*#HW$c2cK;Vv%ur!psQLeb6^lq(`g-Sg%`-#tJ$G%! 
zLa_H8Kay$;ieD5Nl}yMV#5_2K{`zvGM13$c!Rmf$w6ya_I{1HO>UNybgu<{yi!H)x zj9G;lFC*Vi-hlGTP$T|d%7t3qpbJ|0(0)#6`d*CGx(>e4f2JPR~H@Vg~ zU43c0`@9(4e3f0#a<;;8X5tlD`YBL*GU~~NVl8RbpADh2fQ*C0Cy)Hw>kEx#WDFRA z(}M6^uP*A5Br^k9X2Ru621YE=s8ck^Wl;ed1x1bJrg^2kAircZ;=IkxSECYV0A$-x zJfv9(^SVS;mn`jxsy7vCVQq4tq{8$k`%wQMS^ojGQplIPb?Qe z>va!TuWoH!#VyVz4~95}C^QWR`U?0ld7XLOzevT|DIKk8vBUj!y*-SKb8bz7Q+*W= z{__CZQfd(+rn2Q2+CA9{;*RB?4#xEtkJ;!X0&v?<5QQR==I-7(B8upcHbQ=3N$Gx5 z>b2kr1KVgJqhUT6xtd{4;|p!SOuZDwY#{Uyb*ySs%E>9~IG)(Q_%oSW<&j}_@R14L zZei)Gshr-`y7@8uLhrQqn9HI5t?@0KL>eX3>+iUjG52c+dlk;pIK}r}a-;cHE|1y+ zQR;fLbrYz`?%xW|SO2JNtG-R~tY$)9JbquGcjaT9#*M^2g45qdt5W}R@`wcsom>_{ z(!2*K8AI7Lpt& z=rFULoCF@?yrMAGP)PIH&g<9YvT^fJ8jZ`)veXUjmxV?Ku+FNwKKg3rt~LZ(i*3AZ z(2Wyi7hZ$Qt((=?rsr9a6;{Ww`-_vw8E|@?2ksNxlYt%SF(|L4gVin00r z0jqQtEDwf-p*|IhSyp4}jQRRt&;IG?BiQ8FCxnBd@{r>SS0C%P4=?OAYqd^Aw9sV2t41pdw6G;vwq)5%Ah*F!g zdw^Ms@6{_I2daRVzAaO9@`yfVrdixxel!4Jn)X>irURS@Q3yPcS?e<^`fO?sDK)-l z@dwr=Br)4YgundW0L72h|02+r>(yP-P*i2~(WA4A&o^yroVttG_$#ehapkz9mOVP@ zny|6f+ISnTHN50PDZlpHCOqrgE6Xi&Mg|@h4JoX#6T?(XJot~x{dz3fgR78*iAhQE z51I2q3Fnya9s~NLK2Gg&zeZ60*1yNoXo*3$J)xl{V(Bt-M4~q4Am6Myf#ZEdga&gR zhi-#eaQ4xH&siCpWK|tH5uKuNllLoqtr(mc5(KHnn>|#<@D1RYc(kj_ROyV|iZf;L z!{?H`j0gv^NG@x%G^}*sGnFl$*{E+k*WgxNpp>5BA-RTxRh5rA+<+HcpC7gcN^u$r zaO{J6|3^kX+AMSShJ{R=?Z%~3NOxu}i^i({EF-$YFY}^`QiEcTHPn9!(XQ-Brx7)p zE9t1#3gy@K8d5MaU02v(W*tmqan>vylDn0#5t}*;md@G53lM$cU(tNF%mCm71 za(XtGXN>!VfBvXJ*ameZc>V_G(JXPxyTy}F)e@;@=pj1!nfo8GJu0OPEx`}526&mr8=Yc;j-0bc`yCCX^lL4 zOXk>{E*`tjn)lF+(kiF^Xfev<&|cUOWA++5oJUP?34en8o&!gmJ27Myv4%#g@L&>V zj_KZ7PeO%PzX8)gDm_oaRE4CHL;`Q@Tq+eJ^IF^=gC1wRvCi_qwY8(Q=^hrlbZw@n zl9e>!Pc3@zg!cwsXSSL*5;$G3TfA}eS>j(S0ccEh_%xiUmQDI@5 z+4`%IUp_bdy7msHJh(|^+=C0#Ak-F{WQ1CACWOnzWGFXgmSh+5LzNGEfkq$vLeXAG z03HN}#2nL%DlkA6ZnmKB18RY!Q+V4joIKUFzRNggk201YQmdea?d+J0o>=h;K4g8s z=H~Ia$6>g4e)%mVq%+zwSN?~EjAJ2IX2)16opcZBJ>z*Djt&Ezgx}Y9&9jP$AEpy7iXc=C0~f7h@V#_{3M!dGuYQ#}RU*Br zo|QgsM(t+*O0Cya0F`}C9UMeHnSjXPHt&a9^Ecq@PCF!wh2MhDV{0%`Fq^gIF&fnD 
zHuNZ&8_l=G9zyx99dyU>tphmS@5iICEhuYXH&G2_a64j2bRG6pxymKwvyobD7X(qPO-}5^LartTQ zma-1*FAXR1>Y_Q$%`0G^Sa7wVgdv*n`_`kC=clU)F9u~FpDAj{ZD=ZQx_WzKj1$m3 zl{+^c^v_g#$@=)J|5zPSR)COX(P53gUJV{bvC$6x)wnNStz4*38iYy=ok0rT4~|wneC!yjha*x7n(XIRyVd3&K}iP$>{2C&_5G#1C+6$pkjWR>Ys`4 z^H`o`u}t9=Ai#s@zyK-rQ@3!l#1al7TMWboIalABQ?P?vebWwsr~lgV(Pi%IHzlJ2 zE~fihE6gnZ8`vK{+U=}Y+s;VaMRVQx$S~bdbyAl!dBIzi>@ox}>84S?y_HIB5Q|q# z=_=?`h^1;SDpisP&A zkKPF1vv~cNB8yJ+NO0Prcx9#?)?SXPXt?;Rka-?^ApXls5CE0@bv&(Yq3s%*rSv$9 z+|G9r$z3NeOS}5^R1_Jh3(t|}wzj;tw-_6g|4Bush3~Sgdu%|Il%VFcQ;*baCr?C4O>J%!~#dToF zl-7F&yXvwGj9lLC8w6qfOpy@#5i&*J0Z(INEJAniXd+%0_kN@!g%Zgz~ z9=jfm0e5a8kyVA;aSDr|nWkl_?#4y1O`s@?t^{%n{v&I4yUCS~2-uFC=%1W%mRuHu z1+Q*jd#K&wpC)~OwS&_yRQ0Y*7QT486~!u@H_7vh#RDs}VpR@hQg`7K_U8>@mvoKys{Utr#f%bmQ3sm{5= zHCXF-qYQa$?3%9a{SS9ca@o&VRljN458*UlY^K))ZVLMZxG1R!UA@chybe@Lb)|&# zNoCLQ3}aHUT86i9#T^I?`$%P_iI5!Irmrb+er)iE;EYmaFj$jZ(+7)aoSEx63Did3 zUGg2$M~$-_+SVDn(0_RuhGmnD|FPwTZI`O%v;x9O%sbE9c`}WM83KPFmDBEabwvc{ z1En3rrZ%ql0}L13I$m}B{vGT0UoPPxq@fdI+N1`RY*8WkqQRz@8Wq zir}1>bzc35y1}2m{94<&UuJn>SH86aseTwgNT+svq2FgUIZ!53oB(Z9=Srx3m0G@% zZ@RTQ2g8!BiL`E%RZ6u4F7*V{ZdtMuvom%by(21_V-hESqbuxkvj+st#|o6?R#76a z^K^na{QmWA4%>O=S{b;H|JF>FM-b|!08@DM1uRY3~gWl<`(Qn%TdhQ@gN=r`g5Q7n>y5B~KdxdevXC1!nTmLSGPuxYHuN zXHsC@ai(%IXgCmgSL}P=oQ(_er@ixyo6*WO_e@!M2;KJ89MpSQ7)c22{Jw{kdi#yn zC?=a4{)|p0NZ1l9MeL}_$mi)28778__i2kHiq~!~qFC#%6XW*pXqT;SX? 
z%4aL3lpL)2%T1Ox!`d2u<{nRJ&En$MI2Xm>Wiryf5%;e?lJ|(luWm_+s3P#FklCZ> z9Xpz>?6%BOSD5oK+9XDus=mrYGVuig>!6h7Byz4w~3!nUdJRKl9)nLh8GCA1N>N1c z9LE~&50G}JamWx1+aI<#WY6)d(t*PYrjm}BmZd9!LDFsewC$zQUj1c7s`W`txVQe7 zHrj{0FPVzkL4IH1s0n;OC%LNV49J0-R(}bcaq>Raz8IbH+kP2ZyG^qQgfH%*iNd*M zA%!zTmr(-hJ!MW`8PS~&+76HRqvjmjl(kZ2X;YR<^!vjqTp#J=JA5)rkK<;oR&n~m zr=8FA#JR)&oUt?D6(QeQJzPIJE5y9Uty=sxmV1;{82EllCXRV;Nu*ziSNb#89{*72 zFc04k^TM|@EXHe3bxH14)o(w?oA%v{+#Xz<`9ghhE!92rg>`-1+kApf>9Op7_@yTs z^X&z6AbW)$qVCF%^ESGT%O^J4Wh%+enV5sSo;vr9ub1kTwkCC6qS@*LL7ObTSx1m$ zKAMUqeWzoEHo|*JWi~NzSEf4E$|Ho!(!n>HFX4WrDKYcYkRQxPg zVHQfkq${AeUx7ttLQkvkbD4=WEhqUKe#z#iO)_PUWD1p^0Ru{2k>KXkz7W_&+LhS_qt*KC1WhxTA*U+p_UT9Jh}QK8UveDAAcR=6Ar*?Lh4m=zGW4=fWJlXIBd)6C zu!N#PYK#=?H-4gK?%oCU0RbHn0d@5LOjJZ#@W`Xs?B(>UyhLJaZiiqt3{+H)v#WK4xGY>GAH+O#N#YyM!b9C_OUM@L<> z{emP-u0nm_4=JP@?Xmu4|9LJx%%AqN08zes?f3^@>bpdTN}T~&(62l%F0ga`wWj$J zjT9ap;)o>YW-Br*=_*RNW^V&KiPj@GksGO)+ zQH8m*KfZnl1G2hxr@(NeJVJ2rJlu6+9$=MfYjBnZ)bGKU7a9QsbyqzhF z-A($=54UiY@s2)yPTeEroTR}o@k?TDLr)6PH`n5HGd_Pxnn*}!gQD-I9`@F$?mr)u zn0)8>CcYH5{5tUX#q1r&lHX`uJ%C{GB@awsj>^9Yw8X@Yk3>Djfi%G}kixt$m=`0> z+pLT^HW1D*OT`_F;AZ-+;qkj`|tkGxbv)E3pmCy2L>ti zvO>}5L)}HsxU4_DqZZ0RuI~BHhY~;@zR`6{FRnk9`05j#IE$M%Ml7l#DKs1e8R@^w zDl8>lHpmZnBiH?iA*U`5oxB~c*)^4VF#_n$30X$$~8sny2DW6)f;;(DogXq ze}eQ!Kk;+UFcH1Zjd>o+|6SlG-&wAA!4}(5FWh;1SCDo z8Btk-+9Hc&YSn0|+XdMRkRD8J`EjLN4(}$>Kgr_r<~Z zZ_|LN9mV#5ezzVx3b0z3-u^qQMn}-@h{C~vvy?O(WtXV~_Si0m!^`ey3^;G!QfoIP zYnoZpYX^xGVutg_$}c1r*qMH!XSnF((KHvocM$&=TO$L1&yWXx6#o(aF50@krB?+s zdG5!z^tQ@cl8izEa3H}IV#4K1XbMx^a7oCOn){xkVBL%|b*0$9`=IBR9U`A+CoBB+ z|2(qtTQV{Y{Ufr~NB3s#$mgm)4(`9DE!LalMD{;te7-orAT!Zc4@AbxNBAS5Cm1EY z7uUY%MC;f?@RflRb3MgwQ>{!M0vOb)OrbyRqr4toG5ofHrx7X@EIo7%p7$N9;|h5? 
zW1Nwb`cq*C_aPl=y`s-wuZk#fOx4q;#6r0Hp|&igv;#lb=|qI`@0q2^=v9jk$~Ubh z*xZ?;u7o1Sr75JTuVm2il#(wx1WVinKCR}ka>w5rD)hHHFL`%%4%VuJgH!yeQF`Z+ zG%jzH-{MxkF>|#5^vl-$kowD{;dV*D?P9?_y$Z%e z3cf>ATu5M`o87nG=ICm}S8zr!a;?6+&i;&(*v3r$`IJ!fC5&oBE0}qrK7Gb&(H3Di z4Y%s0Y4EDm9Eo|=YYTQ^=bhL7RB~AY+nb1X6z6Y(bXs0Hx6zs&j(v-#(akFRF&eh7 ziM>=%`pLmsb?17|!o5mHr782RBL<{HHdS&DN#5o=qY%2)Fu-WsKo5F1^7;$X1h-IL zsc3twyVo)^V@KwpBPX-u@|QOS1mkot>}zXis2qk+ZckTlhV(97S6&{kx10C*xW&ul zIkOm%HaFdJ7^yF14_cjG1@h{IcpUiv)Q%1PwV_m7&nQpL}tA1$%z4Wy^%bSE7YV zs@oQP*Ohd}t>jV;MT4W?^s`w&F6n4;KO_P!$O8!v9I$ZB9?UO#bkAp(JOh4t@o%sei$& zD8#SOGKPu6YJ7h3hjWMk-S+=z0W8@{^SQl{{DDDLni^du3M~8i*l!=*DqA8J2?*!R zIq<^OYIx?p2$(8h4Uw(xyE~|Zn86@y(g%4Nrg|M%WLGKhUPAsI2Q>Z8O(izmm>xM? z{|h;yzJWHp*mljjLc5zYwNW-r54X#+?+VK~>X{^`$YiE37lg0#eMwYre}5{~)RryN zsV;X(S;u}vif5;Ty$WI2bm!w`g0Nf2T71{9q3o}$ZP7Ks2D{qPz;^uD!6LD|izhMN zfDf&aHDo7JGi#eWbz6C6?z&dZZ_2`pd-8S>4V7r}A>q`0j;q4MuH{jF^?-Ri<#FOm zMM4}KxonMRy7g;}E^!uGE_|d5mvI@*64s^W&=VxNCjU7zb3QSC#>npdHLoWxQSM8Q3(6cJRi zmMLM&FKN#Xs?P|V2k*IXMJV;k)L66eaSPtLF%TDQIdIya*2G&B(~-2J^<^`n{32Y! 
zsc|rg+)qcEz=gbgerJ1SVC_|B<*r~4DBMaSj@M^Ytb8M zF#O~^)y5_ZsLaMoJV@)G|p-T-Y zwm9}-Mh9uQl|T4BD9$1Z!vt&S7GUaX(ZY8<|L70iw7IWhb9};JKM&gEVM$I#z*mP8ZlL$kjbekeAtLaFm6B!Zq?_3E1i`4GjC+$6|GjR9XLu&?K{UO+iqfMp@sg1^f6*nt4@>U5+9GORB(X_}K&z zMB|QXM{q}1i`s69`wL~}t4d~`M_(&u#}-Sl^<3FDr!@-s^QgUho4sTeRQ|jGWj9j} zcFa2Q8uC-rn?qhN7l))FKLoj?gEePLYk8REjF#d03gbB@oYE+rmt=Qr0qeh@$gK^l z&t=s;zKet><-H3aq;IW7t!LB-^{p3>jmIAOAgOsl=SRm@YicqAJAV4VW%&Ii?a4GQ;9F?cfKC)Uw!hlASv zbI|+XBEYp)*_<;gba&bk0T~%|wu}f>8uSnYi8b@2+rupzRovq*$2U|lp3Mmkc8=uL zX5JQ)B=bqmeBBqI1M?HL!X)DBy+nFWI3M-#u6y%o*3~2I9k!7qs)GepUw|Eh1u|>@yd*B2lxJVToRd=DsFzSn4{lylMOzrS$$Z%v&-3aOU)moN%S; z@^RVluu*Ed`Q7VfcYGgH!m@Mo_|7H1v$d1^0|kHjcSSLLgXHetGtf|XqLM5`2C?EC z#JcG`29uh{T`TuKhpbz^>RjSCX(*$DnQ54_fHYd*4arvP3KA3&YTN08jQ4_2pJs~1 z%0jub!}`z}Yie{V&AE|{qi~YH9sX7`V;h>Nda_OXm2!L7m&xph(^3#8(N2~*KPBu0Pn+$ z+2?u=p*5to-z~KNdQ;n@v>|HyWhg|7x@f5qN_0dRn*dG#+dnleI&es?Zny z_```Nfp%A?9XMn<-rWFj*y?zpG&2@5X@kbq4p<)~GHJz#nHZEG;5clTYcz`_d>7(7 ze#oG4Eq(qu#oUF-HCg4nKLfgVk-%7BL1S+;NdSHNg`9TeLYdZ4S^mkw^6VZQcI}-e zy_WWlG}~Dz*Ac7bG@VClUn&dL{gLkz6#kJ|6b=OF1c=6J0#=Mn06RvlQktlwGMOl6 z@xdC;t~4kU37b&|x?55z>5S2*XaFA^MQpBczGmcm*8HNATGcIV62duqL)_o#hln$G z*z-7%!J|J==SsKmEh*Sj_ZtU(#|Vs!X=E`?C}n50jQGczwps-d|IDTMx>g!enKGH= ze%ufvQwVw;LO#SDb-$Q~Y}lUjOX4vLF2c@39tSCTe6;er9=_02w7812I7LgL-yi&pqewk?a-o3kfTLW+cv7F%M=i`Usk7pPK@H z9N=--9CJ*ke1aqH&}i{s(*m#gDf;dw=oL8>Lfkd=KKp~5qXx$-1OWk&fupfbwHEsu zfvOs?D!TwVsF9d+%m+}ch5sULl6*lKQ;EjWql)Sms|Hfb1dya=0HQdVT@}BW|dOqc~HWuSCVfMje~Nqt`sH1Ly4a3VS=7cHP2lo@jeIE2&Y%W zdD-VfNfyABAkc7&h`vjN3){q{+tE@xcM-<&5;Di#LBg#|jaWJ`K0vGTj7?H;mF{5T zix2I;_WYuJT;+kbJ=*|DCGxE=%AcL^!$(+@**a+&W1Z>r6<(L>n~$)l-*WVC+(bk4Sm!T8`7Iz4zv!CH%{Yt43Gv}`ZrR{}atycA-V8A=%LZMH{q z7li{fQTC^@i3K4+uz6{VxI%8w`OK*trV`of2{<<)a83ao?mL$U)P-}D}CFUGvrkysWlnN5D-;IKuA zq8Y>Q3EHlx>3a863dzMfuTaROC3>!X(`i8fCe*(dEGrg4#}NPN3+7+Hj8f}N>G__HEM(xg%q5n+PPz~Nzn{?! 
zurT!pEej5JyJPvVwnyM=^Qr_v*gK}A`Vs6_?Dgt{TfJ2>UYA<6n&Km$FB~HX+W{Fu z5CnJp-PP^QsC?5`%pIb5dVk*C$K>{`PpkdZst1Q<-hogw1zG zCLZ%!NA29p@S!d&i9Y?I>I3#+xYyH_#mvX6P>8v)YwHnq0^anbjSyqsg*VCeNbU9o zwkxe(`yy3-BX^*|`-Y7$Ecjy9{AkX#PP$_{CC0F*<+1lBANWMyZ=&%Lfhh6&Qny5r z!bX#`NbhOm+CV0(u<@A*%G@h+&a~DL^d8z|I0L?zKW1mU*6<8(@rU!^8a^2seSdoN z2FZG`L!M8iVRe(Nx={MF1+HyAuqSo-3BPD##><#TpwZgi;W6eCSDYiWj!5e@e5@{h zW4%^dXiAt(;c2A(cduS7%&Z&$^sJLc^}JLzGrF9yC^MA^yzv zxeZSsIS&J9-=s*l!$q#`>2kbm0j4#hh{zETctveTD!vg>v1#GxX@1lN;vL=nwV)uQ zR|}tk3!gEKaxv;Ec@)N%O-Mlxs8B+3Evr@Nh)BkB_EqxO1U`mUz_LX@XcWYa1C2fn z*VT`B3w;1tJ9qFZp&X++y!ky$w8RSisY75Y#!co+=SsIVn+8Tug@ zt+w2G>*k7i^fr(aXIyo6fe@ z4|htctW;l(#ovxjSBEnyljBt%+G=i4q9fHndERO=#uLmnZ&uqr&`B+S7-S}Z&u?MI z%7!H$<@~ZS8+f{Q9o!B5^E24g96F6|NV|W?)Clj{W5B1Kqrckn2bhL4Y+MNwJFVop z7xSG1@n*T-#(4sM!?s*r#`FGqh0oc3Fz%q!UAgd1eLZBkhRt&N_#Pm+w-`IE{w`8o ziAJFstt@bo=}*0kQtQRcqsV*ka)CHGqHDfs4ri`s3rn1G%yOkOlgL}8Jifg2C8Uwt z#<JvVHn$AB2S0g8l6OQp-XrL zwmNJ|@!4-czF7eT7%52~wKM6>F=2|e(9rY39<239(*e6CJcd_H^p8x?%yyJNGCH4p|}HE?li-_;-doV0*UQU@`jzQ-!<4QB6AY=(dh4aZSa_2pY)-w4(Rp-e|U8+(#+&ys2kXK|4$ZpT%S+c$hZ15@jj{`NBA zToJ zi&p44z|Qa`34NfnUa6k?cF_=it=>x9-UwR{Y48cg)bRsox4oGb#21e#F)Zq4g2i0NF|z~+dc6%c<$>fF!yMZ~)f_117pdYIS`-=#Ll7c0TA8O& zsk+vAQ@)c@VMC;UF80tb{tT(1dz^~N&b{!QMm{Ybh&6eHyn+#X`Js=g7K*$?t(WW| z5}kU?%OTk1vzesk9C_@E*4!?7t8sRAEh9-CiVW8B1d6e$MJ!YA<-Q+2UY21P-;IUj z5M(1bX*4nDrO_!q!y_U>Ew0393fQ67YE7<@M~EPvt#Q@{+*kM72#m@aaXuMgyJatH9i!tQW%j3 zRpH9-bBTB5-U`vV^1>=_Ysn#_$8>BA$B!}iSiNp?WuCG; zJc*YsGjpmoN!UYS%`B>C3mG1!wY^i|_1)Fc7UP==+c^9+&L|xcuNy3p>Ob5)XT$_YU{MdtT3VAukAV6()z({gG;Pj7=M9 zE@zh5rTtqPa7cFPeOtsnW-;#La897nD>P8a9(aO;cV{bqRV+BBL<4H2VS1X}59>Yt zey{6dOyBm}iK?S56Bt*JVN*biQTe?e1={|IWpomC!qjWb)z3Uvj-}kx4_?|Jo zU-0Mbz1G@ut~J-3*SszrH)hO+H!E{;UOnLnxb1MgRW->4vhj+uKTW8D9)u%iPjxo* zUJ_3nD^|8W3qKEwZ9}Ti`BOghZD!lhicGHyv!B_cpWB#!{&PU~<5SU)pr9|EeKiO6 zxuMjGFMNY;!sf7Tm}C;H{R|nupq&Z>G_InBjBNW+S-jIZ){@q*8R9zYX~$P~az)&3 zVW)itO1^^hI`l4K)QFg!^ndl_ZgyXy=yqwdlPt!&_8+H7#1P@T3{E94 z{ZAORGiN1BCF^`fTrJ2j9(|ovD|$le;c|)nJ>~lz 
zf#tn#iq*`~4F;+$M>7kyi>ft;dXBfA2eIg|uN4*Vh1J3tzScTJi;f4ibWe^@` z^Yub?rq)hQX`pI*azpI!MLY+kUY)T*UN9W@@%rFEMoRqxJsocNwE50n8@^yKN3krg z)JHb7C*}gnPfXoEBatk~{vKhb5_&)CqDRRV`{S28#}{%N{2?>rw#G}zxdh=xqp#&@ z=YP?@Vt5q?X#F|~*0ptLDJZ2bAG?~aql15HWs?ECK_TGH&P<{`E}hPEP8^D-Q6ERT z9dI3bpD3nU<8e@?P|max_D@Xey#?{hZf5>%eH3=2Sgh@qQ8m;q)h|j4#u=SCj5bygjJIg=rRwp}NkB;zomz9af z5zZ?+r^3NC4+#w{!cxHC*E?MqKzVuD*QVFxc)izlN=(r~(iABFyUAg~)}d?f`+KIm zKxZ1+={Je;GDCW9ZqG$rgNKUR9!q#o7wnZ@c0c$?ptn0qZZ&KbZf+Qxn)4xFhJs`~ z*_*x}qzE6AB7YW{;bW(Vet)-h@`Uok!*gqbHaA&R8=;Kzzf+5i;pZW7`}Ud-_L)03 zs6RSbnw&(hlj_!QSx5EW_L-v%dCC{alV8{f#njI>P(C|CLe73xruqpz=6l43 z(knx+dKJU6F-O71Gs?GnBi-lrbDzpRA%{#4q(AmPr8G7Zs`&MuYhliu{~%Y7lopL( zmd%6V$bIDR6T9NTvvXP4CLc{Lxb#O+F)ujqO&R2jcGYJbVS{b!cUmd#sLj(%qZF(w zd3+|7yg$EJ*RAQc<=aRna2V)&LCUY0)V8ZTy!Tj8a~tv_l7PG(571ZK^4)R-&(~?P ztaqVNHMC8CXS<2+;GZRD&@|KhKGQvvbX+4-nV|@F2-u^cdXsh2? zbdm#B#fi~0jf@^ZtTf2d$3M7-7m=TrBcfQTIABoP2KPu+5-zB`qhaGX_#r($*){Vd zv?E^;WL=(bv&{bt*8M;=u@z>KHUDJNG(&36`p;`MrwOJpfhOyW=xoIiVHx~DYB4Xl zfEM_I@<3HTV$Uj`<-Ix;Fr z>*O*gsO#0{zJR_rTy{mv?y zU4`cg;m|2;Gp80hZ{#2#u>I=Xn;&c%WKfScg1^W7cH?z|#h#!Safq@Gj^`8(&r=JH z*$Eqd>g}q?h#fVq?I#}hXNCXRsJL1MwwjVBs%+#W;iLFD!u{TkyKjpEo3gs#_I>U2 zkWz%``-_j&x#|h%Pq}`dBfn{MCO!!2-t=@k)Q{;$6`1Z>NFMxYPy?c%cRFytr`ahC z-Q6*de{;;e@Hl-KgB`K)h=57{%MdYti=S3ZeX*L^d~}5F@<>L+hQ^URvZsN-CmXr7 zkzZlvTlQC+i-kqKCI@L9?>DfQ0wNdPPrn#T&O2?5N|ok(A9zZyqr;0fG*G}iqEq#T zi}KGe{2w337f&MGs(T@Z5XAZPIA(QxF8P(2&r-14&nUr2d?aPk`yfBwer9FktFq!u zsZ!L{gIJ5KW^@+PnTX2Ac}(;oYe&@Z_0}#w5$5k_w!T zYDz&CXM3~5W>0f%7&t|*DLlpa-XAGVLvAiZ%L9{W-+I{CP&tWFecwhxoNscja>$A| z$uQV_Z8_=*kT?K;m3~$%2T8C02u~!x_Gfdr%(C0EDv!xZ>$F_L-1rC*O`iDEn?4*C zkux#nF_G}I79!>Gh1%?O*J^LJSZ%M!b8H}}kmD`!3bRb#( zXcjMV2>0KM=t|`v@)`#yJy*Oz_`&Z>m_JUkSuQv;6JGL=4yjt`tEsOM!1fVz8H`s= z-(K$N9h{7EQB^)mZO@tO)uX$JOt zPL7##(OGq!9|q~G?OMs*ABZ#@l56xbYpqfcJ+bWFIUncr`?)K&L$#G@RelphKeW{OA6*4v!DB z*Z=)FI*pDrl)@f(#4pdY1R)XLV<>&t*630g>ni0Ur>b!e{qYA`H0?lq5SQ;L3HgQ$ 
z^oH;_7|xitR&D;R3o=+2gs)}rz9J%|!{rs0HSe0}TfG-VoTwz^a&!L;?CHak@xJ42Z(wIj_HB$38YqZBnc)c{=jys{4(nzUaAP z-=hdWj*LOgc0n#5$04oQx3$rIb~TD4*|}aGlA=lcorq>(L3E$h>X2u;I{IMdPsjPn1GGt|Qzuytfv|lKvHg7(( zo~xFzB~pvVy+rkAO&X9W4Urlo8u)#|IzoTN{EJYkGkLH%!6MEk;v92ri~+vn2e>hx zPM>WLlo5}7Pke@meaC~z^I18!J(j$z+A<$+mpW7aP%+pGc;wB0JhIQzu!NTqM^n7A zzdYbO9y|>lE+r-64#_!>`Cw*WsU#@iN4+M5(|x+5lqUt;@kyd`3~CV%06mPpohs$3 z1tv){#Kwee3;*mc6DeFcyway|Ge;WsdBVR>BhS(@iHmX|zGNn7ZLI6Bc-21>!le1M z#;)nx9}RxkQ-e3-ZT=>I_!D0bn}Aop$?i@GGN5BSflDkELam_sxJ8hY-QtEp4oO2b z$5A>(^R{{P2%5fp1XpA54fDh=H$kz2($?nf$&8X3FJs5B@v6V-Bzv6~}zy5=VMhHUoIm+o8zc+!JV;36$exAy+ zlnB;nn@jGTFZe$9Z$6x|c|IgO+VSgaBm#eJVW<$w-B(?a(*YHwig+f)%Q#+>43i9L zanT8AtvK$bL_4P?E{9=@1lvPj-RTZvZj+y>s=|i?T(EU7hLhP!XZV4Y0eOlyql%!o zREoedZK|-|RiA{A`OwZRw}wG7f3XA^2z6PikbkB(f69heZkT);G+e5w=ru=kq9zx_ zDz}RGin?T%jF#j|@A#fr0YK@P0XjD!1VZ~Gqt&Htx$ad1iXPS?$)Vg_%} z3;wZ;EcY5*G+&V?U$9PJjC9xcw|}sS%$uF5ozZor=>oCT%HQ%U`=qR_#r6%tN}hMC z&tutNu_s8DFp4M9`sjr!HuLx-09T`48zYCeg%%wfO9~9G${vwb$g1Xi)B6t!E8R?y$I^WZ13Bj+S~t9`Igvqi7^x1wjp-D>@>U*%DU|u z5^YHc1*0}8oJDFiR((g zAxh-uL##3)N|u`ya69h=qb)VLW}X|5{PZU~lN!3>f3h8BMzyf=dJjh38W&j}4C+=; zfZ!^+?dIny!Qa`}s<6&1PHqPJn;&-FZ#aeLCx|C@C{U|Fp5I%A0BmNucTX>Fy`Ss7 zT`bypwqJWE;#b;u7GrbO(hIMi7YBbtT7Iw0Zr(-_Oow?fX%7swM!;vBc=PTJUFF`*+ZLL2>&@I=6iI zq(8%k(P1NB73dH$80&hCbs%6*-ZylKL)BE4fo|tGeygc8d&hP{)bGYZjp7V2p;+4s^ zz?JLtRamNdkrsDEPE;|idW7UY-j`q`8kupS9WLdRFBoM91` z)_JhLHGV`@B9JuBzz9TCW)QPVik)eE6(fc z_Ylr2ZJS&v7NZd_OTfYdct1c335LI*uSf_uO?!en_O-6>u)knq`en{M5sgyt7E#1` z6z{nt!PLM4d(S{p1_&-fphOOI&9AEQ<-?OJK>*}1Y1TN%yon}}{-|#rl_*T9Gw)B7 zAKI2Ew24G)d3{f2L~{GuFiOjtRujY)BndCUKXUji!_kMVa(U3^P6H;sB64xjk09n7 zHSZ|qs8h{bV{^5buoNaHpWuY!>v&Y=Lb6#S46}GnG1^>c|M(87o@wqVmpy&&3$^%^ zh#Ntv8fDVlH{dwhqDu)bWHkJ1iTHWJc9;s@ zb2PS@nQIPBLJLMEdK}BU{^7Bz2q;-XaA~GPN`dc6ldnP7HO}#h&J0$2#kyn2$$cj2sihf|v+>(W8LAhm}DV90qMPpaTY*SaHt+lY7^q+I~&=xz{N@d+Pk+ zut#$qfMx+tNUDFt1$qSBh~XGT27NQJpGY?Zc)8cG+5%$u)s8)%Oj#+slE~c?Ls9g1 z1BT=0A|fuYd6}kgVLb{5>5#+>Ty9dp6@WRJ%QNvx9Kt*_gKqYo(Fwblxz^d6(CW_^ 
z=EHaq!Rz6oOku=xzeL!%i4*ox5f`w`U?Mi-=)li`4g`#u$j<@pwM}Y0a}CBcFH~t> zyrz+za3&TIl6LCA(Y+IXJ9qoAIP|^;>x?%y9TkoWI@i#wb>z4t-17CT&tk=SO*z^( zJhrhf1=ZId&Z0f#*g~5L9D678kV}Cc*3McZF#ZBeR*SC6bP8?1cY9_ zC(S2nTu}RibwBLJ>%Ukcq)e2S?c3lokT_8qqucdvEF*;YzVf`-w#Tav5$W zhajV3viP7`5vRE52`Z4>K@{gQ^kd7Nf<-b{`uy)>T#{a%|I}7YYa#j@ql5H1q`{9p z?X|ZgkiYP`S|VfJ2lsI`vazw+Pf~ATCQ~W(QkGe~^DS7yW76?-TUoS`Zn5cW`?k+# zuUYlLot{qdJL7PjzVjz_vXa*O2+&>}#K#F6w;{a>$r2TMP2(lW+jKx=y%V5lR<1r{ z(JgIk8J#yKKX#HQ9S5?g9+HwKKc1-3?B~m82o%%V9}+RaI-jr5e-?mdKmu^3G$9T1 z(}V3PL=~eL%s5tX!N=b6$Z!4qV6VioSD2zSww*4hD+au4PpA-m*mIOWtZTRO8kuDPB z961gagwWH|)4Kfqi|b=oj@PWCP6gXlwp)1?SQ6~-P9+ic_jYvNV%AvpH3-L)X-mU%7Yc!LOaFh#`uCRhY-wA9*|TD4`g8*= z$tGKJjhz!qQ(op9`H6vjSR#+9U~H=M!Ctzs|gs*<*Ab7`a@lMrv8G6NQ3E zb2Jhc7s@<_`!Zw*yx~tbm5%y0P9Tl0FGA9QkExrWy}B4|-!I+MUEdJX{$a+X4(@E7 z=6Kv`)0UmfVjsNf-oqYt#kybc3?SEO^Q1!jP`@$hwA+UgORqG$+EO!y(WFH6qQk)+ zq~~{(UB-&__2h8ELpC=z*ZzE0`=Le|X~9b}XO3>b{<{p+8e^Y3ym_cqMf)X!NI_lQ zx~|v~fxP%7htNl1Z*-ve5R{Wc`wieTF!u~PdJ{l%Iub?oQ z#&|T8*#Wl%@Y{tKF;n^9~7Y3f(XFG4sxdeCM5lwKohPk2sEYZ`{6Dd;(pggCJWn=jp^cy`Q(q zYz@x*uU?d>apIJd6Z2`ZC(yuO58`gpbnRxfMSd#qyl;!#p%zsIt(pNKz+&1YD=Y&Z zoKo^sa^BF&)uGTe$c%j+Zi8zfWGi2HOP{FW8>}ZTf$jIW``q&PHaBj7`zBR~*ey|; z{YZr)VpRjzwD_)e@2h11czZDfim|ysv;#ZC>uj6of|D*+0KhPL$>LE-IcossVW;M% z!~Fz#m@WR{D|%E^)CZ>mCjSlT3`%ekcT9R>uH<%o`FMA+EyG-j=j`TUX>Po~S+Pm0 z^p(=+3ZRjW74h}qD~YOj3%H9_3YDl-bS^$nJZpSr1rVsi75aj)w|ybQ7~$R>Kq0-+ zWsm%hVu&9oc+6LDPCCw9QS4y~S&;4#3Yp0#JfzMNaPf`3zhQt4x*=TD-;N8@Evo~&awqZ`$l~+4J0hyh0GNEze0^^KumGU<} z7Amztv+5Li_d%7O3iAJeRvsZ!3u(SC}83btPW%uDgJ^)tSoT+6gGwh(YoGj~?N_@f7SXTzt+lzwCqm?oKb%U*b>T?JB z0>zAE0--XR4hjLMhg=z;$BC5N=)sSVA~dUgA20yqb{!G7e~jg$7d)3?`|rJr&5^(c z*X{8~?b9-E+xZy?7`#Nu{$wRCss$|RrY zN6u$y&#r><1=j%z?1Vx>2R@*M_Z_bE&;k4`%~sg|L@$P6@^2Fq(^L=PKKnxKaI;-n z*%_~u(_sA%yco^5`nK{T!02Ou+4}=pZ*hNkq()85os_WT@r^K)S(JF%UDq$z12@+v zLn;+|{4Jya2nOhqyE6ITyZO@gg}DbHqEW7Lk;Ig!=gSf?e-8b#P^&Q9u;VM{8*APB zfk-KvqX{8F`uii;u8xYyh)lOk1^tL9uH|*Cp%zqsZ{R@!03X|yyoAqLErI3#WwmcJ 
zEGNjvjHO_4M-wjSlT60&(4+iSFEoF91~9=r00Ni`9Nx>}!y+T2nXI0@|02+#vVei- zEnz2A8Ls=N14S>)zRIEshGY`x8Zde>5m^G;tlDl;RZY|dhb@6g=`N8nQ7 zT=Oy?$a_23bnDq)X1_@DEGe)FPyT{UvOD=3Z%1rM7vN`%fIRbX^;?-xo4hD|w417L z>wlQ+XyQ?}>=Nk!is9Op=X5bQx~r=zQ<9Duutx_1{O%Wv11JKoVARp-69?5UUOBJ^ zf!zeKX*fgyh~96K$GuK~v;DctF6kZhP$!kJS1N%TV<-X&_HSxs&zm4!#K$zrQVaGJ z%*>2B7Bs==L;*m>@kCe*o`dQ1MXqA5`6!;W>^O&z&3PEfWF!{?5Pciy_XY&w{4=g< z&+x7I-DGu*_W;t1x&f~*oJdR;bq!mI{9z&NC`7=GMGf-1zy-HXq20Ri8p^ynlR>y? z&rNpr?8#ROB}L4UTyDFG0Zh=qGl4CJO%h{HtNINRujM!@6u#At`}K+H=%gqk3MoBx z8!Blgy+Ny+*JG|Yp3_s>M+sr1_?)MidX4t}T)VX<{oTG2$3c?t4Vf$ObB?+n&**h$ zlS3pw0}L}QUN|_Q#M5|8mj}2yqL^Ja^(FFxBYj_x1kF*%M3HvoO0>l~$({*XzWyGV z&2V~}bAc%&GMX&U=;L-SOD`eYeS3YP+@6IkCk5Q;k2c>p#TsPtl_ZL;B*u}X>f1`? zSXrn76mob^c_x4Zuj@)ye^eoUlxt8l0Hlw0&k1JX@X2;P8NUL$Y`?v^hMp-yU#dV0 zA@}1TPqC353$}10#x3g@3m|`n00iT|m{vn1<}EETe6K4&agFnrv3K%NvW&v$V_Vv} zzD;xH8YWl2V@G~i0A--@F`WgmH4^Oi2dkAylWVx;biXD=B~2JH_g5}6BQ>0y)T4=Z zaC#;|_x7J3KP!mO(ys6AzOzXllFnzc%moSt z*R82)9r;hoPqgkhw%G9y7-KfGYG1lNMRS-2$T^m0$$R0OMllw24s}y3pb7YQ5c*C7 zQJCbYL_9&8%_iNKTS<8H(vR$D4QebNm9f0K9uW5ak7^>5ksC{gty!qjK({t1S91C2 z4uDlzAi*NH8bC!L<*4e^ESEDnOoLXiv{x7uNU&85IANx^II#WtB*@Pd$|tLTT{qfH znK!ZebxQ?griWX_w9AWfODF-=fIpX`SwQ_DYq3sc94i4d8Png}M>bN!_y7t6n>#s- zBIcY0N)B_R*_c+!@E-NtgRaz+K zfCC>FML|5cCa;j`ZC}*LlAyqn3EP8!O)@%NNm?L!bP z$o#s~u#<1-&)Oup*^eN*1gBcLDMRsLxj%riFbW};0VN3XyG^@nTsMus*koPWF){#{ zj9jQQp`1btalFI3oqXp`{Q3AWbI74JQM|ybsNFM^cnyeh|N3x9>@ve*{kHVNk#%aR zo9qS_(>xVk`GXGGFalXgh5gIldyX)cxwdkVI%QNhA`mGX1RFm2FhHkcYK0witG_Lp zw))XZ8ix<5|8+A4d)6YFmQX#@N6vOU?W)Pz>h?-i-9yKCm z&voUXq!t3{x%sPVv%&Pua{u_3$;J~L2yd4U+^ErpE{xn?ECq^T+AC1mK9S^M3AHeO z5Y_5_a=h-4L2Jt1tMm*&HH3TfUi52)Rw zF@%bu4`sx|-Sv7A&>;U%Qq$*R#}@rmIjwq1u_V9WLXy;v`5Uh6v^JHf^Kt3oR15f~pgD0sp+?OEg$9j#w&ZYYY{Ffr< z#h6Avx0MHuE@(_jK0pf%!rKlvuZxW}N)*4QM*CbB`aKPM@`l4WGUch@cEaJIz0W>+ zV9Iol?kIvBhHb|en^f6V?$pq7A8{P~><{8Nc=)oXs>aKh1H$g7n|Le}ZqiEfvbHMN zn3k*P(vz39VQDV{=xnWeYn@Y7s4=7b-JX|F1U4-H5zsv9OhF&j=8%!Z$j9l6p;7|I 
z_F!%8pRtt*^8sTc0mio2Ysgj(j7_>-BGS-L&`!`=cSe&IP@H&O3CF4I;7}t1||4EhaZ_ls+R*p$rru1t?b?^HH34TI8(rpv! z#zH8))YFOT@ltwNET(ZFDJ?8n?>EHo-VpC8X`Vq1@x$_VktOJ&9pytYzUX>V6@(PY z{pMI%A7$~bO`wmAiebB0jpg1V-J_%-zgI=qP~n=ul{P*y^L*ew-VzpPq@!DwhF-?- z^r+PLSVzbOd^lQhV*KcYKDdTXju51*6vAPwT-Y!;j|VQ|soyUK#(niWNR$xFG0F-J zw$XfuWPc}(ib@NT01p^xzOAy3{LMXaf+h+e7NkT!r)A&GpDG?Mk92UmsJvzhYP6r_lUx~mvH zaF=@*`MK^lJSzF}Vm(`*>&5R+#bdntDnFPR*#a^wDMCSII2r@O7@}amKM1%^Bw<*Q z_DL3|9K+Zyl~kzOJcT^-1j#RCo?PxU6vwi^6aWABG%3k}t=G$}BRrN>VG5f2ovwp@ zoxKdNGv7#)J-DCW<9pP|f6os(Cm3{Ltgw?P&-rR!M5WBqu+iDFe$3-6*%(UNvOC1a z?JBO@7QRp9exK4vqW%9^ssDbG>2Eh%+@x|9loE<}`14iRLgIpu*S2tj_YaB~j# zrMLQH&R}%m&p2xe$mnSO9!OCUKIliOX%vTqC~Bb`bT2{;8#DMvVCWF7a$1G`-L;p* z)|!&({Zg1R#Q<9_qNEe0=E$GB&vEAeQuvDp|9gZ!CK}lvH804qSpz=T3YC%re?A`^ikXec)P#dB2J5z@|mbRK<8unf1 zRqqvjzoqln2&$L>zdT(LSjP#ce2No&*m5OF$K~#fQBG75cbI`#nMAHQh_kXe;UR*r$TYtqG zRQ<#pt(Mo!qx@-+%{&GF-&wS5< zZ*0Z@fW(Rzwi}&AQpzcz7WsisK_tp6N|YyQH8UGnb<+7xOJLzPLs_W%RrOEY&Bb

7F#Pw#cPd9cCCwf{ciLR@zplY}mWn?; z@TgbHzM>%J(#Cc&Qvg{w>U>&1^P2Rh?@-+xKR**ZG0FXT>thy10Ybydn@ZTluCg(1 zKFE(1I(8)K1xo6P7c#x7YZV z%z?tFkp2I1=78$DLa+Tn?(W~f;nJyGv$LA@pL9!p`A@Wy<&iF>#4|f(k1gJ{7usMH z!Cqfj6*nE^CjfmticH%Sczq4g_JcXiv*2agIoI+{yikwwK8f;@;9T1eZ#)Q>N_iro z8T|QAKYH14evn8Ubk${Ctn??4K#ZJ7hSe&W|ErGQayNWdEyzuaKOLG)p?F^LJ{PvC zf+?|%xbVnO(q(_aDWnO>aE&sE^U7d(5-zS$CIESZXzr`)hY(0{LbD&KsT3k?-`W@F zqaPi*X8tjxKk>Rv2{*OPgrsA&Z195UU3GBA7ZknEy~pico);dY+JDU?NmXUVJK2#En3MOHt_RPOY(nZ z^W7|VriFW2G=qlDxaA-}a_FZ)kh+c9U z6&%4Jhq>o`xwu=<$3J_Xv*Me!ndLhyd28>!OjzIuX`7)QvOKFIxA3UNhK+x5JZqL2 zu@y=OUg_%3j0ncx!%`9W%&x~s>mVdORvU{>m}`biV`vM$$w8_A zI4%QB6_07<~%2YwGR)2(>FajIPYg8^}Z6YMk>#x69Tpy74lixjX3=@Qx|NZ_;P zN_e9qb$>EzIM6kr!o`4SYJh>;svQq6&lDmE@xanYmc#!VnoZR)=5=SGE(-gu8fT16 ziBM`bG9R^ckTt$PSea^GYm19sfUh{}O;VEQhdRPDOQq9WdLBH2kSn+iok>B;t(^)a}SGtmgL%Ar2|H+t1S#h^N41%A8Le5FRPNH+5K!$R(a_aXlC8s{KO8;@Fg zhDEe>tQWAQSRD4BbrJ#QI3M%4P84{!io;BVL#Rmq0va8+U8?Q{0qsDT+rK{ezf8bB#(8{i zMm^ll{!(Ecyp0lsl<-lb(e1{smJppJz&#X&h5Ln~gsdgTVD#@A-^=p${NrIhe2@?j zSC~+1tZ%xi(*dqLoLQ)A&D*y}AgDekoKpL=dC=XT{aao**_i zRKgL-Z;Z5Z{zQD?yG1;1B?R}AD*ZP*+s!^Atgb@?XZsQw=++v_v3dU^nWT9bml))i z0ZrR68#!x~Kgm0Lw}K5=|7ZY3VfjzqE3NsNNY}37_fy7`)3^&et2Rimfh|sk&bs12 zzeOP1ggk24zsE^Rf0sZdB?cZ9vZs?m_+A4qO|vi;+(QS`ln;j zgyv6>huIY-_)x>zl6S|~AO*kbR(S>BdCvErqqG87T^4l{ig@+d=D6tJ4zsxNzC$A{ znkLx4wX3*7JTw~AMu!Bq?I4S(d{(2jo=8;$f*1f>5sv>(3b48R;T}@o^Affd^4GmY zwRiv?MZSrM6$Yr}$hNRK^5dlKGNPf;)zI8um8*R*;KiQypLEdR@a~5Y`Ayax6|M(r zDOLJA@o%C5Y!F17wOgoVOOAvZLwno!!ih6a@1xD`nN(ngJZ2s78h=Ia-BVbF{t+77 zGvIUsik;hKIx>=0zkg#LuC7s#jx@ymg#6W-SMpx3^`m~5^LC)pxiXTN5O$Wn6Jv=N z^9CQQIOfrWENa-l9art=U9BsTkt(G^W3=GBIO@Lr^DZVWdmKbmk4EbK{e7Rt7G&2@ z#^<3(sP>_Ph^{T(`ikp|i#4MyPg)psZ`yAR8q4=9{xqQ>cj6=0hW1u@E}EgR{I(M)W+4s-nzQbk)ETT3p9H>H_)f2YHg=Kdu@Wb!ddW0_HAmod+`2 zdX2Udafi9#9d9b<|B{Da-bm#P~Lk$1aLcT^0_ZE`3#|oexP5Q2r~`6Kb}<m91-PN|WGkJbsX09A4~>@7m-uo_Z6C`%5i0-7z|xrzD%@ z@$BLD?C7D5s0WhCGU>Sy$s>HsO^tzEwBFJfZX{ldt+5(Yh0q!g!c-y8kavIL-R+I^ 
zX=wQWMsjxMIXXJ!HqVOZxNmH!aN9{+XjG~Nl?T?69E2>t*CZRU`@|6}M*(FC{dU&j zxI;mx!2@c~62K`)m84T@q#*kuUdW)zAL{8lG$Dle$%g{bTkr;%De~50ia5$-?U5c) zlza{2uQ+QYn%P|jmKxZaZ~4T>!L6WX3bhLQ#1xvl2lZY-WM&kek zqK6OeO@f*`u9+4Q0?lymMSo!7tCfO8XEtK$qi_WD=*qaMQZ<}1;ivUH(e@Wg>FQs8 zVml&c^F8YiI35rgjI_9|*B{lwr1*;k%!-C<<^IHQeJp15=TWlP0k z&I+4yiGS8MzYd`=0~s@G_if@6?P%OD(8_(?v-0$XpPx8_U|zuBz2)gmS7mYr4X)S= z6tltz%B`N!{N=7eI^pv!rS`1`M6)0s0SRk0H4H)#Ceh52DPfFsW|Q4qpj zn*%~=J(WZM;f#nFoD=w%$!3$9O?Y z(g7IO5FN~R5BL>|S;R#Hp~w1iPEk(${c|mOsW6;NN&J_59$66Rx3ZLqce-VxFKD=5 zopZ3fN#y{d#1`TVMC9b?zIQjz2s!mg@d2Gv^exADkzcH-NPS7cg)%}0@ddZ3Y)TLl z2HJbp`)`bvDn&0Kk+TuYWWRl2^&&6nS!l;l(e0VD8CLvwT)2*hhw|BnGECD zwt9u6+h_;J9nEdiiE9hB*+;FNjkx|7^^ws}v58u^_5Ulm)6AU>^ zrn(u_okJwPRTsM`qNcZCiSK{)1N=;qu`Le2kPGAzLhi5_RAs9`T?7fRd;GfVd#@yN zeHb`c4;Iw5NV}Ff^SJ{&=$FN)zx7`KK?VQ@C|-Y#rQ)uF*{`ZU8HJwj(VBM0eAf8j zyFzcxLMM5K@wQ;V!AZdhjyP#-hmxUfk0F-*YD#c!KYPsgT$S8PT-5H~U3aj!MpJNA zpjelP?&7&VYrJxcN86o|!%Vn*mL?0c_4aE%&)qKJ*;7Y<=Wt-|#rY#QvK5S=twUtO8RrYZh4nra15o2is)aEN-pU;9p! zy997kouKY-ygOaZZMXR1axJB@03D;Y=XB){)8-r>?X{h3eV)wiF89Cv!zDg$)dkP( za+CjV(LgedmJyiw1?|0=QA&AeZm zl<%xezeVF=9#hpa=3t5r0U+(me5|ye^fE+5!OOtXOleFlfN|6}7;=lNbXsM5d=tA=x18a6@ zS)U8d63aAVLq!5>I|27l=YDT!BX;A-#1#b_)5r@^#4@01#8N4`0f({ttv;?9`O$k2 zsQDPrk#EsBfpbT57cz%dUiV=siOY~$je{(ReavGl zq>Rn$ra{)fXGQ`(8I{*sgaDsu2>$I?#fU@o3)`&am8{6B=S4*R(g3haEh@_SDqeR9 zfmxL#$j=3AW}gH}69Ugi>@Uh|io*3-g}q=w}cGL9Fz!HmqnNbGN~-IZd&DkHuEpt)C7fEEu8548CM@%kX}U5@^ejsUO$ z#hsm%%`Y;p7ZhgHBi$WTB0iK6gpc5X_@R(c7?A5`*bt7&v)znD|I0BFQl0gc1udzILN^->VqrFJZ^_x(OQ zSsVD;kAeU2Tak z(=nHIKG8`PrVdXoua6VW#bh4plq7XCz^J8r6NE8jmRlivTV=BqIEM!)=P08?h4Omq zvEb)~zcIif%6BIX5)0q! 
z)jEus+Ru{^{Q^q59P@kkI*X#(0!J@2%VUj-HR8vYr`~?9vN>PElgJkLy5T8t*)#<+ z_B(iywZY|KWKjQS69(C<5Ns-RF)^`~J=&%&RLv5#VyK??i;`edpaFherUBT#ax6yj zt`AZvC1%MV+H57<4%Sc}V}<}sQwTt896voQBQ_(r-fubBtq0@|wo0QaIP9{YV~bX1 zIEp_4#N2UtnCvP!uf+oZ9@x-D{Zy+1RBos@0Ebk~&?%Cj(afw!uFcpncZAe)2AT6; zH{sC04jUyLkvFn4p3k?dF->q~w+TaOE0w{McyqGk1#gk|mtqgbm%w=6+#eN@@8nd2 z{KUYwHXZhN9%Z)zEzPbL#u}YP{X3l-S5`s8RWgtQWMQw%2OmXz*U`B0q(6}7>R)4D zMd2|Z-wEE%srJRuYtb}N_eGJ771vjD{qQ5@(BYNQ_d03g$g4BGkkhI?;+}7H?nchfnXj1gC8ec0D`L2$=gwqAbAv{>-H zj7lg~&%l-_rWX+}H^k^tZi-js!OZ$l%re2D>&ci{t`eBl@92d+nr-l_FFHhjZmP3F z-TOtc@R<}UDfPW007VOAeE4&4C6+otz^h`^k#3Z*`chDLxW0!%x+t^D(bPatE)pGD zV=>F}E&VaS$zUu#z0A>?NVF!`ZJiv1)J)Gzll(Ww&AAtU2;u`<3Np)$6u=qM=KOF^wZd|^XP^f7P6n&aLuq@HgM$gK^ ztHM}-h8U~IVj^jk)-72|&7czlBHTAoa%gX9X;U8dm)l_y_M?k;N`dpG2{p5;L>s?c zodUb-FF-`E?Q%BP@hZ*!wZE9B45MOej>TG3g8X&C@Y?(-w>YSLt z$Bg;qf#OfT2?i(52|P!xx6?9h{-;4*yp-XYQfIpVMrIUC=XyxW!0$=ymWKv~1@HexOZF469^g z=1T?IHH5;gLzHctPy#aXZ;(c<_MCOnvz?Utjc@T#>~Z=uk6DW?ovg%Hyd`=e-FB?2WRBb z@!&ngMJw)-bPbiY;wpmtR)O8BQmX1dwUC3bj*}r%>1qTVGriv$&Gsl~5&JaiPP^QY z*Zr4Zq(%wyM(XNE1GdlYHBzj-I%fvSXh9xEWH3uI30kwG@ljk>0-f&q_|sSPHEU7? 
zVK1FA;2p!`=l|M1jv!}3JcS}a+oBRZ&mfFCX&A14pd=*v`Luswt~x_UIY|)rcAghg zl^NWU5%4&!Kzns8?4ELH6qoZE4|+f-T-+ZJ>O;^2V)}8<%HjEuxXYz9`mc@p0)_fu z);L^XsP^|u`i}I!4VBH~d-LJ$cX}5%Tfzl6r>p%AmCKZN`<+wvLMN6yQ2H!JljwHA zvzYclW|Yq+>Wj`G6sz!T_-TK!E|n^jTmBkV`WP*6^HrENKN2|_0rb9&jSIgGvMvfx z08b15hzAJ%em@)`qSi|-Bs$9+ecRhCVti5oghS2D3%Bt=gZ|oh5S?)gYU>>-THnw- zCQ6J!sD8_Z!wkhYQpDb$Z05r0b;{GPmUyzKX~gI>KFhWske zGYB}^mU1WRHz>cQKSENTUw%mk9emm!@tSjhrG9z1lEAeq;D)W^lKnl{T-h3hDRSKj zO`t=TIgI?z6B`;QG@+8;g8)}8@3`an2qxy*1X+(j zIb0bM{Sn=mjNpdWyq@>x%MYkE@(_P4Vb5Fg=*(8kHTmC&hK7bIXCofHLxx_`0z~x?zVqid+p}sOD4(f4S)JybZ3S2=dD`p5CoykBZ@3Z$a6#!~Fk4*jI;D-L2aK zA|VaZ-2&2~q;yJ4w<6t0cOzW_f*{?^BBWD5x>G`>K|(t3T+45tz0Wz%z4u=qpEdc- zcyqjC6l?r6Dodk&OAa~9hdER`GQZ+y2od<-5WX*Jx_n`wU+5TN4_OMhS@-N5R_CT( zcB|H(k-@?bi_)`=px|bM*r^R{tk2Hub5zq2nY{C;ra%I9%cp+>zO;v-k7*Hwp#2Et ztg>5?q4%2U6t!!8Pm8naFhMz``he$UW-ESu<=!UG*5Y|)U{(mMm#Q>7?HuxGT|pw! zU%a4({+J}wf_Ik(!^vD3%{E=r!J|LP;(=hh<1YQH@52AW0^mF@8bf0vPkZg2)0^9C z!(#Gz1YYEGn%o_T)J@mWhsd5!#S8tkoP7)xExto2!BKhVIpa}V7#$KURVZFYPE`P(o1g)L4~R1n7_;f3*SY=&*H zeSg5(Jo3wSrc4bbP?{%5616=jrE*!^`V9BY<-!YUkBN)1*x_;bH=3UK<0MPDF{3FQ zl%2L7eW8ehu-D?`V& z&(AEM1CN{6Z(EQtH#-rNFYQjhb~~(%%3;shnz>7&$<6KcHk<4oi^S2X3K2c?!DAFl z!Bk#5EgYIh64W{mtKz_WTt0!-R9D>9KqhaBkyy^N&G8|lwh5unEhezh@pX-Wp2;30 zy2@)9PWBTA1$ssmQvKee4WDz;F9>%6XIN!vlMg`vgFa0X8;TtJ*^7L<>qz|N3-7ZB zbOy;_WS2<6Q*5)kWdJ$8{I}HKkiKC}^;{zKbVG;TB?>xp^rk~4n3}~&s>HPSZ%%; zKlSWyvFL)IU4X_u#ol=PBu1z!I9|R*3!`Dng%kn`LuT;#00KcVR_mi-p@?T6lTgP0^J@e?F_f2U{c3!LL$XvV zg&1eVWS&M>C(}6cpY3}|vPp;bgJ!OQp5+Wu7b9e!Fnu=Q8I(FF+N~9dCTcP7 z4;t)Wh_*QiU-}ufcnh}3?`e;lj4lMIG_3ca*k;~clO$Dg50$pc z8T4)%XvRPtqi0vdJ9@;&GX9_%B2hAkM4!{ErgaT5$4cmPjhe_H|2aQXc^#YC*(erm zx=k@>%k~RYFLmo}m`cgZwE&%O!S)tqy2-bG(dH5>&K!+r4m3$mK4IwcJ?(bs+ZxN8 z*E2p?5CaOj968gKf9zEnJXM!NJtRxz!Ypyxnw<|&TRd-L>3k|bjI|!AfMB1UPpnLxV3pWODgQP;{fy#2g2WvknV`Hp+eM$ z#)N}o#3ebB5KVibU$w4-s;)74rN8&Ox!I%V>dQ;;(fVgv-A{1}{E=9FqU7u2Oj!+t z200qI{H7@ywWV$C%+61Mo9!*i?)FjUV~6%}HSca;T6 
z^i>9gu$*^OriY5RG*%mf+8l@9rHHSz^;FX7O|#D~lEkn6HkAOWOyBaj&oY&97Wn-c z0f|CqUTfyvhv=HgQB3Z~7((ZDks0IinRFs$uUF2=tI6*8d58thGO*4mMZ!YV7mjSh zi)tG($8pKcK@S22{_<~DGT7j+ONICj%1uL#l^Ix~AQ}1A?$7uxxh^W#Gccf&hW?@_ zHW!79X#SCi9~J?tA(AwIZHRdd2_K3-nNC7FnN$t7l98|Jd6)de zVKHB=D8SfKyzUAAmT1_h8~2LZB!ZZG;hEy98;}Y$k7(snHGheBu(`SWGO;P)tzI0p z{Nopy#JG_xl7lrETJY{?H@t)B9v=%2&QE#SHY$EdoJPg^>)*39v>o9vprOJGf{-_7 zI{?^=$nV+fxqX5VjvRdE&I;7_C-31^39OTni3DWA^GQV?b zUEL`D7}`#>pb=G+S^d{)J>zRKC8u76w)%LahTanlHf_|J*=hMMODaD+;O-Jkbcjr8 z{+3cDTTn?@-lR!VPw`WIKSjL2>O)%r`mDrXlm<|3K~D~S#{>Q8oVW9#r5h#*i3s?f z9#RuHTAV=Fk+pNGv;>gJ)?{;c49-d$;ob zMQ`o)B!f#S^MS}!TA4u&^MvK6R~kNsOWO7cwZn>el=-HIBDBd)i@y(~pru}Qc{eK= z1H>V@iOo?Xf2nVZ%~+Z1&ZrHC!z8m2jO?8Uhf(_HosnO5r-K) z4y#Kc;%NoV-!P3iP00WcVTV=}hxN2i6pe&x-1Fw5x1;QZ+p$Macyi{W=4&(|mvb#` zqSR&I6K9gl=tZo~Gz`D1ry0`4%dKx`?L6jt+cmqgPwYp^>gbo-iPb36f^+w?9&^G zq7R#SroA(R@9ZW#XSe|#rgzqn4)9|26{N8Vaxv=eEiooBWp*cp=da1+gh-;^*V~ve z7R_4Wmg&tsa57cAjm}7weqr&O@9zB0!1`Els7f~F#m~R)nWCV7rPh^d%9fu~JRQ)W zAg#{}TP!#Y&Di=`{3u5p#bt%Y4w?ieZZN@?aVn4T>5OY^$ZwhE; z6@uKN3!=(OWyI$9P}jKv^<`XVOmvm4@>6Hp#kXPi7zEAx^?NN8v;Z3$`U`*FtfNtj_qNXB6$0HZ*INk7uVH4`x>=ZH&I=IXn0mXZX| zGdoIW52&0&e+lK_(|W$5*k?XgFq-=>C(R*0)2vow<2R)Vp|E-26v-bka@kj(u*T zW}djZTCaV(t+M}a98NY*$XwMwS&4uAXA8exmC6mtV!jH!dF~Q>%8oPOb7xmnk^65n z_Qd7h5es@{TuROAgXs6}TdgYxE1N7r5o}E@4W{&SE}m3P`5|j$$|RhBOwHhoCUb&BxGGKJum&QXR$Umt2hv*JvtDS(yCTZ z(Ls0!T=*!Tm?-Xm4A6v!7Io# zDOUH#+n<;1pmDbx`u8Xr))dchLBSiOaCqX-Q1cxb#LVuQ!T0PC`@t90kH;r2G0LMi z3dk9amycVk939!qiwnX>qagNa(^P{x#^+SdT%TM@T(+m2uO(gN z-O9q~hIG^Ims>V1f85@9XILc$qr&I@ZMNF;2n|q(cQApilLM=v<>mN>_h#)R1{I^~+UHi$C0g(p^a>YOoM0%yoU-+lYagwi2sMMVg4y@)LKo@PEvbsyKx!hlpU0$M3vsEUBZMRtJPcy%MAkHgUd zZ#jv6+-k*(O(8iReSx=>s)40M8B{?q|Mu|<;Xyi20!)jc@Xgv+ZmueBv75QKeVA5x zq@p{JPjJ)B1pW&eXx3_*+E4dyy3ziWQB2L|b+J8xb%wuWA!v+KA*5c=xGr2Bu8quE z6Fz;PY?P3;*Zd*t6s?69g|%1`<$3hqyEgeq{h@e=EU>>^6BbQ&3dNsmDN8ju{X~*u zbe@GxCvIB6gES-1bdQ3({KLMNve?+tewD415x8+he{WpQH`O;KG-EAb^da>bEl;Ty zGs1Y+tKXfQ6 
zyPULvLhSv{gSD#x;xsbR790G76H5{m`NS)x{3KXkF3E5z=IZ~BznIoa-~q@)xW8+$ z`=`F7L$rYsCYmAB#62-t0%#A)PyRS;V5xkdeKqpH+;{*IlSbC@+-7m=yR$R8pI8ZO zoqfqrl_B;u1R7o)`!;)xJ&$ez9_2sK>Mf&e-DiD6}>7zaX_!)a%z*YYV^{J z0&=nzHyblyY*tny#Foufej;HP;XOm{u@I9nBYfD8`=P{rjlpj8xaI_9)sV>lKMq2u zQf)Tkt&_$|Q<;&NFm^e>;fpUl=giUy+sTyqCKjtJBDjHB1#@X`Te7w#v7$A5M(p_Q zUUn5%-{O#h{Zsw{ro44X{1PlG32fzflrak8L%yept{aV-_$Q)}2+iNf(0G>SAqQMx zIqr)$v?%-n_-(O-dU_FGOVb`vXDLI?AOGoq(YEu5d(aHUpCd^~i-`a~#PLsR0EVHfyB@k+6+>0OmlT_*KdzIE^8U$UDOY{drR9jwqxKQhDUx1Ma0 zCyg)N@kS&QjG56Ej&9s7*FZ)oKYZJ;EO*IqxwxPo+YuJB^&ASNtj!CYi5}c`CTPs6 zgE`s2N1>Itpjd(Uxg9x@fIX;8r

du$;twt?Qx16bIa~sRB~%SD){XKI%f6! zaDXZWYKx_I6Wg6E-HQ^wVyi6mfsu|m6feV!i?YGNf>btc@@(tgT;8dC%^9|i`# zNl7QR^q?mkfAy=Cqq}U8}TMfUKw63KHpE)Nc>74#KNd>J{bIllh zSUAr_Bfd=(Yc@+&?(vG~M~Jcr|K@?^bjI4tjrk9%4Dr1*PJcQh@(_su<U8?&#RIXx>{$|H3&z9m!MtIpFOu4?~=pob!3Mk;`V`D(mtPw>K+t6 zPbliq@0NsKs(X5f6U(4+fFUF40P`81kNTxA>x4r6W3lwacvIo0-@vKJG>M8%DKBz; zRrNX6YDz`Q%R@tX^5Z8^l=g3_Ez#suA|;MXkC8_zr4Bhdqk<39F!N65EkD=D9X0PCh1%rT1{EVoh{2%H|c={7o0^H@#yuqW;3hU!I9pO~i z0i{fXLCM{_Zu7XpcW#a6=lY89mFr%uLhv9~U}N4y&8SHc#=c_=5TG zT>u;@hlGJFS{1PyMrBivW31+JS&XD8XmI}o~1;B9R-aMCF>)PUZ`iMa>t!J?#kj~}eaP3A+ z(n?{6or(&s-gXfQ3T~qi_H?)e??ob=&o&F-#K`oDInWg;GmU^a-h;0DP*WDqU!#xb zb0}-gpyqskx?#Q2L4iP84WK}J9Ytw^NSpS-DU{UDU5KhHt+@pw&s1M7#X~wDr1-mR9XaZ%L%5_wf~s{M(Ry@>xGe zc=FjUBrOK9$MV_Cb=>L!rsKu~C~wgD8w6igWPEaHvJHnOf$$C*cuYTReq;0$l?9mKgi1R+~Y_+4_!~2X=9-zyh zt1FEBfyP|nd5tX{opQu%yoV8vL5I|O3s$E+YyH{k@!hT06O}Bu)to4_qRR(CC(@X- zvKNN|Xnt+pmstS(}LbmJEMPBdN%0HCJ4iN<2V9RNr=svE$>aSSM`H`)Dd@R@$t{q~CESU4^Q zptfH-@*W~1J9h>9l^V7-P1}u+YG29GSxgL4om>M3o(*=7xS=dc z8On%W5=|P(!L(5+bb7zAH%THso-yoK1`4-hTIqz&OB`wSfEvoq8OBsoA6h!&-NY?9 zXi2FosetFJ!b@p<93(Si8bjTH0AU0;~APJ|HyA1EHN9d zi?q66j9^_4@n_HbKNzoYiMI5dG<34aa9BCy+r z^#PBKQU8%5XPZlM{gqr~Z0)g{kGDdEDj!oIy|DLM5Lzaa7tmex)9$G8zI0M5$rBlF zz}0079N7smJ%$QN?$YB<05X^kAoP2guI23&m703~%z5MGokJ*0Ez@aZ2sONNJt(cN zA!DLLGWIxxzMlgJywJKGSX_zA8uesNM^Q*2f}JyhkR1~sHbVgmnX{4U?s*i}_JwgQ z*3Cz5^9@8~xrc4f#f?JS+YwQ55Bn8*Bk;>{%Yl^c5qI{*;@zLo7yR3ugo;G?2{#Gl z@YN^6G2YCXn_@K~sEmhumIW_AHbyt9!wq}`ILmARdDTBiVp3oDgq-Y)d&KPrAI!I^ zDeApVyKRBNLgx*YjnzYDrPV$2uAt}VX4I86Wj(Fsk(@xCC5+mb*L8#JO=i&Rh6hxE zU@E@}>yb;N&=u3+AsG zU1qI}0OFDVeZt8-fP}94o)@X+9wVcp*LWrT3k&c^7G2~C)wnn@odqCU`H?41BQo)W z=9ZLJo@l%pu!wxqn@PE|r2s#{u3mt?f^Y&z;*R#8BnxDJDo5s4Z9No2qjVoavlHtI z!|^8K>O*{sZBAE0WYE3OLFAaYoGC-qt67gJzasMwh*ZhZ28*k|@SV1W0R)|j8b zaq-3LM?jaXs!I~4ONNN^9yfvZq#4TI!|rZ_Z;f@iq4$WBIXqs-+33l(MGXFrhgJ0jQJ$$eq@^yLZ8gVfIpR%lm>aS=!~qRMq->R#`RT=B zAfTJs=WNs9_4{MJJDkHxV`_b*E^kPqdq&}2#_H~L()Da8HSWR6qn|;DY1KrQu9jTvW 
zJi#|;;}^VK=;QGOn$rn1YHKLwHRUZLdhV==iHT6!2KpK&qb@FL^+_=Zx_}HAnlw}f z%nrzanN@0EJ)6RBAbCxPiq`g6d?M6$NNt9UrKeX@axi(ccZ=9=6ilJ}UZB+jCYB@a z=K6Xw-36NMVD@s3F7p_iAEA0;tcf2ih6_PZ8Vt=bPBXG1NQ`y4g>v!p-sbc!AJ&3N zA&x39R{ZRUxzqQP*<+x?@Vt%tb&1vRlPzi~NkRuzeyrm1Z95xswsru9y*R`3j_U|P zdyjzoR6qPkr~?EF?lnj^n1o9pn)!2kJVDILA4!_&;!rPQhg_yrN#WNW8TRcc3f_+X zm~Om_HA1$mY8Ajdoi~p|Cc(0+{QvS){UK z%??j4@Uvac{#1MS?ALlm9d~Oaf5g4~_+}HJ=`&($Hl6d!CmQ#$8sdJ|D?59$ zo?dSkex~VQoR1X2AK;OmawyB^)|GD;at`~i`?|blVXOV+{Vn;Zh3yR2IJ3bU?33Rr z83vl447h`)-yFV{J(M8ZTCOiKd#5IWGyYKjl@bOiWTuccC&cpywWW7>9Hzw2l|yO- zT73`={Ln#(?_aIQFJ5hb@ZbTzT-DMK>RZy4(I4C>tQ*U*>cyjsfl!51s}le zGbBR9y<-m2km~@CY>|Pk~jH9u?S3-nom^0+5y=M?L(=@A~!#E zIK6ro>fg$UoB|^77K4||)bY%6ewJP}mk%3fuAZGIUN|9-CTbR`1jqn>DAI*URF5%3 zn@R7|T6#^;AYqD+75*H#$Q5t8=Bi*rdBEEfE`lrUEoQdeH_n*W{w~2mqLqUhm%s)q zH$Js@Y}wVJ{Nf#m5};OlJ;d<*p5c!mWNyn+~IEE_j!l92^8uDKgs{ zWbRq|!(M^%Ze{NskPDSF0uSIJMMLN6{L?`ipV*c8MdqKV*PJXgW~mVbtZ9r*we!fY z1Din97BslpUT(t3i;auh#ZCVsD5KowEr3MV-qJu?`g=L>`{K>8h z+>~(isxDWUT)%h~?;HP;d5tmukLFf;8NFi7GnB?@yxUw}^u0^R_jy^LdyYC?e?>KH zNvcl#mq8e(=FKB(mx%N+wESR^A%yp++-oQlubF1qreeba@pJ1VFk9I<@O@!ij#UV@ z4Rw2CYV9!;`jEU*RhP~u)R{URui1IQY<2Dk+?#p&*?aj@&ZauwMb7&CDxv#4`FJ>? zwaPqeT8CF0EYx;kU%~taerBjX#F#4!#0+}$xUyeK_-y(|?U)UO<}1G(h);x^hw0dP zXfBA5VJ+dWKA8`nxD}YpWc?hY$L^@dK(ORtVi`mOtwC{1ynQl%Cqj# z^gwy)b@lq>!P1xR2H77Y52Jcakd{mbIPMPXu*a_=g0{rXuCSh|da#b*)4P1@B}wlS zxq&ojzSkT!`gJ<;g+&cG=w3$gGk%)C)*i%s@y5!o|4S) zerB5VDV|b!HnyjCni*2@LHPO&tKny*c1vkSUVPYjO+lnsd?te;HKn_%DI%VB#WYg` z!fpZtr<((SznlN~>toA3P<9jbaa9^@iEB^b<$0y^^KT8v(mUyU;j-z>iN;xHGlWMD zLQuYdb$u^@g%-5XjP;OFZ$!IF*v4>ylq*kR$5io6L?kp&_$oQTU%J)V%il7Uq-lzy zISIG3$b#)KbMJd`h}wjqiwG12gUwFm;S*TzKCMc6x>dp@NAU`K|rd>2=kc6=I9;jZZJGt^|BcsukqbfKE@)Ki}tIDx5o^GlP*i-Ua)< zn#bEH)|+l&L&OPQNI1x?=tQl!G}o8n_Z7Un*|7vk*w9=LVw|`j08rgth&|U@rZ@x# zF?6W#u&U?&G$`&)#Hr9{P-TTfBih7qO~YZrGC85#5I0N97z<&avquy6Q<$0hhIuXP zZ1t&M9FVo!tB_6QD=~g-<`73{;z3tAlEK=HLnryZ`98rx38~4hl3{_MxUWc(a2!MFqSI!^U@LRt=xvHt$pOP<

8UGX*k6@G@!{O zjcx^v^65#8Kg`lB7X~O?$uvqRL`s&bJ|K7!v4Fatj`dhd6Y*X9QJqHHj!RR2-_MrJvF9mQHUgD#VL>74%~7?ktV>0}Tt_6(@rB z*M+u6ttYJ1_^YzT!;E;v6;|>1sr;CQAk)Opx(8}sHo`UY{ny6oEj@~~1!8xCoY!}R zi}d~I05~PDiB0#F=sS4A50g33R$IaDA73)?HWi3uh<5b#Asc>QuyD?@*v-N-PAFQ0 zmXOGyZ=*gtH&jC!cEc3bRbLfeZ|~8NOsUN9rN8bUu>fqzh5S$oN<3oDq~ttlla6Jfd;{?GK{4h9FB56`Ax2&@K0; z!)bsTQ-KoYyKnmvvgAen{;#3V?on=`UCK)J?;~$uVekzhH7HepGTBlCSbo}=jJ5o8U4Kvz=X(uWK;p)uZymUZf<|9K!bLNe_<@_0F3^7g z1~?{uz{M-Ap@d&q>c#PcMI#u}ad1?IK=Xs^$4Qm7y3!eI<|x$6$Up!qH>zxXL9Au* zRmWlL=HFpr@R1tOj|AmO$}b&u&eY0lC1W-DSM3jH%Q?)3=WET=V(zMe{1rOHnhMNK z^WS;a%7ZGoJfw=^lBIR{(ZnnTLc|p`uX{Fsh-57E`LE5HHXkp0MDHxtO7fyqgd?}0 zEQpYOrK{ifRAW}E^bzbW=+>#Twnk`Cnmn=?n+WSq)}=kPZEu8Q{|7-T$^;OkXCV{K z*5+F+ns@*y0ui}nsmypC%BAlJH5Mv=BzjWqe~)#^>&^?L3MjKYZ!~!VA{Nmef`)Iu zn7)&5ytKghQd{U^{le2uA2!Dfe82?x%46ieU91NJwd-t2`zpq6x`jq8Iq=@SL==-U%?5A@DW=XV7njEJhRqGQv)hNa;{J5nFiS2Ei@@7HLT z+O82N3hR8aG8#kZCum0xrKv$V7Zm!N3Xv7R8IOz)P-J<;Bf9S`vHw;;ZVV(J1}5{za;^lN>k*0x}(P(3`RObN|y7KY*CP z0xBuU7?bj+FT?=BfAC4lF7B>aoIO>jjOlTIVkbs9X{j8n->tpXIGgqtmzcvE{99^% zeugvQEy4?h-xQb%Y=kUYwlO}?TPBhjV-CRTUG@9f4UxJaXm^2eP_ znUmN+&Wi;yxMMAzXQxsAgU~CG8TQ@syI>88y9nCB#KhxqW>s4_^gT_q2*QyKh^jmo zJ6*6%HuLR@`zp_8Jy#ENncWEMX5EJ!mnj@@XH*Xu>fd18lUi)1>8bX4)p*ZsbjXS@ z_?i>Yw>hvZO*NuJV~t{85UBBYKKkPNLVSwZvCCb_Qs(X(Q=_alm;->e{$G)Zxin?; zH}!&9?3Cn!a7_>-tbQ-YU)|g+#pJp_)ooOxBoeHme*EpToQCkp#1~l1MA$5+4B!@2 zV93s&9-QIT-iP$QXAPU-v4M67A9=+;n zJA@5)*%Fj5VM(8rB1=6oC8+qh9|(iUH2jo1!-IVa4?@DdkB6m?%D@Gk~@CM{Y0}=ywgKr0CQbs0Khb8 zZ6@t%wv7vYZEZ|ew&X2NrxdR$Uf{A(qN=%2{RapQoI?XR2QVPfIXIJ`z_5^0pLzl~ zjTmfrlu59dTwF8-ic?yz`jYbwS(AzU8X6~2>naK=1?9YW8_M9K(DXWy#)abk8lmQH z36;V?KMSlf^>-D;x%hI}Dvg0~zuPYSrgqSq>7J7&y)iH2_$XCB#MEx723=_dpR^d0 z+L#D{afr>HkP~wSpe~t#AX9-`lI0CxB=xYg?~h|EroH`sb_)Q3YamoIxk^p8+wCsG zfbj`Q_rBkw0TUe1aDTb=x})YnWjN%KmoTjl-LfwwJvA0ul28v2nlENLlZJOBK(zXU^Fc&Yf#Hj0Z);rE2 zU*%pML>O4f{F>3@A8XYCK(aLYMEXyRqaiS&8s zwmLM3;zs8%MFB6Jh#=ratRXt1)~vlSbdh{x<HgrfNz zFETXUEge9$wR=Eg`O{t6vsW;S@&_(HFxX!ZI^U`Ej(5_PV>0g{bd3_w6YXBk>*jgh 
zYX8aVKRE_86_IJ*J%tjMfr3~~0M!3Qi)w4n`OU+(7a028ul=0e^S4Ro|J@aiLt9dT zo&>#Dv-sM1PpPwbfQL$wewDZ{Y@+-3fdib9%d%@DnI~bOG@sVPvS;?o5;=hP=@bL~ zST#Nf7M?T85SGB+x_>Fkr~A#B*&_T%?fvUN-obnR0I>CCp^RZ?0QwTG5fIe^u~pE( zp;%ELRQn#yNWjva`KOsLLsh|~-i@vZB^a9Qc|(=z2C~R3dha|&weJI&AFG4z{o3&J78}{y{Yz#IB9rzG*{<00}+y1+NY|`du$DRom z`Tg^_F*c;A1BqX?9^M%3MEe7bo>)GY$E)oX*$fzTS-LCK21>?dOYQKh=PgBbSO0W= zT=SactfQduM9+$~g3iObc<6>{jXk!Cz#gLn>zq6@Q?kx(FaNPRK*2Bh+y+Gw!( z0jzX|?>GnxP~!0VuiY36&s!u06D3Ps0E-N*wUtG0{qJH`U)$%8(jF!SGVetZ;a~o> zzsI~e1W*@}j(xHDEdxchw5HcNII*xI2{6H_Xk#Bf;WU zyNtUmb^fFM+TlAu2ZYz7q(>>Tq_e(VfwA}*YNyP#v-&xJEku!lYdHDhDNR`W_4nLp z&4U`q?!|jzTHW9x{LULzDBx=8iiaWCUkhq;54-&mbcp()<&WVX@5_VOjaow<^FLVY zou)7GuQ;oh(ui_(E`-H7>C^c@>_J@gE#e02;$Y=D5tv}9VqjDX3pFD0-nhX4Mi)BVI(RTUV+z3IoEi2ViheT zk*nLAZPkka-g(d8cTW3Sk`I`x4lP7o*9>?Q6NXg5>SzO53L#Gd>eD7b#Aw8hZq`%c zaQ70iFcDfKbJ`UwW_yo^@D+L>)n6yO(zN$h&gVh20`xs`#&@{AiC0a2Et8iDs>vK3 z0AvipxcW?p^0_93LT z+53m2ZlC<|YX$z-nv#f^6m4+pyXZl07G=a;lTqZD!RSGC(e%87)2VakQ=lP zC+VJ^Nj{ia@LDk3iNzj+Y7Ox4lF76DL9@n&<~Yifa7zEe0zm%?+4oi*Dah8KP}LVN z5aD;PmsqV*DXm&$xXswRc(#EAv#R!A6Kog}qe43XI@WkGQ{BP18@r<@IjI97QRm|F zN;+Kk^74{o*n4=67D^RaNlYb+BQA;o9BOeuQ{RfU zm{#NQ{jw=nfc=j&?6X5Ru-w@{%k?Ghf#m|>+Lj{7Sx8Jzl)s|RRI5|pk7D+Z=;q6P z=zmgRwTuaSX2TC8AK@bWmIS9NIIhvU@A&cotioI@5BaLJ2p41mi z7}3#eIx;BFp$zibg(#s8pgRn^s%n37R&|H_B!-zE&KJhf5^Z@g;=uf=jSHpctw^wb zGgkI2We13~GNuxGpgOdPC{OjjU;%i4(CpqwDYX7nIQp5|?o;Za!6zcKd&&nt`XVs~ z5_-<_teSRquidNRxY-Jn(}QpQa8q1TVDXBMV~H#Z5j=P|(nM0}d?;Y1r|x@(3y1Gl zX>6x=%-*8}n#zRwbT4DcoD`Ky--iE1Zab?4jv9t=PS&Su+Rq*Q{LID+=nIkWwz&a! 
z>Gm?2#7hdUK;YQ+Xb%4$@a@M=6&FT-Fn`Us$?M=UfGYtaj@)PqM6clAV@}V;uQtDt zUzBfaH(tz|#s;;z4&rE6|7O4N@pX$m-^U<_xdU9Y|Hcmbp`Og6Tc3s$x;G*Sglzi$ z(ijh@lY9y5;2hebN?u;k|VGoR?2*Lpsxqzbpy z^>tQEIw8EERB5&{`S)7kI!8OSFX1tCuOK$_6Qoph&}G21z@q?ALrhI(|A>q{bBZIs zwi|U{R}j8*r9>>#73@C_dRK2DI2aAc-EgJQgvLpG>5EI1YBSZ7(?79eRho`ro=;d; zQ}P(R8*6dB=R0kfzkxzCxJ@Q>ETB{=N~dsHokIZqqUR`7U_Ki%FGE*DzvE|{P214s zsKTg#0#69mSVyq9DpGr2{GOT)I5`tX#_{9P6$00a-&osfjIKpt2?)Cl6hZW0%%mwq zILRh?flC7Fu;c$t_!-Oz{17zWd4PDlF#umHJ&P=3UN3o5*;?^yrZ9l+A1^_ES3guU zv=`)N$^Y~>jV>02a1gYvd;8}P*-jQjv~?yiOKn>S`%q6cKfU1W?(Zt4|4=$m3M5KR zMuMAx0^f(S`mvn)b|GDJ*v{MyEjt|aVb+Q46TnnH)T}C0ZD&tt7yOcd=9pvzZaC#D z&nN?+EJ|S13AR#_Y2Dgz^jo1}4t&5_2ukz)M|*L5PL|BDj04KZb#YNu_#9csMd}80 zVK`glBCs8xj!sFUAmx#DY7USdf*pqxL;&!?TdFYx$FPRDatKVRzcQYT!Q-&MGz7E! zDHiw%&hQ|7UWzAMPSl__7E*pwrg?e1j@IIPOLB8_Lj^P**Q~tF+~D??TS;H{Mm&^? zWF{nck!k*Iq5l5J9ZCQ?9&S5Tr24@^Ezy=t;2jDSTMgB-GcGLul3$NAGDIjGmCbr1 z<=~2h7Jnv0pSb)2!$xMm=-_ETGcRuZg z)k3;|u;qMcc!`x9$~U$~;S}~rfN3mi-}H%=5aU#(&d@*_-EVYg&PV$NHZj!&*aiD2 z%$u|%2~ax#?l=qR{~T_O(=M9<0J#ZJ9>m7Rh68BqAV5ATQDZt_e1CTvL9glYXCr#p zwGxm#0)4^TuWbM-DFKjy)Bu;PY1_vak8L)P@DSi|3sWm95TXT6c4x8p?3c-wCChc| zpg`(4Antbe48luC7o3RaEofi)oR+2J4}f-Os~0N80nWe}ofh{a$x>h1*+$1Vb8S9? 
z`c3cNH~h4)11k)yw*tTiEe##Xpzm+3NntUNRSuo=(nA4>7zmeZnmiTL7zrd#L zDEyC2^BMZDO~cGNy}OkOPszq77u4AMwO8r;0D}ekt8wP@9%uvn*mld zS~LQ{^r?NWPOt@CZHiQ~5xg8C5CMAnm!cpIqWb`o*?bh~vx*{SyZG*YC4dEd?y(Oz zA%6{h!~;xw7*J$dyx4n)>+dDvz@VVZ6>=Obb5p6Pt|y8B{<`|3H&WjlV1-p$Jze5U zG~Wum+v<>OmV}TEvwpI-A{@MY9diVLxWQjlY)SL(SdZK^*~@QSKaDp_T)W0`t;wjD zel>E7UTuyj@->Nicl)9Vdf9fA|J%Oxul4#U!>Ww9%)hC0z3!hQ(-xKm3Abb6PKYsO zvoycnK;bOUE4M!fSlr=V>7H5aF()#4G7t&5SB;AX;6Ko9M3}F+?y4E1rB|p z+ScYTrbDI|RgfX2R9hWtl#FwV7jP6%D^ z_)-^Z@slt#cRvZ# zrnGuowGR8jbGG#B&lWo`tq$DwE$o=qbIIzueF+P&M%l$yD&W_0U?j6-6!T8M{K!wK z8`^Tuw!OqV#-PQiUxxxNVFWa*m^r*2XPRfL_(&NDEMAX)eck^*bbV!5RnZo$lyrBu zbazNgh)79?ba$t8H%KE$i&D}^mvnb`NC<+I)LRF=_ucp7{gdzWoU`|ex#k*k%rSyK z0(5HZlN(~(#X-ym&=?!LHAST@n-VW&umwfFHH@4X2*$%9Fen(IUg_@lq+-_Lnj{Yv zOtkR&C0Hbz=*NJg6-F%+dvX!4i^HHoBTVJr4ZGO>iPY|)D@@x4pZw?7V-TG?HhbgN zLD0#1Zy<$CzLB7G0&^VQ{RTqhhN97yrBRZmVgR)6Z5p&iajNQfcV71&>HBXl7VUj> zD6vGCcOIlq`K@P|#Wo^z@Wy#*jGcFe0;a!cbO93ZRU^;fy^GU0$FIOY*kF-lp1i zu`cUL{vmj!MwU5u;AeOX)^S#Q?gy;#9t+x7!dPlMN~tR6^L;ga#Yje@rkU@g^%<1c z-viUA`a8eAIIMhV@xBzy(gA%Mur)9d3xKk$1uH^C7dZDFSeXkTI-A`pxtni^`dqcY zWt9c<&DBe=i)EKt%NNRTu9yDMQ21sF%ysJbw=S$Y_ipUpOb1zSn_3#T`)k{QOWM67 zv0+W$+p8!hnfuvs=EaxGzb%>R%oHwN1w%i7`?@}QcO-O zfm!?219wO{-w^imc}QXYJAkaoI9Xw{(BNE+*_O#lA!D1ab+Jj~#p|xY2uh2yzHJ?% zeh#Rk+pve=V1f7<-?vA0BjmGqtRPb>%;8DLovc^9DN(tFBJ0P;yCt^E83ew!SCS(4 zsP(CU_-lbHoqGa>e)EDelm>NC$|=ONHvsnqOw)-&CI$D@kvhy@7ezZZy_Y<8a#hrp;4iKg(u9=8JYlh z7U)aKar^rRc10X|xh%coVo}+Bf=kBW%Yx1S^-gls~Mrv`McO zav2lQN=0W8icWRldPc8o_=w*QbENE$j_Bzahb!x9?1*Ojc6-Izxa0a|osA5c4z8lv zQP-N{OOyN}Leg;_Ojvz$>J7A3Z^Fs8k>%qJPaVPb`~i~4|7UyF8LAt;0eR75n77JG z+K|KDu?w?h`hzsDAaM%8_elFo6l#XZFKDDr6rLwy48pV1@L!3=jiX018{#04H?SZl z;xCbfNeyP9$U5yLaNZJ&5Zr^N9N|C%GKGY#U*0qx4Ls=B&a&qT1y?P(ARzIGu*d5Yg9$Qmu~+~uO!^%AoiJc;u%eDhtHFnH=I7H2nNzK_Zl z#*J8>ae0O8L*h?O@_at5@s3s?OS2K8gQ(0`LgBbvZH~U05)q6qM~UZSAmaOnd(|p} z1_zNR5yg4gCPpY*s4M)V>4S>Y;0kc~Oz|X6)F*&?akmv+od3)1cguCU02tT=VU3-( z>N=q)QR$kj^ud(2GoXn)UxVCF`^@DlPyLps+O8GoY*3!lV!5Eq+a@WX0g{*yY}WMG 
zl&rOJ8d(|&3}iH4VwbH|Yfx4$GV?bQtNqfoTyP}9xntnsF438A{@~@P7>n>8HilZD z_EBS*yggw{cCoym)bDD{_xGOiefzWLYdRt)>o9GeQ%JV&K+D(a(>m7!^?TB6=gvNi zqWJG;6a=_kX9@)3;uFDJh}60hlJ`JWyOC9;0C5yW3(p#MOkGO;y#=ME1QiW!h!|(Y z7F<53ED=it{Jx1C(yw*w(QAV#By#of`?v3|AI{MllF{oIC^-hxIE;gxt*Gwg%@^47 zf-#x85$aW`>UJ;-`5iZ&*c~GyBf~5`4b>4GfBIgg<-`V0+A_X5j~OqzeSP&Ln$XU= zTBgP!)}ULG%4-f?n!XV7dSJXeS&CwS;5VqmARt?SFl4)!q2sYKp{#;Wn~LHsHTcLJkdTs>Yc64t zXtxANfdFbc8Dpv0%Z=ySSD28Y{~z+D#2phY4xO!UgSQ=8H$d4+o|TXaA@EZvY13(c zB7bHQUuU~cm<=(G{|@?`@`(vz+F`MFnNelzH}h-Wr9MHt$nH+|h5jL<-eIqJ49$t#WNh*EkUw^$obB3C7=f{e@9-HRB( zEL{-a{nEVPi{wTa`cRq#*+a}Y-BN5iI4Tk3$dnXmG-YSTR)CWH*)NXXS)V0v zK8{9w<*sXh!y!K`ymw?O&TkA^RCBx?f4amNZi3n$21LLY>v*$c!&CP>bVfj)u(KIp ze#kx_*}~CX=35TDb#2#4uxpo3-FFfEx*w;`yEB!v?yyt9$l7k5P)uePhraRC++PAc zc|mweqQ9z6uzxkk5S;Kh^!munwKwX-LHX(vy+Vx@FEM=W7raoSWMriCir}%4JDY=v zq2WqnH9y*nd|p_f={7FGRc|xj*&lcPTpRep`vcB@XpS@ID(CDMpd3sQt4!!)Afpmc zHIX7TL>AtSn50Mn)ov4#vbs*}dsgOg)gBo^adL=bC)9Nz7mxL#TBsx+bI6<>E- zUj}~;+Ho9D0|eK)M5MXmEvRz4qy0T*)3#^CA>@8`kcsw=iWzc@4n6x3r%Vy;VYu(k zeAT{Oe>_{(3fYd5(ldpPf~QAS zuXbY*l^jsmp+k)$X}VS#Hq*HoHYuOMbT}KzJ{vyX1m{7#uz?gtKY8|OS0EuqLa=eo zs8cj4rm1=Fq6*nL2RV8-!Pj_Bz6Byla2(w8W_=}ft-f8J?;@v;1Sf)Z8=a1uq152a zQYSuJHZx#)n~_Hoo&#>QA{qkFY1V{PjpLo6?e#z*zrj}tGWv=4b0mS6(IGSux^1_r zvW{H2czR}H0ZTuC=jX(TI;|%@COgu2;!y9A*cC5PHR?B&6LHe|Yr}%?Rdc|Ot#wHY zo?I}2SuPl{dQz(z(Y2k$TUL-GDyz)+F5|+A` zo0*%r`(HFs*pkNWjqtO4UwPj_=4G7}k<^1?oVzrF z{PIE`DMIUGKY_10oo@$PY>=o`9ZP)ef0d>Stnw<<5pHBk10H~=mWICm66u*{?7@r7rM)C6msjg^hsp18$jF>I zo$URtF%UjhKj2q&UHd(1*LDV#FCzGO;*%8YNTUcB;kTh$j7uqDyJi*;;N}Q|H3^kk zK9gu?1W`~6E;A+hGso_U58lPZ2H8t2mWLJRTnlk~i(QQb$TuI|48g44>aVRXxj7G< z&lCQRun_2CR6zj%%GxK$UYts%Zqs`TflTVB61_oA_Y$j2xUi|Kh}M_VJr;!*hR<^Z z55va{k48RY%*=U03dr*Au|m)tiS~Dp*CfB-=AvweC`7ZSk;ut9p|j9N%w*OdQ{E`* zt(`xl%%fBTVML(+kh!*2^dxgw4%_zyP}o^Ue8h0((7{h%bY+`8gKYq>)&rMBjVSjs zw~%K1kMpsoX2a`%!=hblrRwHzp>_H+b*}{uq^fV1N%>pt(gU z4ImKbYxk%uQ%pl#q{G6V=kFH;=lr&iWV4wme;=r1dfPPyGC0=mp;I2{p+ 
z6->8d#}>UrV|sq(Tu4g|4JvK;%r7oJK&zj2u(`32a~{Ney*c@ml{_1zM3pwdIMk&~ z3Vb}dJ%Q-)%a9n%$}6C=1%Hu1rlS$b}sz;`8Ndez?SrCS7_n zu{49U7&NDK@TD3kL_5tS7dAj<93l|4jqpWEuae<*1IKoap?$={vZSnVR*}U|DSo!M~E?x-5_K+ETpxh7q3b5 zxaqiyI-_vRyGq=tQ|H%ft;29JpaOnCQ`*=Cbl6aZ;WI-onkcR;g+clf`YKNL!{fNm zihtCZWsPT0YiN}mB$5t#jX@{Tdy~Z*lA-H6tA!f*9)lfH_AdU$GV|2}!15OpiQHRQ z5QibLO*GZ@{qd5-om%y)sYabK_iiWtM(5JnXR?Y=upVvxEd56!9++lNvjg!;`5?CwV1 znNJp+_f!CpkoLEQ{cVK@uVdgdD0$eI!F`Fkeapp4wf!atXo$3L9>He{SboftixH&E z6tppKQ#qjPiOq#Y4Qmq#-XgQJ?7~F-d`u+}qUucgIfV^Y!S6xqhyd<}5cyBn54}wa z$!}Y{>2c3EV*P2Md*o}B&}1;{2x)KWh8wTrmZCt@t z4p+xpMRRaDpzq7#DBO3=g=LepWyU?IAAAqIL3D(^_{DwQW)MMOj{P$SLg9^(QO zM#H;1K@{>O1#2KAeFQCmvJ015u&sqwL2YeqzA*Xok0TG9){yarCUXQX=!OW)?{&FNYq*=k zw=%3DhLM&tjWAIKPny8q*`f@ya3yU(aPle+W3?^a@JRG|{-A;2LRN_}a@6zkg8MMW ze?kg*lR?M5v^659Ew(SEMI&_wGFB{9{xsIxr!Ok4huM*T{-6}kl4U$D?|DI!qYDoS zX4Nx(m9i^UgN#q9!YptZSK(7e_E6x?%+yGS-jx=358y<6wBKF$Jo*A#Q(6o#M5>L< znD!R1HS2tE$I@8_55%w_f&LcY@TEKN6h4BYwwHW5vsBlz0=naKFlY}S#4)WPfO&>K z8N*1KX(s^8y)EU)W*+6qB&~9IPmWvE5_fB)DRstU9UP3Gm<(qv5V=k$6F}95<9&-T zuX{E$|K)Al_Q#P$CS(t9Z}E%6MhZ`)1J(uS5qSA+QIMvw&XKS1s1dw7K>!6{ZrA|RTNL8+{>JfOul1R#t=!fzmtdp z-JeK1Wj>how7IGXUe&j1NZ{ZHCfp%aUST>et&A?#<2WCVYSR4QcQ@Rem0(79Cs=4| z-4ZBdL?LOx!lH|Whxs<0{Q2c6Bc%aT9xmZ_(F4xUA{88y87++V{bD-zXejiHS9#(H z4T8{+u2FKDyzPBH=pOsG;GO32{X)#*x(?T_nGXw1rz=x{HMS5*=*4IB?U%c?R{9Jt zS9%1yR?a2o-(F0}DpPCWVpiggcVdWyG#yaq)2A11I0@GLcx#0oLdpa$-U+KN`jk}g zol`ajyHtCZ9=fB^3ncVcE4wueu-p5S0~pY(k9$q?o!0Xj3@Gb}6!Sl!CjlQuBXI{M z)hO8Xnvl!B4TrX&DX6%Tak8R9t*{yB3piKHya>2AZuoYYUb82s&4lpAjvq;VY^We@J+O3iay3t!CY)EO z=KNB5Kr4^ww$X`V>*OWfix?9%Y5z;)G>zC^y&;>B7Y#7B)LY_%jiyBPIY>}5lBmH5)25?v*lR5J~?IA_JL0Ky)Ad6X|BLQKz%3)wKNLbs~hCeO_d* znhGd5pQ;Awb{twt3>c`xq(b93VZIOgd+8w|Y|#NUm)A@hIe4)c4v+WSp3@3LWgBDs zi-{^LP01RSo0`xsGDQ-f-$#^XBx&@gB%^xzQZ-f}oVAH<4Dm)ulN9@Trco|ZhDNzI0m=3vw?&8Z_WD^NYcT$0O`v3s*!eT#^^0^Z^-$=_EZZg{22&kybDH?%|kUoJpw7#`|?#F5p$*#s%cGp8Jk zaOuCl#RB)=-|`#C10F??$)ZfQV9|3%h|Ot4lrM6xvl7uS1hq;r$p;39qd9JU+{9{1 
z=;pWW%}%^7ABvpBU^CpZo=yY1@n`1){2wp3I4KRtVYeVlVUf-%BN<~10c0QCRTwi( zm{BF!CK?XXv!cU9w6$aki8G@+{zjo^EJ+lxe^z$&*kRd`9q#m}%be;R}Q_G*F z6d*#;L2`mFHVaccDA3NLudR;GAa$_)#eJ;{VJwPRzAQUV=GV@Vj8et96p-i_jN|X0 zgZeuILcwCHRwPh>@cavo>XJLo|8boHE(00}f8a(Zui#>9q~nIp@>>OIeBs*xGqa9g zFz%EoXZHSVm2GN*l8B!OBUNVB>t{Th zqM7v+?kr;C4GvgQ5`D|&!0x9la#5qppsE0|UC5fKFPnws=AkB1u>7wgep0d)wNhT~ZBn$8-!~f0$i;;u~CNwW(c85QzTT{(H zuPMMNL#Rt;vn%?3vfb}hyMcKaoBj<`#1}1W9OkIr^pkJS3k;VdL}r~i5AJ(l7QKyA@}bm-U0UR*C?9(!JZqjau*95f>D2{soCH{l7w7NYMjE z?(nN3^9XZQzf{!v)E$Yy%g>U9mJLolfB4}co6sBVe~!@Sdp|H;zv4VW1DgdA5l3qY z|C)YzJclb|htu4{F0D9^V7UI)V9NiyQ!#f*oO$kNULu$dbMq43FoCYL{!AzzjL|p{daXaMrv-?y-|49~|1tmJwLd}+&=-Z7(7~?i_Noa4wkIePEwj6`QwW80% zh6C)`{Z^}fxVi}6OwJmQI1DjYq1pX>xGFD(@Q(4>D}bTWM}ecD_5oBG!{Y?`N+UOS ze%GIPzkxRY>z^byAGnm2mDhH5%*f#_)g{{HFye1+lfN9(u&WRR`@2G}$Y^{q77P=T z@<`(^(`p%nTj~(7y5qhO!M3(tgoLOHO03^Lkj84*RuO~}*X87s^ICly(Hb$)>O>v2 zi6NO#YqJx_|5Ar>3YiM6(^L_6GT4C`vZvI4c!FsnHYKB2xz;Dv`^P&-pSzM4jjVRa zYVygZiM5!=oPCr_z<@5oe0(59Ye|F#$gPdU>-yJ|1U5+KTH28`{>|rZ1)WW*>KRWS ziPy2fn^IBepIJHiHI}U2S0Q<`2aP5lUdR?3D+b_amLi2cP6s2MZ2vk&gYz-r8#{I+ zw+d2}>CEVmMe|lWZ2hnI^wgR_EoizPXZ<|$lKov;j+Jv7Cvz66 zHK!JRh@F+s?Ugcx5%6rrphH_7Z-|e_Xz)5`e9x$c1VC80l>4$bYaF{ArhHBA>T|8l zIU->i+79kfiSOq(AbU!c(XA@q7WQeZKBDYNYMBspy%sY3kiLSkZ}n$2dX(ovQ&;6v zO?A|i#|ThI)D)0o`RxrtLRheWBm^J8)IC?IFjKvSoT9YWCm3blkBTMA7+>Tnf0(G+ z;VjmzQ4viO>L8EA`K@k?rW)!_-i*KnGumGM9R3}K=kGmC6hcl|AOnearv)U$41mk! z+O}yc-86)yp;c|>us_8>ubc;~`KMkbpf{`c{qkqEcOh;HFZY>}nV`vjdY!n$nd}<* zM>#z1O$J<{`Eg!o2Itz~xKdxqNiS1_^CE2M#{KoTFo|^XpKN>kg(TXc{;q*z|M-N# zV1FmbYoN&sX$to291j2IHKO6})_<2f##8D!X*s8>v_72B-l}-hFCk|XNBYW3o3k#! 
zbtrCj0PV0=>=yT9GLzv9wJSoif_MW^q(%eehY-I_fnrdWP2+9@6>3P?8wkUYL97;w z?tOnu&_b9KQqs~$gj|i(JHkbWt_KTnoP|iU?iN~-7={r~!;dg~WK*{f|8BaaG&wNp zHY=V__iHo2wZR0V4!-W}%O~IBeo2Y;fPXL4K9&GnXM8}e*55l8i3`y$wNMfc=qyL6K#U#eS_A`7xWH*)Kmqz0& zD_yA{of-r2wq+q*5nKNj={s1H01O8MNaA|9;mJ7)`gFliJO@KDof+FT0wZ2&WrsfR zoX&ZoIt|6{>tap+GgGowV2F?x*x~uzqx3}Khc&q$cY^t((EH{x{*3)BIC!wDOLDKV z)?r;DK-bgTTM}?Dash#$(rNqUT#Y5RhBPuNwdF#kVbI6Pdi%F&?1q6_i;yyXEU6IE z#lcJ&atyKGk^RM52MoXWMd^vNFuB|9mE+!2Ns<^WkY>Es>$fqMLbhGw?d1PTHEc(@ z1=*t2gPoHFRhaE$n(y>>k=)dW4iRt%@#1ZhP#_>E=ihTi;~7$w1Oh)jB&NBnKZ%cd zyj;1{ZqHq@%k6s~r2{MNq55Ldb%>$B$7nvarw(rx5~=mJ(Y3!7DgdNqose^;9A ztrnmpzh_R+RiJ< zMOt~>{d}(eA%0D_FcQcU{;(MIXnDu(2Zk=hg$z<{b6&|K`57F z@k%S1QSFH@c0@`p3;^YRnS=EZ@ME(0VeH8a$@r-WuzYer*sM*@5|C_xZYdqGIOp-~;mlP_!|?u?#9G{) z-BX(|&ylSxYvq2tSAd_! z`^a>VTEtF&HJ?(Ail2v%X)fc1XAI~X1u9*^z&{Sr42bnG#^ci{Y3`F&tINj6pRBcW zsYcZfH3s{`3}Obt(HQr|81CnR0F(UEAuGvAl|Q<5bsxpMcI!^PovBYC{ctf zLZ&e|x#Fs?qO=>zjTPtr2_@>IWNvcfcSoY2! zImNGX-2Qp=0H#t$&j6>E5Zf|9AK^Pdz;o+(nURXF7AHB@UKunxJA}tA<37C z!yB>Uv0cC!c+Wt6OA=)qt^?D<;=9qNl`NSXXgHX{LdnjqR2g7iTgfgBsAWS7D{X#l zViRNNB>#M29kY?~SrK$F6S@KGTpaj{hp+^1Xt6j;A;&o{e>)dl&MNdaz-0=bXmrKB zGM42E2N+3O++(7XeW2*9BW=(Vip_4hqTk@N{ms-6CIb5UOe_aYt-Yev>-iOpfK+F& zKQiTGny?!3u}BFqfE>~)RO#L$x8wQN4SQcoWdPWn{pDkh# z@D(t;XU*2Zy=j&n#$*|n*3=}Bl9EDzV>9Z#Z(XB)7n+Q)*%L(+?sx#9 zZnk09nh=ZjB85ZTs=0g6b25P~^~GC%)Wz2wcQ=15Q1naN#yIHy{> z8-`Cxg5XMBi!D$WWyCatXbh0X zS5b(0m#*(SMLD2EBLR7g_$Z<%&$|Ffl9*r^Y4JhiL3EF_66N(#H?`qFoK}>3*MkIE z*N~%aG9T-B-WTjL<6Q|&-Xr9zt)V9(0-WN^itbPym&f|)G}{H>~#^~Pb@$?3ExyP=1(n!k#-P99EigvGL<6cdTlZr>A6!uiPU>vNJ6 zfjl0g4KMMhZ~KN(^#vFXUR3Q<`@;(2Qfq&~d8VyEIPN7bC4UE3K!^YZ3_0>>01gCB z@<$wK>vS_GPvCC)!#L)V*}m;T2m0CStz&Tr6-H(C?Vj`?dZ}gRo-Hj zRVE4czrcW4&F)*HW1fa%R8m52k_dH9snh4TyNIv9+l z4%APmKLP6glCXdzZv4P3DVn7BbeL1&*=M7-pOvY!zklfn35G@9UQR6+Kmj|!P;K*8 zlgmu_SzjA%Nb9Gj0*tm`(e`{YqK^^^Zf=a6VYvyjpdD`{QEz>^OsvON$2vYd z#W63L;&pJPD?1exR=mTVdI^Hb@GI1)vX)U2z0r)&p5n{bVa<)zX(%81Qu0@Ex7t$v 
z9VY(|)bSlQ@?uN^J_fT47*Qe37vZ4}GLUuMn%=BcSi2jpbYDa+*3D30vYbf`5|lrJ zXEWubKjz5-pts>GosNUrD?H`%lPWaH5U}Z@15}&noCS7&;?ZU;Vi8+9Gdx<-YDzs~ zcC9bqJ0&2Yy$;hXX|h8#(BhU_>b7M{Cz} z%K!wZREQ;C7mMi3(tUS%MVJW=5eJvH6))EP%`$wSb^&!Ws(yL=s?rIJJY>3+CWO*)8#vBEjU

KY_o6)M+^!@1Rk>F;#4HslBoQ`DCn4_t|7G&M>R40Rj*n#Y-43gyz#tkt-K5wdcB$ClN4BKk?U zoT8zrNq(ZX&_SvIjZFADOrgX$jpnPBm0}!ydlUo!rjL4zCU560`iLmk3?LA2B^->! zpfKwQ>x)dq9vbZzOy@z3*tfX|-sx|gZvQ74E6{pjGYpJZ19QFy5pqp2m{)`+F%tZk zsYNz5;0t~rMtgt}o*+U9Q#=!rLo$CbUaKX79Z_?5% z_J1zfMpJa=blRcfIYyUeO)F<>`{N7%-noDw^N9ORP=!-Yp7WH9?3S>I_qiDtXDTYF z+cR1&A3yhif*8H(l%{@|iAagZG}A$ZKex%d!Mdh)`ws0sN!cH1qA4F30``K0*rP?I zt}kMFB0c4r4`f;zc=Y;({|1?_hO#wiWKGbUW;UTP#4tG3Y($?IEVzYe-jaTP&sNZ2 z+xV12NBM7&IN5q2_jgN6b!Sl_%x|z)7lJJeMawPE+{IQg-$vy|6HnlH)jgmZ^~Tcv z-sikUsUI57nS$apf5q_oq*H=XF{dpAoS>3-8MKC)$A8!je!rY+6qTLRuXlRwE}I~n zKr{b~t6uEeRR2e*aT19-BpKcrx^)V5$7FS{DxGG;K0M_8X z&L|92hmH(WYE1-FESy`PAQND*&`{6ikK|wqk`VL`eYK@<{Y)bvU1r<{SLrqB2viaT zP<~tsp=4Ih zwONPFL@Fs~feHhPZHGm(|Y>>Ag#lS5LU`Ma@{{GU(p-x*Z*l=g>p?^n>p;|IKW(xYpj1A7wprB89Nf z^jA~U_M?2+4F@zD_fQzQH2`NB&IUwPdg-If_9HB#Jd3-}^J#9#mf%Gh<$X;)sZF6_ zQE$CywTf9ri0t0kJ-=x15Na6zOihJ=#}wvp4xh%PKQvkghHhjf*cI7ZTvHQv%2I$V zyXAIqJsfW%)S4J?9ZL3_QLN@q<yC^+YqJPKjA}R}zZ2tOKF$Z$vlW{nL z&c~J-9O6)^FDiElhlTZcZ!b1kgEf=xT^Y<*J$1MeP=vp@l@757H`p}ZEZDj24I z#E|L95G!S_c;gFIA;cWu;Y+Yl$vg*}BHp+#7Uh#19P3zS>s=>7v9c631 zN_h)nkZM4*rh>mR43YTqz9=GZJ%Ft5eRl+C#xWr4^R!N!MbE?2sKDC+u)lP`3(9-7i_X)ZD_2o8$f zTB#+kBF`(B5i4|5)Wunx~_8O1%38-g`;SZR9+lv6>)YUQ_f1H|KDNwnJ zu68fthX$X+`MmXJ=)^n?kyCgV=WHoyy=~YxG|KR$I_PVzI%A%)b@Fn4>*!}I7?Hl>FrH@G&3qUTLl58sH8)aKlDWCZg= zbl{CwP7;_sLrm9x2&b5l!)>KuKN_D;yBk?Ba7(mvf*g<8xPTH7yeA=6S(Ju^>s26u zi$PwVq2C{zr~MFM@p6{lN(0sS=!O}3Ri(CLb@eUkS;p5!gAU=lbYs8HXvd@BKLvnB z5shb77W%ssKIR|=bH}&)Wnmkj6A2x^-wzu_;nLQo)?Q!73~UP}q#s(aTk-t<_=$a# zqjPT;X+_wF&0h9^V8Vs2cWxgv-A2a4Q>GHHGp-9MWDua}q#%}|Uyg(0Rb(M|1gV;< zuM_?SjL&OOBJjM^r*VeYJb^?QW?FvfRwT zc9g+vh9MsNlw=@(sa#?Ns7Be)^I4rfH6&)pwg$y<(n_(HWuJQtOdg~u-nrS_(934< zL%sU&K*y}*E6blhoqOyv&87W?mnj%pju zF{|5SJ*4Gd_WMk~i|A(ur#Aw^iAeFb$S$HC;88r%mcqWVEU??lR|1j-gJ5@A5bDC~ znRPt+(%_=4b_uwnj60+@HP+7sT57h2|0RO(52mvSCrvL^AOdkLaeIcOkhO13KPbzK ztE7;*_@0Nc;Ew37Tg-nVS!|s{0yGEo1~rGZG&PBBaS}~W^id&Ca|Pr%sYM~{j6JL$ 
z9@0$3?t=LbDOA=LySHyCOYPPCaxuStWyZ=*&-fnc8E)Rd?yXiaJ$qLn6xmjv zPGdvfok!4ERg8bjl?5~EN|{b9WSjIcZ1Jgk1u4~@sP`{CMm_>K4Z9`$cPro$?zVF< zg^r7%*7B>2n>EEfuF-W&rQe6Cn3@L+!y2Hp;n6A4NdJeTr#8GpZzd28xgL&FVwJgE zFYG%kQmFf=S_N0P4bctFpmdUzsSXqjAA)97MCXhBS$ly`XBPv|D=Yf(&%Ie?vo~%c zU3i^kH>Cc{Q%NpG{~w-;b>HA@_(B&8(uM~S1M%fB`U;zav6F0rLwD5Wrk2<(N|bze z9J!29_i0Z8LSs{t<7K~u=Wm|ZB#MToeSNY;_Km$MjRapM=u(QAdhTjCZl&q07L+K>7hl0?EF&4tNL<>lQX;WUmpRDEaJ zY_SdIB-(X7eVtp=ZeHE%$pG<(bBs0L*O{kc<*i-`38nz-hmiE1ME%Z*Vw&^&X$lww zDlQb`-@K2nPmtQ+_asI~)uwpudKY~6*m8RlsN*s_M$h>qvnBTKK;3rOdlEH}8^@%| z1o3>HvlyzP^hd{=9A@XAUS3jAO*qSfpC~=DzT>J)z25o{7l7sc5SxCX=g6X@zZo zhOWAOUO}3PluQ)Oy)O?07t_xtE$wKKx*7iF*g~|QaBwKwAzc^x1>BVGS)Ls6k5e8X zW!>|Ky6cj>F;Ddm3GM~7RmXtVYBb*zyoR;H@5)!?`R?Lgn|-;~sx$IK zth2?aN5>kkdQeIG{iI&UD%Yg-xWboGEmG>wvUM1elzb_MWqD%0Y>i5xEOXX)S819d z9-G;D-`W`6iT8n=|8SoC8)B0x1%CnISIM(&_}uu~ZkatsSL68d@2svaC}t6$1oK~9 z0+}C02;Pb9+wW}r+~V(XODbElaG8O>rqhmmo2l_;h}X_CGP8uk$FoYGBlCj$H^Y*i zeJRwJT*bkepFwB%_6KGPdmCfJft9nEOue?e&b>FOuk{7I%ggddVstQSl{niRLPScT zoU!i8c_bSm@Qh-2C%(U0rs0nKK3=BhntN&lYH_VhU2!=PQU_bH97-Q`?elG+ht?_8 z-K2Cl1`muk52g-nvKihEUH|xWiFSQN^cP-tM}5!mQlcFe0)=$-9`On!89E&rZpc7%RpMy1C#m43r}xdVW2(TDNUP5M?CmgjMgm0Ibi9#P-HXlh-*;5Tor<1pZj;_UESJQvs zw`Pd1m8HvXA?8nmhM1E-WjA9*izdtIzFZ`3gA{i7zg_CH z`T^H1H#4EpC@%4OSZMUgD)juvCO=dt6z0$bVM__a1B#UD%Yj6q&lizuKbvRcm zr9>7wX0Xnl{gDI|j*ty}*=7l?T5;ya)h1&F&E|ayT@hiePl6VoA$wpbd6sWCs&TZ& zl`ozzxc&0qRsZ>auKLVvR)B^Uu*!%pp#RyTT;;B5TDdf1#cL+qbuyN#($3I63`*;B zz9t1VC(@iex!AlmjEiwt%?$q8Fzs~o^43?yl-RO{%-95@i`5;Zw*=0KkLawO7=M3v z9dmkI;q2Zsvpthy0{+2n(zJW&bC)whY zy%Vz-4(6Wd4Ha#Kw6mVXoYzjo$^@_fRjBELk~%423jlvxo=D&Ay?R*~-uL%B&igk^ zX6}Q18AXiJHJWF}7qgbzndFO(sIOkP{9iV|TGY>y9qdQA%PKa}um>}Io}BI3_^GT; z&iRPW;I!&1$K5^(aI{PPE!pLy1pb5hhEBiCNFESFd)-ATFt_BAkbQf9?o!$p+GUrY z6ezV69ph>rl=>s)jz>R|R3@p1M>>b4jH@Cmzvp=WzV3)aMkCJx%{Q~CO`<*6Rbq1o zv-H(X;Pws0p3^70`zGN-WBKfeQHq+f}@2 zvV2_8=xTXa)~|aVD>arsS(Okmyc?>&2wdy<8xS4thNDEVWCDcOt~uaFTSd*d-jol+13pEi)$V{%6|{r>ttmcUud-=SXm8KAgg&zSC%VE2yN`T5ak-l#=Q|JQtl(^|#;1gK 
z-blOM<-gdq*}^`C3Dg*owwvTeqGr#YXZjsu0r&Nx_oxrj<$q_du9^inIks`B4Fu z`{8CAN$Lwv6W=5EU&X8D;UaKKT)b<2NZrS6-A%@v^vdWuLk~NPM{K0QgB+vTDQhw@ z?K?)_&~0Nks}I?(Ies=np*ZNdSP=V*K=LY4umQ<0s((*T-IjoNSXlU5>6?v@!VU$N z#FENxDNMbVQHb=?#E< z_O@KCW@R1>Vssw57j}6kKZm7yoRG{XR6G9 z=B8H?RTIrn^Zv8cX|R9NznvpzSXhYw_2O}nRb#V6qkAhuwhDZTF`C)zs}K};PNA^* z5H^7k{*!23f+lGpV+Zx`-8cSg$<-66sT$b7TsXC!xOUq?tQ$3~PGxMkO z#=D2fyo|iOTt@2PM(BwS+nU?U#|{LksGneU4I)_F-}TK$;Ng8H7yx9H!maS$=yhIx z=C1PYPXo~|rW!o7(UxQ1(RLdCAtVWQdSmq6tNKfogmfyqQ15*08%l3&&P^oq(bAsG$1Mzb z#<2NJF}Gw>4I_Mb7&&!xH)ReK{5DMbTZ_MIVp0V{K(Kt=0*6x8usXz#S>1b*mzO(# z!UVz=4_26d6iC|``hCnh^vc8ZtJ!)A*;}vWzb3FvU=WOS|MDcEP*8~1?_nJZT{Xcs zi*f%dU&1o1-XO^s?63N-j#$!OV1NJ>f(Z!}U=D8&n0b49KrlwD5eU@_lgghwulV&s z?v!89wsp22E`avwGJlIoRn>P~XyUp%0ZyvUmf<2X=2LO%r_jr;{V*pohbx%(t5aam z{F7xU7#UyUKddT782Ls-mQ5Z^(|@MszuVoDYxTg{s;Be~DvjBgSvM|ua6fb@lF=wP z|8slq+w3CJNM^#M|64paLyg?ydsLL4Ln61tsJ?7=nMcBGo=QXUO))y+#{K5bj`G5_7|=d0Dzc;TS)$R1r1ZAUUT!g zsK!jqQTd=sURtXZu?^d{@aAf2g+Z30<#HN+{a)cvca__0A%pG-f=l0LUbd0KR*b=Tb}JgRZNKfs7_%; z>#sK3$yhu6F1mgsI31yG2FAk7Ps!NOFqHZlJ1 zOx*Pbo?usl%fq=-ciOSY4Igm805gNS^pr1CuSY8AZA6Ox(jRKeY$&;^j+2{I3cC%d z#NRcn%?)pB?=wdIA=`wF~*S2oAqs@Qh zx`g3*!NvF6~AdHMm0*kb8~Pu_kPGRFl}WRRir1X zEHdu353#ZZ+P{H^4~^BOgT)aO>un9n54T|OFCcmbi84t7nF)F=pzBHJCayg2#+5Cfjv-1`c$uS8PG(RS9*KSBtFc-1ep)x8m;2kFli@>L*Y2jr4lEYi6jb&P~ zp-(|wjh0;UocYtVq&2oPA$pE|ihRvX?J*poFS3~21KxhF0sJ6!6<#Q7SiQt4bNV5B z)8?!G$A{rh5Q;Bm9>x(mMBXEqS2^;>3+hUdVtV$+onReHF14wc&V)%dI-vq9zR;q> zV%+a-TgW_5rr5kP48_}F#`oh+A9d1nlmDH^o-0Yblxuq++nzW@`aCUpAb>1L-lX40 zs|}On*)=AFPob}}`vcX>;1=cT>calhbuT;1DsjfiUy?AkcmlRMWu=foV5Mv2LioKK zEyt+cN+@yDMfJgxB_UU`(>FJ`PSu0OUN2PRGb9&*h8Odnjx3HQ zoBRxcNm)68o#!@ve{Kvm*#G%o=AQlNoTZ&a`#l)@st2O93yOH(U?8-gnxbNi$>oxv z7ln#?iop_xPELV*Gw3k!5F&;n1pn1-isod#4YSj)lPYw9n2cOhV$sEOy&{`*?eOFs z9w~~56V^p^+LPrA`#=;KMe%{Q^1$9=MT`_0j^^PXsBr@+SQ~EP9d!Sj5kj|o3XY10dC2?uYVnuHHlIgX9_gd!Pb{G& z#3mmX+9`?Qh+)4gIOwJtdQhf~W*-1socS=M#VqETw6DjlJj2{KZx*TVF<;)tMb35D 
zyq;&hTQ|Js#JFXVOtxLL>+@KnC;1?P=b!A!OJeMV7xvDXPGa^OOGi@6#~zXd$Bp)h zG2|1Dqd2u;97jz#Y7g}yapJyjFcCAfSMN`)3EfJ%2tq@7ADbZx{>Ypzh_}y39T<+$ z^79zh-B0~t`Sxl>e=YgB(ALF8===UeYj~W~U;sb1XY$#vY0I()KZWnQix*P*Ca!m< zcqjOS^Uu9T^9#AP$JC375P6;omITt(tF2${4VDw6#_*ots?Ik_kHMITD|P&(_ddxK zo0@{se7y~C@a;XZ%^nxYDV^feyV#t7-!h6-rOGn=BBt-LL_S+>?*0s-;9^>C-T2dg z96qyN;=FzgA7{vWbKss$`^8|lNUY&T$JB7~gOJd!vcV1&{W9AVyE+Tl|HIT6HB_TUfb%v zaHed9aRxzv(ZFPwGj|r*D5qy}Qu}*(ZOtkN?IlcJmKk)H>xefLjQZ97uVC+L@a-7G z{e&I{1IM^!t@{aeU-`$yytLsU;IAKb9fhgF5~Hg_dUbQ;lKAfA^UTm|HK|YG5ya?` zA4UnTztDz#aJ+m1*)KZqA#|B;a3E0ZiR~X!c`RF4b_B8vWqjLq{+?0{E%d?*tF2YS zJpF_2WF)a+IY-aM0aT6fN^N|UU>B3z-z%IyuIC?@Df`vVMx2{#v$~P1m2k-xT<)_; zJ#_w+AyHN1Nm#83j_l2hnU990_p9z2nH43JJn?7)1>w^y2+NJX*#JgKlRJ&>yeRWI z2jOw^#s>QVM-Xk(^Ts~CqTT_GMXceK4B9X({zy049AWILS^)=C3w{T(f-WPv*Wtaf zYn;swTHSL4UT!Q-gjv;dGdZHV=t~Scms!*fS0LP-z*dS8!`_SN!j} z1aDak-AvoCmv9BTGESr({QTY1gVr=(zu>d&-r?@2f^4w(ZPN5mxURCW_p#>nTZ`5| zW-HN9AJ8Q##$c@@R^qprUcs*dzC^zsH_Tt{h_$8$5D%q z86i&heoy+NfW3%pe^bCdtr9GGeqwj-LCq-R*t(Fn_R(T}xn?{bO7;+Ckm|y-lBqga z;|u|bd?IMfo{Y*Z7m<5cpR*!7*WPG%@+P`B`ic6NBI2`Nb>H{wm=ZFRbH1adMLkMf0#V;HhMO{e+=|OFnTd!p}V-|K+D9PtGQcr|F+-{UR zJuzBQTNe(MCP_A%J0<3~rnoHrTSB=)MenF?9Q2wyKI?uO8-rd*wkEn{^rGzql!wD7nSILVPYOI8f`Onu4-7Wn>)KN+Z&Z$of#&9BqZUFa8WaKVBh7 z^BpzAG)Iq$vn5O2>YtasIoqwF8EOl2*8QX4=}>zMmhu=ERd}+^dT;6G3k9^nsg{E0 z;>-d@_v*08P6l9R74GWU%ZGK+#eh?8dud`Sr9P;bwRN;d(C)2$Nz)sGY*H==D+{Qz zx7`F(|0K?QnSSy8%&^}`65BB?qoNY%Z;k)A&@XdG)5Ctfx+e5zwe`r@(@Txb!WX(` zU@M&OdS>GI(8D!_TI$;{Tj`QzEQi3adn2Y6mpP(m$|I6B?~TeIJR!-fd!hfLt7oL& z%kB)5 zp&4&ZP`w+;KOn<~vF76*8Ppnz4*%{VoNXNWGYmF*AX@F)!fRI4dKU5CB645%$5L~R zJJ3T;cIu19G{(T6vKN)m4-#(5Y@X1w-<`)G+XVB_{?>B zmL-7KgBSIPpTy8^gy_~*g_T$)7dWcrSD1GWmG?(DQo_1r=aulS`r^$x&p-ZD$H(E@ z^W`zcJsW8-aaakQG>d5N;}_degUOud$~wOkrNaP-UXm&GUjk&B-p~*H>nx52qXSS; zGK-+_92xXf@jB)^(eKAzW{@%U-CkgFn?AzF>~Fs!-Nfr%gDEKksG>P3hm!?|ji{a` z(1O-+&PYh3N{>RM<#6b0l8T5K#AaLUq$JIa#(R>SwjX_Z_EC%?xzw%~Pswi3vL?fy z2|MzGO0H4WI}2fK0TmTQqW>9-t-S-ttl^?Dmt>!oTM{+c7>)y|!9 
zDwv3K3TZ%h-OF1CW4&n%%B}%?+ic$F;nG32X(hz1-=Z2YNQB*@DP`I9rR9QURh$JH{lk0Vg!h!nR*a4o|mK57a~4bHG2_ryZxH_d9r^{m$-Ky zJNs~O>Mn|YHg2ql3>mZr6WfuR1ZpN_tljwcrQwb~0wYf;ZNK=&q;uuD{K6D4bfAFl zN2#)kr*~9OZl9#QiuGJK?Ip)UKAOG{ijW^ETZ}grEY{^28!fb3F-ZcOF|^# zx-+vwC9pf0<=rUI+`46=(!}$W|4&P_fhgY|I&B$x?5_k!5EV_Mg_5%5m$a=_vVkE( z%wJaS5+aWxBu)ZUq;>H@x*P=_An%AtiecBm?{2 znoVHOm$D92uRcIhatm8_TpO{Gx<$P(?va1+%_HM{B{xIg5lO0R-=ApT>whXgUBqTf z)9aVL&Vim^VJvl#bGXd+X||fXqRllp&*yTsoSA@A`g?)(AKPK@VA9O zoQ+7Rq3Z{bv-B>DUk_G$A(>MC!k$|bgL|JOUmt@3QZ2@C`t?tjb^xQu>$)s^@b_3FTp@W#n#3M-Luv%3;}x~~tYp+&dpal0>8 zTb-M7Iqk-KQ7*ga{p94Ie#w#wkq`Ln7_BYGD_s3fxZ%Q`{{i5k;{drtM9Vv$EGzxA zm3(7NT0N>^`hd`O~seJ7*9 zZE!}!k-|U105n`8-5Ui;CSR2c*BtmoC=+p`?`r2zf(q#U%`Zr$zK}}YHVNb2)bl^V zJar*HArvCQs><3?X|OMz&fLUPXIyJj*@FO-sBp=aAp2R0JxxwC@pmkl5K4iY%i>6S z(Du(SQIQ1Hcrruj+|zN>wya z7Y9{u4X%olj{qMHDQ?!}imv_fDYtdG2Ehm^L$gV%H~0C!fAtbCr2kI~a2@<|-PvU2 zNvX~S9*rR`i|fapjOiLUvfG5eJMB(cZWPP93B?!Ba*cJ7)H0F4W6 z#h^q#O+W4H5Ax2G69fs4t?($(-e9fH?Tf;G{uP-s3X%l~#O6!g2&$&6ctsrv!jMC5 zd1p+YDjd!ysbu4ina9jLaBD3j5npX)JI3X_#bq8Q!0t1!zQSjtJ*Z4xZXBzY?a6{( zM@05oCU7wo=!dqW`Qwt}oHmLBZS_2Dq6 zk=Z;vfxcMC6eiec-6W^kV|bV-?`5=g6a0vP^FgG*d~AF^z6`-7<7~oiP!-160gqOj zsd`8vcdHi_Rc(dIh*wNZYi|>!1=%0VEUoU4AiGg{I|Sn5Ytn8;Hc$db{-Zwx%BXyG zkWAUT*N8{PA*-d8n3y%w>cf*O9Y7U$vAIq8Y`#dy0tq@_OZajYcztnFjH@u$;Di-A z^~FqPrLN=VLLE*9P!~xWhAbjK0;pqNfMYw|=HDor$y=NPm&1^wHB07O3(G=a6Hs*q zqhl6m70M%Tf#&M82fhA4iH*E2rrBpq3=DICXIyPF4Eto#+F(w1qJ$Ay3xJ(%6R<@x z1?=7(__ccPf}}>?Esp{JgIWJ-lEkz0X&%RjMf}yx^`*shwMDh>p=C%agB)2tH`lOs zu`+@0%~f+^9v{;S;9p5xs|@Qu%l7e>l5y(F#=@ralj{A_~> z)&*BZo##bEz`_nw8lBIW4EL8h$ft1Ex0Bs%^Q%9r z{9ZCEEwO;G$+3w*D|dKa3IUX&uH?t5n$Bczhanf`D!XoNHWD2J-fgps{THNWT)Wa^ z0;dU}gNb_Va7LDxSE`8tsXO5r>b!c``UG2D5poS4FeYxPt9w$G%mU3OWPJJ7l83Pr zI~go;%8HqR=dHb%CTzWtygIYyEq^KRTHY{M9rXrJ$J)ot^lXA4*gAUCEQa-u5=R+6U^T@IHXe5YMoI0)zH925|c? 
zrfnF9z4}ZlzKb|!!Ow{Lr)#V%)<^S@!mR*^8-qj;umPSy0CP$PCV4m4Xb_UO6m(bm ziRZ2KJ>-3i0saV_+oXjQa3$Qt5rqm-cpmisvLyZZkGCc{XI)lihNEHa2*$$Le=fO< zcB8q;zeOCU;lzw@x*ivy3KejG!q601alrftM4ab|Rlv5(6kTM|WTRnoIK3g(rFLWa z7%^RAe3+4#0&i8mA1v`5`W-BzKNNKhmhiTkW|WgBiD*b-V_Xykg71JS*Sv8a7Z(?~ z(*Kp6hKMqzi}D5i19z3&OhrN_$NG18GUtm=z z36nx{q@81;wM8Fp%unE+4Ma!6tajX88RAj!rHN+$dx?6Yu#;gIEWPS+M#hT?24wTm#`_9Ee&$LMHA*>4$S-HzMP# zOnfy4D|C`$5&8fU-FP6HUbUxU9ss3>mwx|quoAOCa@zEi{dp;AZ9Gtp9?N2=qQY}m z8oQ*ZZS=p=Sdf`pB_FnA2D9xd{vcugi2&KpW{>r7F^vUHpo*i&#L5jGnHiI+hHzz; z?zahIOVNTW+%ag{%Vnt91NH7?+pvAK`KXZfIzx_M^O&|c!*o)JbbF_5ax`4+(qO!$ zY#8USj3?h5&6&r4)Yj>hxXsZLC=oB;SMK+)^IMXGcH|J!Z_d$r-;zBrEdXkbd5B?e zlKMhsD48Uiepdk`c%-g`8{Dsxiv)&}&8sqL#ap)?PtPQSnguJZhET==JrBBFI<8p~ zY356#t>s2cqZ{Ir*beIs4qQ8rWEoWXm>51q0Y$rs1-o;NnsCtS72owti&r_Zo<u`f^Ng3iBM=*xx)jFxz>6t9t~VJ2xV(-d=o;Kh+hUH z+0Qynyfh?jOQ!TLyh3j>+#a(Kl>Be_hIneN{O^X(HI~k{43GQ~e#ccPHM%UEKeG(U zMQr<$VLD5j$y)Rvf~HPT1*s@+(bNWM?6pW|dS0p_^A(M{eFyL?hH%PAYxG@=+oeEZGr$wL2q>q~r#!S)LKqu;G#D&p&+&8cgh@ zB2iP=IfzTotc;^1x21Nw{8)DR66n#P9Fs1ZVewEUwep*NfW;h6hsv}x+ua@Q(y1n6 zHqi2ulT$V;E^ z9hVGQO#n^A$qA>>NTnNHIES9D@T*OAz{cE_tpj+HoA z{sIz%cy)$_D>y`v0LK|3SYI-Hz)V>O&0Of>j`jTmX@sr%F^yywn(T-~SQFs1Bdl!k zfY99B!WIjn=D7IPfNeSW{mBMO;Q&(3`qmDS%JkHCZy;HUDFKM<$#^>j?Dey6fJ$f# z;U)s&F+PhzNZd2Yk1Y9V^0!dC9*nwJwy-Rk=jY~A&(wDKYWnN6RCG{!$CP6RF(ucB z9ZC8tviwcDMa-032GKx7fqSTr9%pvx^MgSb79if} z$PJ>CMt_27XOPN_{t|pZ!e%P=vMvRYPFyX7BJsDgu+pD@WT!uB!gU-xLm}xB&a<>S zCHH1UxhMjJ6Qy5I97E~M3AjVXKMg!?{@E&hT0~>27JYa)By4X6`|zm=kBQ8tdqb(t z1(+Dv-ivwI7S___Q+Kc8uXY9jXVsfljmIOE!TRD48yB+}o=y!}z0og&;zIHT6c~ah zVSJpdggu0~7C_J&;p8&y;BdTwLXthe9AU~vK7IuZThQ4ajZT%Z!GKzkW+rowlYi5q zehCyeA)50d(yl%+(ZcxLgDHaHY2VbWv2kX-pqEURcro)RnE;^!P?8;R@cc>?Ft=kg zCNnU4%Ke~yE3_u69Ojah@xE?KT7}L z!w!7+0W+3=d8Q&$;p(A`0;NCUJS-}Uvs?ebuIZZavBr*~8Ptf0ppTa~Dq()wt(?Ib zS5S!O;@ynWL(vSrt8Xh#j~-c_rKlhd^7(Z^z0<~1xhQ!Fx(7YXMK)t*ujw0PwdQT7W;QDYIa`WmzPFQ6?as1W(!Ld@HnXKL|b_zbR|kUuuOS; zZewiu;`iPUm-I{SA50dUCkzE%MK&XDBfQ)OwTkB|T={b98esj{;Fas}50FN~x9dli 
zvpu_sC2AJ&9K^NJk^u;OCm=`You-liSP-z59pjry{<&J+y4@+jnu|FE#?wqDxhVOS z5Z?H-nE8imawT$7+*#xE>tilh-K*l?EXKe6$UgdJ@Nkt`>#O@{Xe?*nN;FVVe_>(9 zI{%i{nbNbjzBn^Y4(-x&p(Ds_z;)G6R&u-4YVInIpyF4w5e;9;+g_oAhsGDjo5f1; zLR>6GB_rZVWb&O2zy#NFFQ(gFX2rx={`^T|_!rwb_qL~sFMR~4)h+fQjXIUkQyAJY zP1msBJEH8E2!Bo4=(!f z-NWOOoS3ugHaYGT|ARD-Gts%oTxv?2`HFrIFr&hz977#~_99^A>`Rv?1B~*W;=o6PsGQe^-I*@|pczPYFWMaY;MnT}vqE3=)M(+F zK02=EC&>?u%wFFD7I~h*DE^&O^{8gbdbEqBD0~II)f>&KcB|Kqwr`$&`~2(wuw#S5 z^yccolV?br@|zE=h3~}>?tPjXejKf82NZnDh`v9M(488%q9&Sak21&OMj8Uq6X7;^ zpT^HRY6r93Mh73R+)c|<>;&})1;yc#^qkNa|KSugfDVB3M+rB9-!R+ z83m(h5d(M8c%iDLVD#2I5O&o8b(Uo6HU518nV1#l^S+8{VOmB$pr+)dOCXCSq?BbK6)9b3ZdTogA<_ps+8|b-> z3B2Z2!K=zAC9m!4EeOq_aSdPJQIJ$hF|x|*Sd;2dqu%Q+?72%s#YO15t;v>jwe9T1 za*EyCnK7<}U2Gcg%u2RitN-cT0Y{;A2h>z6V*i;=C!m#dr_mqnZ1b3w;7t4(4OHm8 zzqLCs5+eHkd$_yYS+BW+PAa5~`EzGt$(QlM zv}Z&zEYZV5FAfRrgRnQI$)xnl6?fD6edv+xXs$IND6920S3MivQDf{4RzaM`XBA!k z-4xKFlTvxxvS9t?x|#ebk1^qH!$U6dPg@19k=pQaBgkG* zcek1EF|~2ZY1W9-V;FpG>k4{aAf2WM16Z?7?WD<^cjl&E9uOrKcDV$IG3u@Gp1&EW zD$~Y?qh3SZ?ar@0$9pP-jj2AXR9?N(Fkb37Tjn=>8vlo0?J3EVihB%CFherc3r z62zi64+42sB>hI75j|Nv0Qdtp4vMtI7kVgZE8!yV5dO%6CPCXfPAdUh=`+9^HDtp7|Q2`2BmRRG8UbZEiVW#s@jMM=I{`<4a_Ne(K@|O7Ce8MZ&7z%_@uPHon zPwV$_GI&5@k5qn%(RYT(1}-7BJZ9|!(&hif*t;z&G29_yfZ0nsVefv}bz}fgFlV3T zvlH6tVU=OG&@72Ly@(xMh+1fdznJ{f6zn(wPrn7;m^t&B+LKI|$B59=cKAYw@re^W*shjJ+c4)P3`y^kU#*( zQs-Q@8s48Kg|%1!y|Dk??qE95pQ{%`Ye<)gO5M3&cMCLZN~$H@V1ek&Z1qbh<|QnJp=7Wwcx<|M}}tF5gwHm!RU zkidLU%SRHTex%;j9NtxjZFe=&3o?QBBIZNBdpT?cZK7yL@&@@4H@)37?LCLTZmQDn z;D2KY_^LnwY4qJCXT=u!p8?UkSj9n^zBJsrvYKotz+LZ){>ScjLd*ya1R&Xrz&Lp& zTZmm>2~N_PkiDnYJ|&bt26rS`OjMI17AQM8TdIm}DXORCmr1e)! 
zty_%dnnk~9DN7Qr*)^`=FR?$$o2&0z92_8-*-%wtq)w-;-94vb&W_RUK8!`bc{!Ee zwFm<{Bck>OIs)Yyeg~jbDYrAfvv{j=_p+v@+5KKH5Y|XY<87i_oDzLmIlA$%xA1y& zGLgSE;|{%!1@gU%9q}F|t5O5?R7DZ={6!^p;ls+P?}DSDo$Em|oeL@JaG&4q@+x2C z_vAu#*x^r$K-u_w$xhVxDb-HIVfXyQFRJiCcKJ?Q;P=;3PS~;xbxKQodn9IaOe=5= zNOc+b3i3sH7!CIkY6wEjEX;jcnXuWE$7=ZTJ(J4+&4^RbVEb$EbdLRIQPHuRjJB?N z8<{u3ux8al=o6C*1%N|d@aFxaM5X|$Z=iQU$^hDp`q=quh6|u;8%VqN*xb2;h!UP} zxt;g!AUi37oyJyAmort9FK;o<{>66&6yD{*olXez!EKw1lKqDw;d?*u^zO~>;IH`Y zn5qV`SVWpTDTKr+_DbOqU2{lj|W~fOU}udEm0Y(Jhu4{e5>P~F~S+$ne_5#ncM$gg@6~r zT!;lVJMD$`VQ7;;#;2?JeFJso%wPOV3`G36cV;V6qxJFA&~*`Kg+;>`1uAPT!UU9! zkVya6Dx=mhPlxUaqU`#Dl=KB+_uol1k{9?+>Gj_V|L`qzObeM=@-Ada+IGFRPqL55 zmxRn+CWOS{@gx5g(Sau6DAI^S&#v!HMZGD=RBXO$L zc-@IX>FjIE$$>Y2{Cd3R*1N8aTCn&5{lV%fO;d{#(be~b_crcUWiZOqL5w`YqWSso zd;{&f+9S35#q>%x$wI!9{_avH!p5*C_m0PvzmV+~&n{&B(<^4EKkG`?I}cDpQe`v> zB8(fs-!Fb8Q@~>8@cvs#M*p51v4wSJH%P(NV;FAj_SKf1KDQsdmK>`XVLs9UvZ}Tu zC90bVBUv_&6clY<@!JgK#$`cDQkfJ|wIUVs`S7yXB_+=~+K+PA+qd)m&zjfg{dSjJ zZWmk6D%#Fwm!!{juJh_oXLSm_evm3UbpN7!Gdd~!O7!IiDFRjQ&|q@V(3gcB;>3Bz z>NlO2vk~H$rboC8<-Y@73F1MVDlQUVHjcA}v*WjNDRPoxe;cASZ1lh(bRB(F%Q(FP z;hfH6BY&mnup32(k_p-=xs4APvu;$BOay--VSCXmyYQD2gWp6C89DT|s!2e|2O$byiLS>BGx=OR@iniN9EiYR z6kfz!H}Gmr51d(#z&`88;y|mLT(=ut6=-Eoxn7xG-<9Csp=T*3B!{w$1!@I4zcDI1 z5Oy>nL>!WZyFX@6PB?H_!w26P%sQZXncb$H43%KZ$)cm{*9}R0hrTFhfuUo4Rb&D| z^;@UpuC6}RJ484}EFb7USB*{=y4qn6NGn$zqqKXO^(0z%pm1ieV$+Twqbt9(u>Uin{`alsrR+G&vL3qa)tcFe91zB%_6vf(k-Vl@1CN*9-eCM=A` z`GSoRuBcAQ0mXj-@+^wTzzV}j(QlCVDtLoZk51?LG-L4RQI#X3o!MF+%YGeyq0ZCv z-oPs3*I8O_G?Iu-J;RAIk(2X(Mh_OCD0BMzUmQ9~^ztVaMwI1sbM46N);r3Z_!*sO=3FY0Cb{1Jr+LIA4=Eu!P zMStn57n-AzRcvgdWX}J72C2PxbS_0g@J4R=xUgFg;VFbPdM`cqQ+w*FooXZCrOA+@ z1cp4K=$bG(_@)_91MP%>-f}AX*UiD+hoh_KTqCeFBI-)42$i~&(9OjW_{-6?z=WIe= zPH*cV`Tu!M$i`Sq+vTR>RiJqD8JdY?01NRGHR{k{O|bPqdYU^ax1v}qS!u2k0Ya8p0$cC>i-4(rbHdgfYnhxYRQi-i?3^uB!-Q>4UhCrshXjEhki z(+OZ4Hyhgy5i-*Ggu4rm)2JsrB)_*>X|&f8g_k1bzNs<#LdL=vT3qqLo*BYWs~ncJYUUom725~Is3KdBwv6+g6VlJ 
z{#y6##0D(fqv!*tH0#-v4Lx{YDPmF9#QUsh%cj{--9#OSnkWMM!* zw|~Muf4ks@@4fx=+)3G0CR^Ut%emnzJk_|L<4L)lK|@r#h|}EH-YSj~S~>VHVzv`4 zgV&bdwcege87V6|to6lHZH7dY0bM#%86UqjZ}t2$LfPVb$iU3RWFKTv8CtRhbbtM5 z`8DAPfVLI@U5bB%VB&moT|piL1E&piU`C96@&QM*r54zl`}Ka^AG zfL_epXK~MRVq#)wy$G6fG)eIY0XG#I)>@()VQ&yRX1UU5e8WB}sy?hS)Q!v$?Pe&= zUcS`|@yOAF8}@te7ABba#+cp)gnhnYAP@a-(X_TSRa;2(_lV&14&-yS5~sbEhob9i z&Jw)tM!DXke)4Agi+Jn+5NNY6mt=0Q6q?T$7XL#yW1!uI&EwN#lw6pldgyU9^$n913 zxS7rKE#Nk^`Yf%W*>){XR)(?1%5MCh!}VSXKCNDt1H9}exTmK_@%mqwvLZycXrk=7 zLt`h9K(t>$Moa)2hIrW!_{rvvs0ODwWuUWe5s*Vh0k)Ijh;o07FpeRY24KsPua7qh zo1uMvZXu5^m7Oe?G67Tazc(d%LknLRC#WG zHz*(J`|caoVbxE#%Pm!bp^OWe3%b3|0EBJ|l>FA!&oD1Phi=1Q{QxJr2uRoFROypJ zT*!bkWBzEqRC&xMjMT@bfXBKfx<9Z0XdeAa7bZowhY;krA2VI}&DVRjq`&|ktp}c@05}m&DG4_#N z>|fa;;#tAP$FW6!d1{P>M!DmEVX7it+dYyEOW6B>xPm>?_Wdug+L2uY6vi*drZ*8= zoD^;@PXJRHUo^Io69gn0ThlegG~WzAuMH$I&LSNFcQJU2o`3UIAUddrs_hX^v0f(7h!B*4+$X3)iamv*{F>%#GH_U;C-`UJa zNB3T&qo2u5NrtH!txHq3(^9)hY+%&2GT(~KvOFMM?wR~_C9+~rNjERaP(8AA# zm|~q>s#J=c?5dvLWX2bs&_|3#Sjl*Q>)!nPW*0&WCH_epbmc~1_dzKZh5|&4PhHyo zvD{@NsBsoTe}82io=MZ{xH%e=`5`bSSuFKDvBh(uQ1~dC<7aZkJe;t0mG+D=guzhVzfCF2W4-9Hzqj`8 zmw^agjJsfG^j%dIjYP!Xp4&<#h(|^>5$3bL#DhOn(1B3O$!+-iJdABTlEt_%a|;D& z*ur^SfM`|D`EKW+290ZZX#H-(oOAK?v_1x(q83^nzB$j(Fi%HC#2B`l5=bafk;$Y_ z!~Z6A^6)gxy47-}9LR!uPxb*#(7&Zh_&7TSX#su1lgod&g{2Yqz*c(Q7Rbh)nzW8@ zUNt`yl3;3k?7>^smMu(R68nTkd}m58k#yKk;xU8=!`v_Q_fXm~yZjo3C`~k`iQrSSn(r0Pk)knymEI+yzc$rG{M7u^vvn7-~IS#EOtd zCJ(m1s<`yrR(xuSju8LfklZ|?qF3^3RF2rAeVCN_Qn(3G9|{b1O|GgDUc#0Uox;W<=Y;P4HZgi_`79S|$ASB*=7kqlXAS?U40dEPN6RyZ9)%NCl^rjDF99W|gJ}EZUMm$8=oMcUS^O07 z$hfH>{=r35xSRjwZlloO;x#-IKr2uY{{kS(r_{*aN}UXE1R|TZ0j~*dB3zZ0cg5a_ zb*Ku*gOSfCR4pC(N;EL#7PnkG8?MVXJo=arOEsJ>Qe@Nbp|nK|lE*-&H>Bso|C35w zWJAbv`5_?YVe}M^`_o^ND$mmXa7FPRsFqWQk%Eha_G(`1l)LzFnAUwR+G2oh{kI)S zTx`eKZ#r9vV&Xuy{P&t48%?~Ujdx1;a4qA#qaROdQAj|!BZiFhBNogJD{h5sf^aVb zs=>-)AAA=5qS~+is(<@NcquG9?iou28#>vVR9|eZ0nqBZZ)lrX3QhAo3uqUTLmIb5 zw{iScWJwo&*9a=P6W;@BxbgBY?b#I~Lm~Dd@C|2;(f6pn1)n7+Lt1}hS>fsd9Mr~m 
zA!fkE=0~bc#c9VTV{{_@W!F*SXtLRm%YuAaTT{Q^Wbr-A;#}q`)oh_3R_ZvV!5`xb zus#R)L`i{3R7kh5pJ6Ymh@(PdDf)0d%pcX-jaBWkB8%u^x3?zC9nwEb>@_LW zscl;0>Q)1ue;kuYvcP~&mQ=Q)I-H41?(b_q5P|Pr^SWRFu@;6nlH`2K$fZP!PCTTiQwVa|9DD*ecEMN*6^06TnX7}a z2D-in(i{P2ky3rQ=umSf#cGT^`;f$&zr#G)mnSvB^XVXk5UhAp&M=={hm`xVnLG6^eX(MAxDTmsI*jISBy5Q9xsB5n$ zFk@7;%jCcbqk``MIAIJ|@DkEI)zW&XIqZ?!7RhrYg4)Vyqd2k4=!%MZ`zn z9rx>s*MK$~iJc6Bu7Q04s^!!MW&S58n1n|h%} z58G=q)++#7@qkF5%XOJucpz-2^9j^iuF;fFl3I)A>f#y z4VJxl@x&Xuty^K=t~W9`QT&|yhKrM17y;Em5GbD9arHsS32~J_JhtB8JWy|~u zCWBS!{$lk{qW6>igyCNR%^mwhs10EYTsB0YirAgmQ281|IGWVnQ>LqB-4XSjtUATs zBwk%XZ_Sc%@pFkhLi*<1>YIE#X5?Wg<23FEY#uq!VS$Pc-!4pt&qp&xA>SX^g|9Sv zZ;aE&gO(fdw(D>H;&g29Ewnm}sY$IghLEdP-$coWQhNaMP=wvPSMIxwuf*)cMz}Md zo3BJpQu7oh5>H^m-1TSO_JQyGXc`{YgmavDGqj6`(vIZt1Vz|mIdHuZQ`iPbRT%5; zZ`Ib|zr=HMUZ*yc`G9H>viUTmmxbF?NDGP(^cpYNuL)8Q!<7O zbZTR95?2Ujg85!DSacC`62M^Mh2tg=HyQ||zUp7#q-J>-KRD%ABV#xEFy2};1#1E) z7H57>wg))wKMI6b}{v{Axkw3|o6cekC zcAo4;LZP@`_{Y-APxwzq3Ok~U)(PqKF)VBZfB3jeOBf-{+vK%~q}cGlT=S}OiMhBH z0)?J^g#HLmhZ2VlbElphhvLhZe@3#LW75m(wQirBTcTQq_QLdvI|Ih4QQs?(aH4Ad z?jb6C{^A7YdF{hm>{w^T_u-Eye0FF+ZXv_3cpse7gvKFPf>)Am}nA31$t zthj}nRxhrVfGWgFal`1u!Na0lJ9*+*!qc!l9t32#$vJDH4LttjXQxEw^Xd$89(RqU zD0#${>-u@C8ol9-^7N-bv(fRb(KNEK(yR^)|2XW{TS*?;qQHrhRD<>;09vkAV&ZiKPIgp6}F+ zA(SYpRx0_Af4rFwqn&>6mR#JIcI8IVj|r44=MR<) z>lipEptsOu51oJ?5G3y@LpWPL@2qr3)thrsi#Shk@%q}*uSB3#TvtyoH-Ew*L<&iE z98h;b7y+Nyi1YAJQt7+G=2{S=q$>ev`nlQRoL3Yd*FP{SAzUiG`rTRyWi-qaO`5{t zDj!7@m-7yH4^zK=SvtN2}t++nca(5XVBec9(}39aYl~HKK~V~Q?E|l z(l~0x@hV#5|6kA&MI}7Wyh%tC;5M(7b_0Ei;%iz@_?K2f%h0}691xCbf?>4k!xSbJ zs^s$W{Js3r6u#7M415?T6EPcCBoI7@2I^ODqYE9-z;F|Jv?aR99cKt_WZPEy_`az3ZLw-XwxuJ5@CoTQye z$Dio@yS}mn5m(Z@4|*c=B=3n+#^~v6J2;&6zheU-sU%jnR;Ag}!ETEj&CQj5br<(Y z#!+K6TR(E_1peHF=Ng}eYir&~<>wcP_m{=9tgXM7VK~ES9%ctLi+c=|PJ1{F@TfiB zgYbmO_U;$ZM_HN#p4sgVFY?k0*G1&QhKtTs4~^p{uizzyx|3kEBX}j!A#| zj<}1&xk&%Kn*&Q78ZE*_2?zNa1TQrlWqDNxnwypGiQ^5kRq(i8cNfuoz!^i|$xsZP z#P@e0ZX?kp0W`=SzDWoDbV2q#XfSq=oa;iuhUB7tbg?AHpABnl9vs<;W$id&ExLiw 
z8%w$U_eU#3aIf)aFLZxGoK~?2zzhS$-gJliFecKycNgva#8mnnuAjt>=}WU9*dMXC9H{K-Cm55Y8N=?(CXKkCv`DUgCKH-<}~uvR7_0?7Lvg{ zT&i%2>QH(MNAsFaX+^GID;QvhkJ67u!&S|6iU|`*K!t8J&j_hCE)BAls5h+4; zw|BQ*Z-g%x@DOv`X@BH^6k+`ZTVm*0{seLIsleymcxW|Hi441A|aTd;rXHl?rn2goFs+YDuaM2%kZ_SqAq)9E+L_8ecQjao%CSTu*9VA)ku8X)GO z+T_4a7NO0K`R=~syKzy z*c;$7h2juzp6x!HGZ|i2E$n`yQix{C+1A-y7+S$%$%+yW+7Tt-JYpk|Zh2nsmjrqK zzk%aDkF}pE%$cAEPsaxnb2Aiw3WL{GqcykuL+%po1?We2lN|Muo7saP5YHxGHE>Q?Dx=A8eJviA(9tBcx3)!2v@LJ&QA4T;`+7oDy5jp)6z5k!mL zd+%-Qorr|!L5MDT@13*A^M2oZew-iYUoKa!z2+Koj4{U?w<%_YIt-h+H4B=fp9S>JBfgZ}sdC}X_4#FOq{B=moWS3cHk>36pBI?Yj$Lmy zo2zx0nJ}mm#H>+Eu>Y4i%Yz^@0lB|$Sb));qY|vz0woA3|HpgXmVNV2p$@+%5RKt_ z3TQVV9+#Nm&P{nnRk+gdu_yX;cU_iRI@r`N{-W-O{XQw66|X}&8pTDB+Z>ZRB9TT5 zeGNj2we~;?_vbH{{>_+=i>xV-9+pV@tEZf4Zbulr)B#G8(JD2ZL44+kC%y^d;(Hws z;4dHzx<`mDK3Eoo!`AKZkXCXpK7VP$Z4x0Y(M5!MP07II9E=IFhu0K&qlKEo6-qfr zU}VPL^R*bO`ZM7?ZDnQPbu)N)YP(y*5Z=LMd3V0G5kCyeXmP&Tji^2kCFBUN?O6cPms4S;VVAA7qftj;0TvACNd$+76KB+;~dGh^gd|7w5 zIycj%0bu<1?YqK?LyP zW(L{@|21p7M1k*f)5(%7LxDi>3QcT1`lhV`g2L?}&S5OO(#T-jOTdYvd%@v2BYV|j z4=?B;j0K)yCW~m53Cm5217D>=kpdh*tn~*BT*-;!X&BhOK21HVY(?21dpRp@ zO88pETTg^j`zyoP^}#72k;<--=Y?8Zin#>m&;K^8i{gK6*h8pB(;Q;11s(;tIOB5^ zolp(7XBf1e3*Ug5zK5K-Xg!sdB**YsoBForJi}P(RQ7v;4-eW5s$}|6vAM|b>Iel4 z7AUr&uw7y0Z4d#_&OseROY%}bg~0>;G*16ln+(9nnCOVFsoQ{uHkjo&v$XcR;qYWz z`1tF@E9i+X@(doOM(9+g`VOS;4^_#D+o4uGp2B}EwQAg4GnCINQcb{sUhZ+BWw}Kd zKoHi#e-+lG5N8|>73#t8?E;os@mgkwyQAZ|9XAw3j1bF2_<4|pzDVi8c-=3s*#Q37egH6M>0IFB4bc9sx{1o zb!BfbAaB~EV|ADSsUUhAHB2U;ID$czKTZ%6#`a%oJp`78C{aSC3;YzdIHT^q`q;!X zz#cPj#@S^++XV9U#y;?ORxnT)jPu|R^k20RENgp{j6Js~Q`=@(MIV!0id$L)ssoI6 z`|n9K1@cJ$^|$;@_kPpI)85+=ahOMtOv;czg{7%RORNcaGlY`z%u&+e9IacD>Dx;< zUW&EUJsU_cXq{1Bh>0TEEtBvz=ymWGw;;v=UXLm{pcI%}XMdUclk)w+@Nr0eoG4q< zMVmfPHt?kumI6oI2-`s)OPz!vCKR8%68MEAHDly+_EGq2aSO1o|jt ztr9|xt&3zeom7ycSIp`p@=P|R-! 
z^?AG{V%Q){Bs6?x**(FG{qO4e+1SiAkl@pHhJhJ>Acl#BpF6)4g&{;<+S+2@lmI!b z5y^TgDIG^)R5i7mpmUXfLMPZ2r@~E6BR3IUsE}{J{m(Q_f$sm<8H%*W^BS|YG|m$# z;EMztN~U77{qI9_f^d!Rm>G05pkyf^zj^$G>OcBONLNHS8K;o?@@)|T^~35850AlV zJ%>3H_&1^xrLRys+q#}X8ZiHW{=4a;CrV)a;h|~cp(`}~5$mc>e+ELJ2;&3%&9DmeMka?PKOBrEEaeh)bD}bsEM$pSN57S!ez~54tlW%eRoiaKK|SX^YeZkUAJ? zbZ5#_(=#ZHChr{$1GpvACi?)3!6&U&?}KnkSg{b{Pw?}ftxro zdy)Okj8O%cZ10w9Y`*~7=K-j3w0!t?utcIrkC8kKbnMO|04ukm4=t$>ik>2%a8L_68rR;zt_P>N_ z3n|p_8y+ihsCz}B<4VPNhKvjUrTj-7P^VU{d#J7Q8niPl&j<3TjW>`RvI3Lk-98TT zdLR>lesU3cL_V0W0rhxWK?Tz&^n?=&8hui)$t?tQM(n25L#IaB-xnXj-o{_RBZ# zLhc6-IG_cwAR`+9FEMV48R`RDxc>H}{c8~MS5zRA%BEbdC=aK!fA{%1?e~{|(k&rY z){_bxGW3`Bf3N^Mv_sicowh}vaHX_tWCx`Fdv}9T!;vW^v3@>ZtwU}3fLw`UP{`Mg zBCWRr2?8OVsqvo301YOc*X6V8w}YHJuZ!{S_M2kIb_XrwUH5= zEqiH^*v$vGzpFQqud%3vMhyQrRy&$(dguI}g_B|b=-8U`hX>{}u3j$05Gzemzp_T9 zi=4@0!S@Wo&vCG0U)pkoU{?r7y*k%5dP-ICpAP%v(ihLk)xxXJR&@VFVY=5sh7Lvp zj~&&@WcQYT(8*voh5bXAAvKiXR`HGN%YwvJl_$Sl%Wzz6vZJW)k5V$=pD(jDB~26+ z?3Aefa1ZJ=yi%4b8!;$byZimV1dI(r1l$$b@r!Wg6Oze8cJZVyAYIc7JN4@{cB^MC z2?-k|*q$K$PYd7KwOInmkTeO-C;x4vas~oA_hbl9vORKXb1_6JR0D$HMA?c#BOdU_ z5RyZhI;-V#4`wDyEe_Ry3TFdn#7nw-Cnqt`$ACxAEMxkTdGa0$T66`*8MbIsFLay_ zg+wwLr0Ku9A${q7DUx6%PBVc0A;VvN4FTEU=h;rwDM4SU8SMh#>*5n&J@?=*UnrP) zRKng562-z?UxTT8wR47@yD)txARA$hzX zI`z%c39{IdReaQMvTG%|{dlI)8Uh%_9$YWxF&wumS@N`q%?4mp^X(97swYPvHEpaK zK4=JeE;CwReSKRG7TkmjMSu!_0M410beHNQqubqR%ev*6q!}=HLHt2|(vM|BD*(67NMtSvC z*jaP03fnQxlfj_DuMkZ0don9_rs-dzIEOAjs3Oayu-O24a0vx-!&F)Rxt>yjEFXs) zf&r56^PU`uCx_^Try5hnJ|qLu$!&dJ@`gXzNrF zOBi4Od#N5A#4#RJAj>$?f&j#t|Kvv1?(D{K~q(821+fp=XK8P z&vE{3?sE#<1Z4RCiU`uOmhYN%PLC_zGU=#}I(df}z;G*o`L@!kah!K-JE27Q=aRA7&Hol06R!66LOxXEZ*GoBtoZAqEnh`XWSK}SJgr5PH`gzedPWqLOUS2O#i z1sx#Cf4gT`Oxa;A3ZwK5EeJIF{LijNXlcdFpx*$s6IM+_=n`&Tpa+KX`EaT2Mhq7 z_R9#AQp^PU4S*z=iM!vGmvR|&KL)5-yyRhUCwKHa>`VVnp|2kCWO$WXRUfw+J#Fvz z^(%Ek6UpGBele&&#;9~3J<&B#ZxTsrU%!g_&#qC)VZ%^=pGUi5$*rb-JE7hk1Asn~ zmQ)6tJu~0k4nPKhI~3oO=eJIEP#P2mG}u@}vw=(USme_6Q{;m_F|4!#rQ1q~7VoRV 
z|9-WwI&2o|ukiS*sM67s?tEg57)kfhPU#}d+@omt6==R6&Pggy5KwxT8c4W7+Q2e} z|NktrEh9%gFubGtN;#0Bt@-8kYzTs`J;nxf7zO_ZoDv@{_|KCz#n>D@0m!xEHx^|)GN?JscT&ryG2(yCv0*A&&R%`LX0WI%NB!JNl{c<9QS z0=y6-0MwTmp^Hki7o^$YMI-L4*a$BOYt0H?OQ0(c8wb-{jP*2F=kvms4a`(#t8H>2 zDSkc3D@Q(7{-_&Ev5)}KlatQo2zfXj^x0@s43_B&@tcu+TcV=8fMV|o(4336oHjQM z=v;J(FvvC@0;ARJ-#x~Yis0*wp{0me7*Qq2-M|8M?D*n0)Nt=_#NnRg7I6n$*(4el z0z=NS&z6`T%dglICpsqaL4lx<#*YOUV*NavOj@twIw3d=D&?;d=)m!g=3*b?MwNh% zsuwPvf19|!bmxV+k*V8$7&($Yh?{Iz}VQ<=&3Pt4iANzXnvKx(V115%oo^PmIbDt90oVS(Qj0w%|&wZZoo zYpja)q~>+G{Y@P}yE;#w0^FE;qkTgPP?<1OIZS90n6$^uF9TKQfr5Ce4}XZ>Yeq%Q z?nP63{gd^zCnRN2UIhG~iWed3qBQ~1{vs_+@~~d5L={D)q8MUryJH)F(S1`QTQnuI zbr66dG||>jHEPW_rOeP!j;uqbiJg9i`hFhz))2_U<^xhIZG;hT<2ZW>0(7RefgCT8 zde#hY3LK9t-7vODC&IuTluNJOWnM^LE@Bf4%0_@^hA2!Wm#_ZXDM~Ritq*2BJuLZK ztf=&{@TD0}k*LsQt--q$e>&Dkg2ZuOBCR)UDIzoSi@yYODHx5-vUxlsS*VR9Qe@|? z5G`5t1C|cZ&W(rX1#a*NLq}ulC;5X zbk~r}l#1={b++wW9+X$G&u84R1!u`ZUiqb3)x6{3d{`GEPo}+CwH*fS?WYbL6U1=~ znNsNfAyOKZe#A&!{|BJs;af3QFO~{o8p? zW;|t`EmHirt4w)!MzhcrPyArfO|Sc%I*JZ3R3C zc%g?fE|TRzSQhm8l4V*N3EASo2x$rG#}XY!-yi zXs`%!*QA=3IYR}&z)7T++Hb(J&c8U+cX7!luIIiec_&;^4Y%Dv zS_GCpMnOR5J1ewC)hr&9lF=wrMZp|HiRKrnI=EM+(%gu?%kzmo`rZ5y*+#EAp7$+1 zk--k~(6ze72Qr%X$n)UZ5Bu9|gYVitDwJM(97yZe|b&5bSXSbGTv+#UfOZLxl z2;7wRn*Po^4{+er9tI36kwTjA=Z8i}w-lo3AY^iRi+sgLw-SHjZUEwrU*4N!#N#!t zF3$v(31#~Dd-(0qkjUGZTH3)sK8OY@u7L~o@6v@N&fMa2W$kdQS;`qU6bzb4JwYFh zpgUZcq$|_lkTOVwny{p$pmrdirO)%mT zz^*`8Bg>zXGp|WoKm{M2qkN3KmR2pZnsz%STZh0zMV}DE+Olt6Jkf#FQ z+D|~^bd}_PRd4NO!PPCafTNq=b2Z+zlM zF10V}ouTrZSVMnMITyWpAD$-LK5Ub{$Z!`8^XdmKNIF-LbS|nr(wO^8f&gNCuRMZT z;5~Z|ww_ne=yH|IE6l@=F#X&DPFnH*Oow+4KbZcV8`1L-g>l2EI$vh07vYdZELwxE zBz)u0C=*i&7`Mg?REbgnnB_Pi7FY}@hQ>2$vK*glm#839OuI&W#`}MxnFr#{1_9GF zmM|2JGUK3%APM@}kdxe-wVIC&Jd7f~9qF)eeq#t?Ap6ANLCI-9SvFgMA@CKy50RL2 ztSNOwHH-Uyy^=o1}UJ4m{c+Z^<@o7idPJS45~ zCC#JV<~3y10&Z>YO4?!!>W!EJKPscIN!jhrwp$V`KO|kRwMaET7`f}bD*bqNwNuhI z&_*bumAVza{kj~yaecP41!zeDpOR`^tgN7aQzLf-^s~%|h#1t0u@hJ#4QJP5Nmh|a 
z*aRKCEB~rvzEZ0(mH+_x*0q#ZX!uQz24P-hScL3CNi80(5H+Ydo>?h`Rl1v@>8($q((Pxc(cbR1Bzl!i;*Km43)U@VNQd>u#;y)L^1!H^i$n>SL3} zEjw8$5>`{PC9BKCqf&yyGKs3IK3v%$ClDal2+Vqs5d;(l7ziHvFbRw7k{te=*KS{&{pn?nTv+)5c8`apG`UPhr$H`>un%OS>fP}&c-EJ-iP4t>(v%}uW4XF%-EqIiM+Q0T zvZ?Mpz5)k{Py04)!x4yl2jD^iw<=W3#ZuRQY9U_r5!({>nCZ*Uywe{Rza@QkmoY# z=iZyG4`-wC9G~{O-XMyel+XUUBY=WI1PiFpu>k&$XFh;0*0j|n$Tjo)Ws864`e_Oi z-{$KTR}!c2kDA=Z4K?ir&#x{NzrRuDb%4ewnLlf0UUpgnI-T@}yzYnD=Y>e9`1IK2 zS0@`2fWl(^$YJMDOz)T8@mz`UVn7HIW;sE#!bSKgtI>A8v^`SB^a5Y&XF1KZYvlIM zuX^;1;jBUBIuemhw5~{ot7cK7R&4 zk@$kmls!II4Zz2ox)2`%#k+$QJNHp(zmWV&;G}v@X-fmc>2+yB$2h1I%=1+H1~Q9T zVK;4(nETjnep8ieP&^3Sf;}1R-r0TPr768$rnUxHG_f0YN^81ita>fthtOsK!A>_d zgo9SfyXp9dPM;zLlCkEMy6w>3|@&v-Nmhegh%@5=bbFrpQ0ly6r}Oq4&rxdvqm+CKy9AcETjW6) zm3)^{U5_YCK6-Wt;Wu)`7lnP-;Kgr?WGmjSY1b$xZm8)YS`ZypaNNVz;MH3t7t}FX zUi&j6DPDl?WQYbGD^eo!1Uv#2t2u^B%IXbBs2Cpc8FptzHlP8h3MlYd{wh{JT~Cu? zQ6)D4}PWWJXa06s9K!EUiKLCWWm(}GUG@@guw;jnjqs)G0q;2C_ilLlQU z(yP))w7L^>8Gi*4aejQIkio0&g zLFq!7cw3m2rU3s%gxnb$xP+alEkG8K;QgsVPUB16f%P6GfEwVeTXNMYK|-=B@p~(A z0DxWufCDYX;lCRFh^2l9V7NVRkNQgh$97)%idw#^MUnP%T+b)F3KogVgLsV?`(9H9%FW_qj>dfuH) zTLSpG#-C^8YL&Iog4cBIXHz<}>18^65!YCH&hvb3`|?^?!n7Op{)#Ze?f@MkK}4!M zYUWcU-+Jrm!lF!1*&Tv)Zw{dIJMX5d)wA>hrHs&C6yg5xZ#NO*PBy@0qsHZpz^a~P z_LH>6{bhn2GI*N{vS!@`SL*T%nSd_h6=D1pDRc8l$msaLzNwPYD%~9ip z$|wY!JNfO2)@;_V=T*x**(PHekl{tTP~m}M(I8Z#NHOExihmF?_c{%eib!EKx9@5s z*SlCaWB(+7a%!zM6H?^nuDM_j?7zsi+Byv%$L0E3 z>AeRyNo7C2qP9RceXiD$QvXn}0oxGiN`lX<1Zdy}kVEmH<*r-feQFOMhIzK!jcuR= z{jbshu9fB=YfNK=GNLJg!zq!67~h*cuA#m$h)u2InyehRf0GGwpcj%gbCV3^miXXA zD(ai5vyQKs3>ywTT100%_0#$j`#Ns!ro-toOk)To0)wpZr)SL!S*VvM3hFh6*cDiW zk$SKExQfZYBXfSGZl`HVS@PpZa-t{xaw>kRqQ)vd$T7z2a}8UkOa}OWjlEEmE7s22 z+~!wMcY&MTI_LXak86OuwUuVqp-c%j@Vl!&Ki~iRBQEIhxY`_uKm{iOdrhv6GX3@z znkpgpWr8+<08y%5hJOdB`GQ+Ld&C18RVp_o0rGKtYq}t_-UT+fm5JI<97jWPaD8P z=dqDNzs~zpk6Z_I2!|C5P&Y#Mn^rN0D1#;L+!lSL0Pp{We*~N=TuAD61o*gGE^G(g zr5pg%-}9)Cpq>XCpD^RLDVD$jAgjk!KNGl^bNZn{rQ~!6EL`O~uMXFK>w(~2L51?e 
z+i4RFe;#5}G;}|o1Dx;l-!%ZfcN6*Kodi0Ffu=g9%@ z0@4lJV?G6r8|qdM2NXS@;&zWg7k%FI9DNN-=r+5Z+_jN2zzj!xM)S3>qwp_fuKnP2 z9w)2BPTDMQ_7q4-d4IYdqh9}yVgrBzSvU2GkH1=9jNKB@H#uyv<7j1t4i+D$_zS72 zy+nUD2L1*iUqT)pcT}*xLKlbA-EcG__X7wp-zj6cU#+nY@23RFt%<_;ahW$lpqTRj z2p`nZe%Sd9@mZX|Q_np!KiylA1!KLRiXhE8Rhg%Q zM^tKlg{^M#@ER(qxJbFLALEo!T+YpUV&nZrA6G}OoW#!p_ZU3Sm`u$t{=_n$AKCWjTZn7Gy5HM2sfZdgU+= z@CgF)TXfQj=TI{3;fy3(hh2JCLE=h{kdla)y4C_d(cMY|@{Lf2O-w8;NKM68g516{ zoAo;vQJA;4qKW1|THLgAMyK#s#A&fK{)|>L$w(!wS4ehIvJ^n4@&eM00WU<8QM_b? zC}ec0VnW%4MS{h_FMY^~DI_ib{93G6hwV8wd)ru`7xKU=#t%&cpMtxCO50y3296sj zfPHibnz+CA_tUl-EJfn}T;88!;eWyGO4hdB?epO=fK&k#1kF3_S9&eHXFD_D;P=-1 z8adGxL;2mkA!CexG(tZ0uh2H!+yZq(@Qy5LQQZrRMtFoYOPO9j-J3Y_|qL_sh)9 z8dodNklLw1x_JSxx_<#?qR2)|1~d4OhMhxlGwI}&32R1KpZo#Al<9Bw6-=zWvy7;s zZc^qw_PCv){%A6weX@FTWxx_50G4sv8Vcgh>OEv~xFW5gz@ezlkKWtrD?Jth6)P*$ zlTv`ciSHaP_x`9vyM$kf#0BB~?5_p~kOv%+8;$&uBJ;_=(&#lHz=FeE-5Sd+Xywdi z+1=_PX1o+!jl`jOiOrnXa#S7o{I+n}3N8!M<^c^x_o~ZgB;7978L?OH3R=E}v&Z<9 zW3cK}Pe>qZuFk8U6vNsB6LQ5AHF2CrsCVE$y3*OUIMg44Ng|sl!GU3^?H07r`->wZi^|1%IKAx#tWX`^SW29n@ zTxj~aS)O^p*vz%E3?l<-diCiesH?kqkt&0<0u4z_V4U5qXi66WT(9j7y4vwfpfv&E zZ|0ESnqMwXr2cRb_%z(oP;>zAv`27`)}sQ7-N3}!y*la3?Yp_+I%x-hJRu=WD?TQE z0G7p3q5TRy-R3NH5ILwkFky$8Y}Se8n9t9BS@K3!9d(SI1`LOd6MeWku~@#$$FF9% z%m4u0&k;k^(NS@c&aIZ=;rvX!qeH&LFd|Cv{Nmi}QAhCYm3H4WsiV#%_b4SM4qG;d z7x*Gvc<3L z#?)nT#%?23G88%2ef}_ted#iVaBX>sG+V!?;fA9mJFj}}TRM88rQ_!LKUe@WhKX{K zQRk#j$N>WD5hM}YHevk$OGGSs$gjanExPAIDPzfF91`H+R8N6=$0((4_bBnm;a_|QF% z$#O$Z3L*Q8i0<)o!#yXA=hwV0*5E59hnDi4xr2&_%9b4;uGzc-Deo)p+s}tY;LWL;U0uBTc=l(6W+TFJC!yDH&q9X zJq>VqUAmG!?i$}-CcmC^DbhxzJ>JU7b_LN zcMYV*Yr`DVm_}S~Eo7T=b?Z6v>krS^oxhd*oWFfg8J6)i?w~He#reUOF|#5H`xTm~ zt=lWIkOIlz^Gx^EYl`>H=#2k7T`p=y#4pt-q#_A53hEWUR_>3so-QJ=p3V^jNZqoY z(H9?wv}@95%l$)OZ$yN#K}ZN1q4=j0gMp@xBZqaB(DVvn!QKh#x!ji7NMl{Fp;)Vp%2bxA(o+4chNv#2pfyX{Uk)mZuOAJD5 z5qDa{B|?61%Cg7sC(H_~#@5_QTspd;iP#mLQ?~im9~D~j{?3CHGiWSpd3~4DDaKYB 
zQ&QzK;155&S1ffjKhTM}U`0@Bn@n5y>4C-k*9R@s+)N7EYV9LvVmlf(e|pxHeUj_nz5ohGWlioN*RpKa3kNL}-(Tmv*C;r2SCB;AMXANmE^0MeELS^$f8 z&4L8CyvP>m`D(Wd4m^jUfcW43*8LVnZ33mrK)_qc*z-7GvyKV&v>!KA&NS}l09X-O z+E>hPSlpV21pthJ0_?{z)`F0$4h@^~u@aXvpfeHKr!N_}ay*bFe4v0sxHD&=0cZV| zgAu*6lt+A+iELcHSJk(|M|xEIcv|lC3tQ}pp-vTDNnpkwj*7ql%r;d-ezZM5H&9ps zW}E2~&+iXOHc}J~@K+yjKDRerna9d$QCf+@f*_N7(}BZetAR-~F7`Xn0yP+z+A~{3 zZ$P7_ya^M0X(TDC+=R(#Jl#Fb)PmVDQW@x!wALB7R_J1)vA#T(%l8spEYsZ~ zsZ&KHa!J;O!0Z-?yiANNT^Jgj&UhF+KtD%REkK1Q24?Zwh;5}b$tkSRwvSyX)Et#vK))lu^ z)_E*L(1Cv@#e>LSN#bLEdb`+f_zENKeZfPmMNW&Dt>AiPp@P`-_*b-FP=E3zsmGY) zlFt-5`ExM}v*xu%NHD2gOV4##I9;`unS7KUV19VMja+%II6S=UaOi=)!Ox;_-;<@| zbU%rrE*$ZKr|3B6qazLl*mw*oF{n3N=T?Qk;Ij(jnN9oI^7=idMHzXC;!>%j#=a5# zjvtaY)`9-#Qde8KHKNH@i67Q7A;vcb9+o25p(;o}1^pb%S7I~m#;#E(He;p~AiaR> z$7_z|`8+(Utp@TXd;pVizW9opY6t64=7gs?TwE&|xnnJYE~w*86D_n&;k=+)UlQ206-WNjfT}ze;>~tGRh&67|s$_8)Pe4;jjdWk~P<9$?rK&yTyy z12&Pw4dZ6q5ELvvB2Vh}(1nbsyZ|t)gT!NwZh=o0NVGMkfBgYTGO1GW&SOMl(7drF zUNpwC&U}2yNUY@GQ&qxgOC|t@>#FUuSeAYPc=64@HjBP4${sOo3BBXflYe(~sdKJ8 zM^AK`{Rte;pO9sh(k{t%D|+77ZCS94hH-3H$3G&70c7(QEnY$}0q23Z8E_L&25thI zei!-zmOBQ1Dsexk6qX$`u-fINSxpD=Ae2lX_ebpc93@oP{_#0N_rG_ccUl0EPKjIm z6$)OF+jS-R!-K$vs9^Ev=fNfCce{JF>Hj_G$^y^jWqz}F@KnSrrT*z#@x>~Cly}PY z;OMc8NyZODw(doyf?>ot9zXMg$d{fFH-5lfQUZA3-DUI|!xQ*?YwPVjiY;`w$d`!3 zsyug09UsB6wUBaG;b}vG3f`C6@eUv9a_Q$gR87l~&j*&St(vew1zJDh3qkNCZ8i4( z9hwv+-i1ikQk*E%Uk5-3{fMLhArU;f&FaDa^9uO6h`HxtCG@=yR_QOuX~T-Ibw2Q7@U~yYmyKg#28N@8o%J}VFN-=7rIYS&<@ksx zc7s;N=2Z;=6bQxDQ;BgdE@gehF%HFRIkrbiUV(N+kie?1hrLjLu6u zHOimI#%0;cOO&eTr?&H^(VNSyLmT9l z)Ehqe7FR|D&~OE2^_c@wJ8hQF{9;}r@w)7%U(OFCP!$4A%f*+GoHnx`7wRmq7F0id zd_$++$CgD^GN9AUG3>P5$_+H71oT$?u=Ng)BU$y^DOT?fDOacbmgMY3kYwtZQO78+ zeXe9@Ib3-wuC1DFnm&n1yx-v$xu~~Up;K@Dq~ju%02#8GSD+G6=bfolc>DSjz^w>k zG}={vA8n2THOQ9}yJVEX#(qqNLSY)P;k0`~`jYo5IK@X=LrAi13r?fiBE-5kbmD`z z#TKiSN9lmHLFQbr7s_vPZJ|_1F8!e3aC1r+yKL^5BFxpd8R|H%7ynOQ=YqiqpI#I> z9KXOdOVRBwg9;m+IQoWD1>yU!#B)zM>xayKvVu=>k{=ont{@~?e_XESJX*e9wO;gp zFn5)+KAUl7&@JKL65ol6y%cKt=+uZ# 
zl_nTT&jTPZqb*oM7Y(R_gJ3 zJ)+<%6TGs0^*mRGGEXJ4x`S>|Jh{>Y0@&BSD{8Xn=f4z_9B({W5l6|D0>94zR6;f) zW@-x|t35S-bT*0d$hL~wXlW^1vTPfV8gKbG0*Vo;*Hb}EyU>;2A3m^M9osNv@n z)_tcvT;~4nMaAF-D+J~6p3R78joyW|UWZePFL|t+_%@He@32`ymE|?(F07Lo%$Oql zhz^+b<4LNWOG(qfAd}Kr_m;hLeUQ#_IVg%$-s$5gp=(9;6>>xXrrbf-yShr~vlpXG zU^C)Q;e$EVfYgx`@jh#f$!~W0Y5VaMUK*u*&Rxu6&DE-Q6~h`?iqqS=LJlp728;Cp zz?|=kYsHTU2l;igXW5Z_2dyDAlOD?27u7AS0!=UU z<#6RRaZUl@KA;GCvM3!{ycU>gJ;Yh~I*u;9IO) zc7plKo&@(27&|ohe&yr7AyGoAHce{iBT9e#`{b}0W3gkuhT7FyA^=}}v7z^?I>V$V zEznptk*Oq9*FAg>kgVfvn;aQ4ZgKMe8SisUJJ#ZP#g4(M6KB;irA|dBUG#7MFWuf) zIR$!EE`8^`bhq6ztRkVA&y$`%n1)m)SvTf?Z8VZ? zDmJ(_(_qtV@Q#pPr7aA&37!3h#c~d%D-|{0amra(@A0(K=FZgzG5*@jd&4+cq<5pj zgfcMtW3J7$irt7cC#KdK71cSjLS35Nw`sb8MN9T^%BDS={)a8ki+jPQMzc~Sm61yi zwF(eTw0|u*#uI^a$Tjx4!Gj~h4p~_M1TxH+Y;)@Gl*Ax9E^l3bv>=|FS5=hbdoymb zR^^NtAC11|*OY_{Hu@y>FUEvn!j)4_if%gik-FV4R#GwTW@H&>FA8naGDhMxdf8qV z?6cmG3-DQm<{75kU`$?cjd?bjnpq9)`+J%SePO+8tv4ym{~cl`4m-U2VvIG>Ma8#y zm}mn`2@SW~Z7>R=BHIfL$d!->grr~osUNM}GCe3jR<(Su^Yb4BAi$}MXFFm-kZoGP zC{xhR;FemUVj4PbJ}hqbyARMAvVnqzEp`%}h|Qg``n{g>7OlV$I=rS6J2wg50exOr zj_~gU^4n0_vNvn!et;C*PI-FhXTi+s1t{K)nNE_&)fV!j9k~t_b@tcpXFz;^| zEnY*)Q#ERq02Rx*->md1VN;8IzSG}dO5=Z5ryjP)J&hMMNrL^yBDjUR6sz$*N)F~` zO{wI)J@8Qbu_%s?Uom@UP*58g8(LK-A(sH@wt82iC^j^2H_f#;Ve*lp^?>lBMcaFi zT0r8&0ke7;eati81$RjH;_p(2TFb&jDgtrQNWoqfvm6b0*d}>QKy9C7ObkfbJt**g zxXk1@%qQlT5iJlRG!SH`oNQVp+$fNSVD@a<)QvYxV(?blXIP@p{u44<3C~7qdYpb1 zZAO}Ugp+1pDRK2{jcgfTbzj*SNmzA1%3rhqJmE^=+MoIVBd18C@&n)ZHqK%sU(sOb z1T^T<;mM=w*(;p^Ts&!cnucxC1*2Ok@1X3a?U5ROY%#vq?oka=)*>O9viQu-#s{fq z?;1SyS_JG!w+FqN#?5~0HrUx;As&gqefBn!n;5x$m_`gyUR>lFHD16qM;Rm1zISk? 
z_$Y?;zG5Wc{r`ms3?h?_Sp0~k?i{!>f z>2ve@@ePIMG$eQq(B@8gl);c&etSAwUnm67C38IK@VR-@){1K1SjY4v8NdQ&Bx zfYt~@Q7LR(E)73<|7-cygIK3Z(07@-Vqf$%#Zije%>?YBetZt4j(|lPP#RUlNz_+& z%d#`4rvs`Y<#7Kh@g1Rs12ob1h1(oNu6mz=nE||`NcU5lHX)bq1y(&vYVJGIl z7P!Y9vas#-EO_X?QH@#6H65)}Nksv0)PRN^LU5;(wpPBY!*CxV5>unG&q3`R&v2$s zD<0zs@S#L;^ql%>rDR3FqugdRltzDKMuCIgTB`1gTi}bDBtlG=OohT@lxjg@Ig6_0 zuhs@Kd!x4R^!rOxiy#)aq_drFtR!=-(G1gGQ_2-yUdJ%nLG6;Vni7ZPa)YqM1ya%6 z-bfakZxMpaxd$$uJ{L;yX{gD{jthCg)khA598L3sYjH0WgR!v@?_}awGS2s07K-8- zT&x-=OSf&+Y3W%3ON@p1bLG0kAYxkLGYm_mNK@ZEpy;(aN>HiYOi^cu28Nco#kcA3 zz35d%yYS$+-pAgmd5P3kQP+l30U$u^1buItu$lWr>;=RI&zz(xQI>aOU1z-%k+0a# zO}kUqF7Rx{2j9Q*Do8TOja8aFXH>Ls>*G`0`&{IAPWHxkf*>@oiuodB#K!TjF}mzN zW$^dSbS9-p-G5pncwO(&jFB5y|EG$G2NG~TV3Nh z%34WmWJv(`$JPUF0y2V5D9O^)I1JeIcMpsNtZxHaLS!cAx$bN0KR=8(N{#aw*@+L~ zrY>{aNlwpGwAX+3Z*&28%x8dPk)3%h(1~#%*Bu!ZrR$$c-&vr;T6y>7#47l@!#o9x z+b@sYTKf&q%_th(O%yp)fhX(PmL3eLmQ-y4##Tq&wG;}lCD)K_LO}8^zgFRXW0Lo5 z@V0OEzQW66M6Ehbdp@wD3ifFEoain9z_d`UNR3lENhUuU|ILXt>;p9pu;xF<k}XB#+4#ZMe`&M+vQAN>bAi~D}5vch#bJ^$bof0g_Fs@ z94`71#mG8dwOSVR@C3-1GK76#_IUZH$3cHxCd@bTe58Mc$)#aP)X# ziL5y@cNpsbST0nHrN|5^PvjOtojU*eN3PAr!BN~bFt7z0R9r}vx-HJ=IS9?y7k{@* z-Gs0iGIY&`+%!M4fS(vQK!$Ic*UYsu#0Ho<)0KL1{D8!3CGc&}&2U+DLEKk;8DM2~ zKy1(Y-43!jAoHB9Nx7lMfS)wZY3eT_qJi%0CO$%4gwJ5RDS20@OqWCf5e=V|=>6oOmKaj{Gu=`m&wnncXka&?u(0 zhQddRh&p%YP&30udAdj!_Um5NfnEX}G{LnUI=RE*mzbPmyeAttdvlDfqIG3wvu^Zl zIpK0mvN7wDAh&KthZ=omZC=MG9QXkhCG|h!VIg4;v6=;S=0p589t0DO57cZivoIOe z&qa}$Tz)=sh&=SJq%*jfF{1FI+>vy{@aK>pkvc)l+SkN4!iTtW+#xTE0-2c#}f=>d-{eR1#Y#_r<{Q=&t$oK^YV0bzx5?v!Ldi zmm$Yd%MUKNg;g%kYF`ik$W4CLlF5+ZYMxlsMX-PRWK)D0H%qxko5K51`WNR`;@k@t z9~vuTNiVl0F`!6=o=Md{a*!74m6iqVu1zBHhrdfKfwzA}!8EI)m|ONnZvfp{0;^xe zM^$o<)$S|bbPr@5!Qt`i*c0wy0|)(X!lG-Rot(;|OQLf-0h?TbKjMJICln`jo-;c? 
z7w;d@1fdEjqd^GEpWueS%VDP7-(>#fC{f%Oa+V7c+AZiY+6|C@;`gA`R*ff;V|#+W zvc!3IReb@vzWMmFrlanyfZe<&5=p>r8H^efL<-#7 zcZuTIb#WebAu4XTN7f>~2XITFBb7#me7ZiWKTW^+QxGxZc`E}GrGD3{Eib;nhw$VO z7x)sQPxgJFvX4Mi-lUjt5j`8(za$RYHWx+<`Y__-2Ek^aQY#nqt4O?ffXJXjg&$Sg z2P?-M1jPn<;%^*n;l@A?a5b$#PW8YsXPHFV@ymp346Sc;w_}OYjGrp|kwt;J0OVeB z5Ymf)Rr$==`Fr<=_GTC+=!rOq{OK>!+ zIO80UVLm6sw}w4Y3Tb~o6>Hhht{*-qv8V|rDQYul>(j%z%QPEF47CXADPZ1#t zKZ&ZP&cYxPuZWHSh0EZ*++Ys5ff}0qaFQDE$GgCv2B`x=pU_mwe=Ir1QhH;kaL{90 zDk`8o1Nfd6aEjdx5CV$e=|%SUJqn0Uw%*a?uP9<7LKB0`vBH*7wW*(|OJDYi0kXC@ zDgy2K{>4r}%kPP@5CylW^CWsrOaiyZRA)I}5dPxt-L%5f%iK+KlxSmvP#*bs?6L{X z*io*FKbmPbCeuS4hkLKMH+IuU9KJmdxYGiTbdo$1T+B0q~&| zBbRf#e? zXUHnP6}w5gWvt72RbHd`C1Q!J-;Iq7KEob=X_|$~V>jrKB;pvq;xW#n9WM)Rl%s(? zHn4-wV5?Y1v$xgI88+djwcr1GWiq_Q)7jPhZX<*zX-PNK$<2WHOsx45mQj#`Ey*Q%vAEyx-xCt~uQGiBh$BN)*bYp|5x^z~1-*k?i7OMOkwqXe# zyT1|D2N*mXE8d?tzH8Fu7ionh@g%MA8+B#z)tU~uS&-ce9b>&b!tvSFryW?MprMJI91L^=P$6a&w{sZHdkqNtQ4voBFCyNQP zyzO$vvgnK#vge6KmIp7IWu3@zjxS{P*gJ~wyJMiJrn*r4Z_ku1m9Bb+YrXy+hTudg zXiPk`_sM)5{)p?0uA5q#vrn9&F|Q==F&gEg+1bUONiVfZNzoSOT9M}8?1V+HmY<#D zkhav*?de(ck2vN4IIezUF#UcXNc&c;?FFZ+3=zDHhO=S^hnOQ-lllyvoT%|rZWEaf zmS;2nz!PUe5*GkGG5A~uiW?V$?a5a&pJJ|KGeFS#yZY*U3&V8`JkEaYb`JMC_z$=j{vEng3qZELZS?L>OszF*o&bJ% z#wFP60HJs{Jeo^Tj#}~x$=r zY^+HdYb_~G$GNq5XwGX`q?<>1J|HShSR-|Y2Q(f&`qCD@6 zG9se2o=EGF^pM)`(fHIm@l`qPKD>8)lv+krZlAfSmecoRRR3AzyGx8`G}B^PY)3OeB-t2Z~(`sv**lJ?P3?_Xr*xys1C$C8$s3ONl(q|CI{> zS76bs+99y@zY|S@OkQt*_ElKRgsDYk?%gCTKG5cG)GW5nJ$Kb7|EkU}_i`(lJtEbT zY@o(UpW64yR_Z_95%2fgp@}}D%-gr;US!iq=gw@8(rS25f^)fVkle}yj2jVPuiYdS zMBd`Bn(i@NY3b9&lg#3D0B3uFPm+x!EMAZ1Z+|MzZl}+E{EEOo&`VSJ+e=%3b0 zbK~sR|Em6u&1^>4yH0+?o*OKQwg^&^dpJewF)stm%q2-4uKd^(0;$jbH^{?&%&b$l*gw2QPX0rMMfotJ z>im3?ohQv39?izj!~&^cGME$Kga?WLgZ>ux@4G75KyZG^g1}=Z+m)RAL)${_QTkPW zqL~`QKCkvsoR+t@0Zrq~{ri2OqL+fjenG}{YVP4))T%Q`B@Dt8Xh~gG>~|=v zuFMDvOoVw+yV})T_$-8q4N=l1F;MiG75BNMi7!H{Lc(H}!|GbYm;Mk`Xatt`! 
zTT|vSTWI-GoLd6e0 zkp^kr3bt$@Q=<l8nFC7g+hb=ccY);Qrv8q`PYvDZ74 z3Ws_w{A0Bi7@2P4WBW-1%OHSSE|E~}f;eta%)h-uaC6#9`L(Rvp0Xgf@D4**Qti;17f{AKBQ|DRRFN?B1CQMm6kzl|1gVrLHekjT02>2hXbBmP9=c{tQsJ~n z_HD^=%%)t$+WMg%!*7sFe{h;4=)hEayx9(zqlEw&0y97*JojMq&6s-2pJS*%Av_5f zNcx@ll7Zl~_2SEg|3)6u>`|z)QVIV9@fMZ~x$iIW2!Q(M@OOp|m?)D4orXJ_>0%aJ z4%p^niiYiMh<+0sRjm(!po_~mKNJ-bM|^AP&2VJ+wHvev{ss$iWSX+F;k3Dt44S0Z zuHTY)dyNS34={o6-M3^rrV^hh^fEvyog*x0$k<=~+L;Na?*j?S7Dv0QFt>qt06?g+ zpYb=GfLl%0xFzjiXZ$)_O(FbB(UjPd9K7fFme_MTN|#=`!Sy`^U95T2X#Bx_f0964 z=}^*d6dxXwy0$w%?!86W9v1c(t#XyU08Q9C*csOR%Jao--50!UoWxDHevy&B4cV+1 zuRfut72LD$dfvS%G@i&)pPTf{F{?WJXwOSD`)q#sFSV#UO2zAFZ*A&NU6{+b=T_&R ztYlkmkg;5T&@8a+(XjiYEZuqI&yHE+pa?SonRas0Z6jvwLj8uqMIFz|q!g1>K+yvz zQiH6U!|AMU_C_+qF{o@mt%0UJOlJF_H@#%ET-qhfUUEHDbix>rc=$!Mem-2uh*sie zmVFQTdla2|d^ZHO`1x7C`0lW=U+T|rMMcFwUw_;n{7UDw-Rq}2RG>vJt*0O}=#NX- z0W4PMSHmCJ#aw48Jm%p-XUD4^+pTtLEHaGF><0pzx8YM~ zr+a?_sq`RXLx*3@Q@!{@Zoz%UybiYweChAR259t%iF~ksT^`h$6SqwD)k>I3Kb#zv zi7mC;>m-C90T$cjL*1$Y9s6&%-x$u4tP1f15(C~z=j>Muy8|n&-d^!5j2#;*)6rQD z{){0Y65V&!o*FLrE2MF8j-HIhGCQC-hE)~YCz0PKiTcT#uWkU;fIagCo`2HIK%!9< z(P;KYuF+?&dWz^+eP~Wf7?koy`>OM$p5tCp#=L(J2_yg%-h+f+Idu-8%JVhsXJQZP z)#!ylX8+rxASip=(7nmVm%*c4`ct5;J(!-SYSo2ROL+~PhlWsb--VXzNJG_RWzikA z!wQ9YW<^f?eK8-et$DKBu|X1+I3%#x7nYwpw%JNf5OYZ`9o-ms}i9^7HLXdS!{tXem z*tq9*@6&Hz_J-aT*!1|JRIrfzvepaYDn|3M@oiMLlk>nAe zoS-SV3LqU?kGzz=L$8jm_eki}|A{Z^&ApF`u~St|w+J0(__0Gc%$M@+$6Znw+Zt$> z42eP}FI~|~>XP`C2YUo#Cuyc(>50ZL+xK zjy75sm}et<;!wFzz8xAHvOd5?^`&0kpua`eo!(+ou3_(BIaxfSBZmj4kJ-0}&0J{Q z`wOe5cpkPJ;zVb;ENHYeSajzOGJEB~SJd@yOb`E-_{dqEW%t?PigD;enX2tBB-Lu= z$mdK|@*njS8-1vzBzg#PAw*+wnwz&djzjxBCIpUre=YBP0gMelBgG}+O$7`DENSY( zFEA3CSIxY#76W42Fz9||anW1*LC|~@_6!5>@pDo?ZM#~uBCo;_A^?L4679eQ6{@Gh zE{zziXl@VKRg4}rcrWEV5k70jRjqcSy?{irV z8v>L|um2>PUxTV9yT%;mGc}R98v+t``dB3*4l3B-=sgW0A|fnT#fj_0p(>B@mnp)` zfqpCBRn&uA>USXdPJo0BgaOr7*;P*eo|9ZmzJKiVnML<3^Y!9JJsOK{byEvo(RDo! 
zwyI1i8qF=JyXRg!8kK+23m6@y15zhHpP$PCCwwlCR`*IFyf zd}SgQ;aFy_t}}c;G4p&Zg>Q_HDV1M9z^?e+KzDI3ajl(|iw^a!d%7rBaW=w4#K+sY zb$IeF1i1uV23Re=U|PgbcA^0T%LWrxrerxngG`R>i7;h zsPDO^YzkIYysPpwbPMq=ijSjv@-)hDV*229h{bPilkTomVj)ei<}R4jiP zEpT?U+$-|l$^X*&!oc$BGn=|Eyz%I$g*zG)W zd-}2-1H_0k16=Ku;{1RxyCRjEw1Usm19vy^DBbooszNMwyLikWFQ5?Dr4eP)L$^6b z^iH08$*fVQM^tZ%_5F_YUE3NG@VS$yVh+i8dL>Q0ZPWHBb$O*S@A_9J^)9=96j#ZK zzK9oG$Aa1gYgd2dH9|M|@lPdHtyfLHS6AjoXW!I_qVZeAhgC(WFvno0rh@qcOw<}4 z;lU5Z8{3K3vcv4@+!xZmNvNoB(1nG;k#X0eU_*#*z62yc-*Xzu2mT0FTbK_*$_kBw z*FTN86KQJ2jHgJbD(WQn+~P|C--8(jr(T52w-~O_3VibW2>{Q`d&?xq3XKSebD`JS z{ga$RIr+H#;bFOcQZ(m5nb20V)pM%qI+@_h&G>1=7+neKI?)1JD*&hDhD~@HpKuy- zJMmN;Tqw~+wh3yt#Hxs?e;vQ!Fm}YEShVQM^7)MsgXV8LGTyL;*6i<62z`F>wVEw| zU7npnj)UAT$NFON>$p}2YzLrlVX*2|Q@qd`;Hu-*aYW_{voaMZ3f=VT=lt;OFmg$^ zOE^zAY+Jq~P!-SF2OnuqXZ&d*dH`9Yr?obl>as?8#D*7N2Rt6KOONM1g(a@vgv^yR ziYSd%XnqUDAe-p@(X+eWSQ`#4bgoFc73(vhlhPlB#0j=|%9lpPyp)sm?+UM6!&e@$P!Q37Zt{9;hg8*idshAEM&kU6Q|4!vq>-VO zX?RJkQ>y5_uX=J$(KpGNQR4v`VXZu>YPmhIv&q|Ao5y&a?PnWbgpTuB5>L~+xkZ&$ zQr~qx;=URaCc$8ggFs;NmRKOj_gW7pmi7tmO&u?m1@$p}6u^ccdyJ@t#c#?EpJIPX z33j0vXUWxCEo!7$;WU?0*c!<-Gqyn)ztt7t9y~>inw*`coH*}TR71Q}`!@TPe2#^f zgSLeaL(*KbG!1=5P~wiSJPjR23raQg1R>fd9fVmUviM$#E`mXe5;^A7_) zaN=47>56caXaVPc_~xJzr?uQwKY3t3n|2BH(|#w{4{uwDg0E@B4@xp-_1iunoW^#f zPd7Vwy5XIJ;Xeu`Z<*q1%mc`u-h(vL?7|w`CJeh3rofF$J%P`p~Od~7xV~&RA|x7x2sS*&~ah_i$Ji6%-?!pi3e)Xv$TB;fL7|*TwTXgEd5(Hr6jExh;vdh1KJ6>b$T6AQSJGW9c;A2((3nTof zc3aj8{rCB|lF%>-I3wyVNkvK6BU*4{;d`IdLi_H=Et! 
z++!rdSqi)Y&DVQ4Qs;8v36vLMe)~(*l1RV<f#ll$JnQA zUFipoP@}Xi1T$@bUeo)Vcg_SJg>Lk8Ro~~Ct^mFr!2n8Vz79O_M!9uUn1RP0mDz+9 z>S#J7xsXqHHQYg1r`}h$zfw>L+8#(|!SFR2v_CvKeRRCjsc(IVx&yTDC4EKv>C?4d z_w0_xQFTwP61^fY!+qR(ik224Z&&-Pe%-@BC@vGRZV04*R9=FI`D7}#E(?Q0M z*kBE^O3Or{wx(B2O+3cptLrQUz7Dg8S4`Nf&$e?`udK;_xIJk0mauwu`qoibLd5D} zK`>jaVf*AGlh%)O4s%0G!k}pi78KjT=SypRb{{bZ4do>neg-xhYr6}p$HngjBL;0% z5_bSUtIRxk;u`RLe;EQEzp5r%n8O>s>|&_p2si{4KA_S9UhrRQ@v}&;B8<4Kpw8A$ zmega*r$To@LxDX@Ig>43j4!l-z(zMa-4`#E6m#%G95N6GWcq2%{^*;-t~F0nx9y;} zKN*OQ>~sJgH}i!cBARQXd-|)n5X|}cskJU-KX=ToURU+RMF^a;MHZ*u66*;YH@^>5 zX+}YpGe>V%yDvgG&ZjlqoqjJ+!UmrKW!WyUoOyI^HMHiXP8cq_A@5mKLT_0+P#;VM zvS5F50ai=2!b@XKLzhFqj4}RtcLnUJo9Et+$Xtx7-ZPjtY>^ZO-mC$u4RvJr$N`38 zJ!0%S)coqf!Jny#)j4mHR!Sf(K9A=ps@<}A8~OF^e&xN=Wt7nnJeHE*@OyxRsQdQ* zR!1TaRU>r@AIu@Z(x*1Z%E$AX;iaQkpra;I`4uQS8|nJuEJ>$0)`jIgCeOenS!eX^ z2}5MJ#_bl@&4jXi7KBHL?ha4kqA?~Q6tz00Tjez3)GfLvEnD(^v(1IhsZue|A_Pvo z)zKl+sn2E_+6sVStWJD`ef0e>Z#iUCmSdc+S6Vmiad?CY1 z6}!=U{LgV;ToQH#iyn+u&>dFlYzsivXY=E)yS-i??{DPioAyxt<=^CkgK`un5yDG3 z!M`vjv2j}?9wV|~_aq+{gIPh8?5g4g%kPnN#+cNRxCmwRtm>HiGu zVarS5UVNg)Mup>f_=fYg+&yOe3w=+(!*06o+vA8nI{+MoQ}!ZWU#(PC`%_mA(sWDF zO|popkrc?@NQJ|!-|pEMs9><;h~^m2pJ=tsDRx_*{#I12kwLjg0(wyYi-X5}ShLjf zAPmOPWt~`iw97{|AyB>3AXfXD5fx^c-62du(f$Ms{PRJx$~R5y3l+7-eRjpK>bwj5 zYk+;Zd!($FaxbuU)jD^XwnAZU+f{VHGos8***LFc}eL923p3Y9mD@>QnqHhG()#&p>( zqAltcwli@2Vp}sbtm!OVA;&t1=cJ*?*2BiI0)c+EqS`qIR9W=VT*5n;&u+K)ic`nq z78I3f;b!gW;j&rVcD{CZs_tp{-;~Z)L=Rh1isPJ`8ow%z=h2(2+ft^ZG7aZyYxgSi zCyrkgP%U|rHo(-$M{D1J^%QNMzqd(ap|A%kVsO#{osE-@&5<+;z_{eBaq1b}NqN#) zQv!D&dSG7EI(zMtLC}<=m(|*-yR*(5^E+rh+@wMp(PnP&H40fN@ zl6od>JUXWy!_`FDoJD`1{U7v`ZP2^y)=!}MNSblc?E;KKv*LnUk&(jHuc_U&EjlFz zEh3fdyKgCv7PCZ?h1HkuUM$KemuB~J)t7fNf+ge_kpG)i!9njXOJg#;fceJR%dyG{ z@7@!3pM57!Bnw}amm!TNraJ2yKPuWwaMvo$+_`PsoowLly|{p>kc$hkUhW&Qwzcdi z=y^%E+^eKVe2<$dNn+HirsvUe#>RyLrWj;UlfYSk+(f>iJImm`2ro>L{?reG!XQxK z0mM`k`me4kyb~`zvn>`OKXsdK@T1%PTu>p)gkmG_gMFFmsz`U9sW}p%8!eAmG%kmV 
zmw@y4^eJ8=V`t+IpsZ`Csq;*9vuYwJwQQ2-c{8w%VT@PiSFtLQXRkwBQI(>#rLqF! zmXcjXOuuX8NOR&3R7K2_5yaX`UCt`)<*XL3o@p(U!ZvKz=QG>2#WkzOj~9{ZX0uhq z`C$?JQ7-yne~(tj+@}p|JN35@{BuDcb4IOghJRSwEF`(5sgDLd8MWAR!*R_Mz()U_ z-D%DbS>2$JuRD0x`iWo400@9`(g2w9UlU*2%i=K2$^tDc1?e|I5LQtkzN?Z9CWBI? ztE5#N>BRYUxNlo{qdXt$LL=(0DI(+`zeBD-*`GG&>8g{$Y^{+!mguW)<&8qh=uRb? zaoWc=FFIeJV7n;BQ{b!^f6ah73xf(hT z%1`n4cQ+1M)Xq4zFZ$>69Yunz0{XWfe9yP%JJG0A!+D5L*JDZI4n*P#HVG(>d>$pE zAG=(xO=V~1$SGEoQT$v3A+`v_GrRxP?}z4@#fR@8y&py5dj*H#Bj@FA=%g)WLDI_X z7f0`%t=essKk$TB+|=dDUs!R6Kt{UmJO;CuFC_W&)rTYV7+){@=Tn@h$umh*kE23- zuh~nE^X4n>O&@+L#7ImXSK>ZxV*E(v2Z_uSG0{t9K)rOt3GfmDbw==7u02~7Mz z(^vc;mlC^$ciwe|trOtW96`U#UOwimqfo(f{?y^!OV=<}&4L1_kMZNdX<(0hOHT~8 z-!6&0mqbODl`>uHRjl(appTk9%?nK8`i+vddY(bygI)VE*Zl;s3yC2^P7Zss!r*;a zkh{zmFH9{c%LtMvhM3g(fURooBZ^-eM~9xIaIs`i*z){+3}=^doGs;#loDTDaJZ-H zNnmImv3pVs;l9gPzw_yipH$qqX!z%46pbaHBbV{TL+r#geF)M>=q|F85^D%Az^cj` z9X@(KL#_YreB{vib?k8kXso_h{z($TJbmbEuSn%8UMK|~^p)Vg6wXge1xv`ycM^Y& zsG#|pH=bFQ$~5d?%_MPZBh&fMr*KpGGBkRIV}XTZ`Se1J>LRnM+&I531=%zav3xn# z?=RyYC8JTdv74AK1j!aB)i-KQdLhSljpZDobBX`Ei}HUdN`jjk->+P`TJ%g>Qtkhx zSPOv=%+k>p;{j87E>j?bC*V}}WIB@K;>-CIU;Hj{4;#X9jQzj-*aj95>(xxbfBpBL v#~~b^m+$(ozyJ4>|1G6|Q?&p8G~|rfW=y}an$l?D3iv#eQIal%8uWriteCallback(row_mu, req_size, req_time); } -void Adapter::Write(const std::string& row, +void Adapter::Write(int opt, const std::string& row, std::map >& column, uint64_t timestamp, std::string& value) { @@ -74,7 +74,13 @@ void Adapter::Write(const std::string& row, if (FLAGS_verify) { add_checksum(row, family, qualifier, &value); } - row_mu->Put(family, qualifier, value, (int64_t)timestamp); + if (opt == PUT) { + row_mu->Put(family, qualifier, value, (int64_t)timestamp); + } else if (opt == PIF) { + row_mu->PutIfAbsent(family, qualifier, value); + } else { + abort(); + } if (FLAGS_verify) { remove_checksum(&value); } @@ -122,6 +128,8 @@ void Adapter::WriteCallback(tera::RowMutation* row_mu, size_t req_size, tera::ErrorCode err = row_mu->GetError(); if 
(err.GetType() == tera::ErrorCode::kOK) { write_marker_.OnSuccess(req_size, latency); + } else if (err.GetType() == tera::ErrorCode::kTxnFail) { + write_marker_.OnConflict(req_size, latency); } else { /*std::cerr << "fail to write: row=[" << row << "], column=[" << family << ":" << qualifier << "], timestamp=[" diff --git a/src/benchmark/mark.h b/src/benchmark/mark.h index c510de42c..ec5099eb5 100644 --- a/src/benchmark/mark.h +++ b/src/benchmark/mark.h @@ -18,7 +18,7 @@ #include "common/mutex.h" #include "tera.h" -#include "utils/counter.h" +#include "common/counter.h" DECLARE_int64(pend_size); DECLARE_int64(pend_count); @@ -46,7 +46,8 @@ enum OP { PUT = 1, GET = 2, SCN = 3, - DEL = 4 + DEL = 4, + PIF = 5 }; int64_t Now(); @@ -201,8 +202,11 @@ class Statistic { last_finish_size_(0), last_success_count_(0), last_success_size_(0), + last_conflict_count_(0), + last_conflict_size_(0), finish_marker_(1000000), - success_marker_(1000000) {} + success_marker_(1000000), + conflict_marker_(1000000) {} int GetOpt() { return opt_; @@ -210,24 +214,30 @@ class Statistic { void GetStatistic(int64_t* total_count, int64_t* total_size, int64_t* finish_count, int64_t* finish_size, - int64_t* success_count, int64_t* success_size) { + int64_t* success_count, int64_t* success_size, + int64_t* conflict_count, int64_t* conflict_size) { *total_count = last_total_count_ = total_count_.Get(); *total_size = last_total_size_ = total_size_.Get(); *finish_count = last_finish_count_ = finish_count_.Get(); *finish_size = last_finish_size_ = finish_size_.Get(); *success_count = last_success_count_ = success_count_.Get(); *success_size = last_success_size_ = success_size_.Get(); + *conflict_count = last_conflict_count_ = conflict_count_.Get(); + *conflict_size = last_conflict_size_ = conflict_size_.Get(); } void GetLastStatistic(int64_t* total_count, int64_t* total_size, int64_t* finish_count, int64_t* finish_size, - int64_t* success_count, int64_t* success_size) { + int64_t* success_count, 
int64_t* success_size, + int64_t* conflict_count, int64_t* conflict_size) { *total_count = last_total_count_; *total_size = last_total_size_; *finish_count = last_finish_count_; *finish_size = last_finish_size_; *success_count = last_success_count_; *success_size = last_success_size_; + *conflict_count = last_conflict_count_; + *conflict_size = last_conflict_size_; } Marker* GetFinishMarker() { @@ -238,6 +248,10 @@ class Statistic { return &success_marker_; } + Marker* GetConflictMarker() { + return &conflict_marker_; + } + void OnReceive(size_t size) { last_send_time_ = Now(); last_send_size_ = size; @@ -257,6 +271,12 @@ class Statistic { success_marker_.AddLatency(latency); } + void OnConflict(size_t size, uint32_t latency) { + conflict_count_.Inc(); + conflict_size_.Add(size); + conflict_marker_.AddLatency(latency); + } + void CheckPending() { int64_t max_pend_count = FLAGS_pend_count; int64_t max_pend_size = FLAGS_pend_size << 20; @@ -297,6 +317,8 @@ class Statistic { tera::Counter finish_size_; tera::Counter success_count_; tera::Counter success_size_; + tera::Counter conflict_count_; + tera::Counter conflict_size_; size_t last_send_size_; int64_t last_send_time_; @@ -307,9 +329,12 @@ class Statistic { int64_t last_finish_size_; int64_t last_success_count_; int64_t last_success_size_; + int64_t last_conflict_count_; + int64_t last_conflict_size_; Marker finish_marker_; Marker success_marker_; + Marker conflict_marker_; }; class Adapter { @@ -317,7 +342,7 @@ class Adapter { Adapter(tera::Table* table); ~Adapter(); - void Write(const std::string& row, + void Write(int opt, const std::string& row, std::map >& column, uint64_t timestamp, std::string& value); diff --git a/src/benchmark/mark_main.cc b/src/benchmark/mark_main.cc index 36ae66c4b..dd57af93a 100644 --- a/src/benchmark/mark_main.cc +++ b/src/benchmark/mark_main.cc @@ -6,6 +6,7 @@ #include #include +#include #include #include #include @@ -61,6 +62,8 @@ bool parse_row(const char* buffer, ssize_t size, *op 
= GET; } else if (strncmp(buffer, "PUT", 3) == 0) { *op = PUT; + } else if (strncmp(buffer, "PIF", 3) == 0) { + *op = PIF; } else { return false; } @@ -76,13 +79,14 @@ bool parse_row(const char* buffer, ssize_t size, delim = end; } row->assign(buffer, delim - buffer); - if ((delim == end && mode != WRITE && (mode != MIX || *op != PUT)) || - (delim == end && mode == DELETE)) { + if ((delim == end && mode != WRITE && + (mode != MIX || (*op != PUT && *op != PIF))) + ||(delim == end && mode == DELETE)) { return true; } // parse value - if (mode == WRITE || (mode == MIX && *op == PUT)) { + if (mode == WRITE || (mode == MIX && (*op == PUT || *op == PIF))) { if (delim == end) { return false; } @@ -170,7 +174,7 @@ bool parse_row(const char* buffer, ssize_t size, } if (comma == end) { return true; - } else if (mode == WRITE || (mode == MIX && *op == PUT)) { + } else if (mode == WRITE || (mode == MIX && (*op == PUT || *op == PIF))) { return false; } @@ -217,10 +221,11 @@ bool get_next_row(int* op, std::string* row, void print_header() { std::cout << "HH:MM:SS OPT\t"; if (mode != SCAN && type == ASYNC) { - std::cout << "SENT [speed/total]\t\t"; + std::cout << "SENT [total/speed]\t\t"; } - std::cout << "FINISH [speed/total]\t\t"; - std::cout << "SUCCESS [speed/total]\t\t"; + std::cout << "FINISH [total/speed]\t\t"; + std::cout << "SUCCESS [total/speed]\t\t"; + std::cout << "CONFLICT [total/speed]\t\t"; if (mode != SCAN && type == ASYNC) { std::cout << "PENDING [count]"; } @@ -271,24 +276,28 @@ void print_size_and_count(int64_t size, int64_t count) { } void print_statistic(Statistic* statistic) { - int64_t old_total_count, old_finish_count, old_success_count; - int64_t old_total_size, old_finish_size, old_success_size; + int64_t old_total_count, old_finish_count, old_success_count, old_conflict_count; + int64_t old_total_size, old_finish_size, old_success_size, old_conflict_size; statistic->GetLastStatistic(&old_total_count, &old_total_size, &old_finish_count, &old_finish_size, 
- &old_success_count, &old_success_size); + &old_success_count, &old_success_size, + &old_conflict_count, &old_conflict_size); - int64_t new_total_count, new_finish_count, new_success_count; - int64_t new_total_size, new_finish_size, new_success_size; + int64_t new_total_count, new_finish_count, new_success_count, new_conflict_count; + int64_t new_total_size, new_finish_size, new_success_size, new_conflict_size; statistic->GetStatistic(&new_total_count, &new_total_size, &new_finish_count, &new_finish_size, - &new_success_count, &new_success_size); + &new_success_count, &new_success_size, + &new_conflict_count, &new_conflict_size); int64_t total_count = new_total_count - old_total_count; int64_t finish_count = new_finish_count - old_finish_count; int64_t success_count = new_success_count - old_success_count; + int64_t conflict_count = new_conflict_count - old_conflict_count; int64_t total_size = new_total_size - old_total_size; int64_t finish_size = new_finish_size - old_finish_size; int64_t success_size = new_success_size - old_success_size; + int64_t conflict_size = new_conflict_size - old_conflict_size; int64_t total_pending_count = new_total_count - new_finish_count; // scan @@ -317,6 +326,11 @@ void print_statistic(Statistic* statistic) { std::cout << "/"; print_size_and_count(success_size, success_count); std::cout << "\t\t"; + + print_size_and_count(new_conflict_size, new_conflict_count); + std::cout << "/"; + print_size_and_count(conflict_size, conflict_count); + std::cout << "\t\t"; if (mode != SCAN && type == ASYNC) { std::cout << total_pending_count; @@ -341,6 +355,11 @@ void print_marker(Statistic* statistic) { std::cout << " [SUCCESS]" << std::endl; Marker* success_marker = statistic->GetSuccessMarker(); print_marker(success_marker); + if (statistic->GetOpt() == PUT) { + std::cout << " [CONFLICT]" << std::endl; + Marker* conflict_marker = statistic->GetConflictMarker(); + print_marker(conflict_marker); + } } void* print_proc(void* param) { @@ -416,11 
+435,12 @@ void* print_proc(void* param) { } void print_summary(Statistic* marker, double duration) { - int64_t total_count, finish_count, success_count; - int64_t total_size, finish_size, success_size; + int64_t total_count, finish_count, success_count, conflict_count; + int64_t total_size, finish_size, success_size, conflict_size; marker->GetStatistic(&total_count, &total_size, &finish_count, &finish_size, - &success_count, &success_size); + &success_count, &success_size, + &conflict_count, &conflict_size); print_opt(marker); std::streamsize precision = std::cout.precision(); @@ -432,7 +452,10 @@ void print_summary(Statistic* marker, double duration) { << (double)finish_size / 1048576 / duration << " MB/s\n" << " succ: " << success_size << " bytes " << success_count << " records " - << (double)success_size / 1048576 / duration << " MB/s" + << (double)success_size / 1048576 / duration << " MB/s\n" + << " conflict: " << conflict_size << " bytes " + << conflict_count << " records " + << (double)conflict_size / 1048576 / duration << " MB/s" << std::endl; std::cout.precision(precision); std::cout.flags(flag); @@ -616,10 +639,11 @@ int main(int argc, char** argv) { switch (opt) { case PUT: + case PIF: if (type == SYNC && mode == MIX && last_opt == GET) { adapter->CommitSyncRead(); } - adapter->Write(row, column, largest_ts, value); + adapter->Write(opt, row, column, largest_ts, value); break; case GET: if (type == SYNC && mode == MIX && last_opt == PUT) { diff --git a/src/benchmark/tpcc/data_generator.cc b/src/benchmark/tpcc/data_generator.cc new file mode 100644 index 000000000..8fd76cbe6 --- /dev/null +++ b/src/benchmark/tpcc/data_generator.cc @@ -0,0 +1,182 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// +// Author: baorenyi@baidu.com + +#include "benchmark/tpcc/data_generator.h" +#include "benchmark/tpcc/tpccdb.h" +#include "common/thread_pool.h" +#include "common/timer.h" + +DECLARE_int32(warehouses_count); +DECLARE_int32(tpcc_thread_pool_size); +DECLARE_int32(generate_data_wait_times); + +namespace tera { +namespace tpcc { + +DataGenerator::DataGenerator(RandomGenerator* rand_gen, TpccDb* db) + : event_(), + rand_gen_(rand_gen), + db_(db), + now_datatime_(get_curtime_str()), + thread_pool_(FLAGS_tpcc_thread_pool_size) { + for (int i = 0; i < kTpccTableCnt; ++i) { + states_.push_back(std::make_pair(Counter(), Counter())); + } +} + +void DataGenerator::PrintJoinTimeoutInfo(int need_cnt, int table_enum_num) { + if (need_cnt > states_[table_enum_num].first.Get() + states_[table_enum_num].second.Get()) { + LOG(ERROR) << "table:" << kTpccTables[table_enum_num] + << "[need/succ/fail]:[" + << need_cnt << "/" + << states_[table_enum_num].first.Get() << "/" + << states_[table_enum_num].first.Get() << "]"; + } +} + +void DataGenerator::Join() { + event_.Trigger(); + if (!event_.TimeWait(FLAGS_generate_data_wait_times)) { + int stock_cnt = FLAGS_warehouses_count * kItemCount; + int districts_cnt = FLAGS_warehouses_count * kDistrictCountPerWarehouse; + int customers_cnt = districts_cnt * kCustomerCountPerDistrict; + PrintJoinTimeoutInfo(kItemCount, kItemTable); + PrintJoinTimeoutInfo(stock_cnt, kStockTable); + PrintJoinTimeoutInfo(FLAGS_warehouses_count, kWarehouseTable); + PrintJoinTimeoutInfo(districts_cnt, kDistrictTable); + PrintJoinTimeoutInfo(customers_cnt, kCustomerTable); + PrintJoinTimeoutInfo(customers_cnt, kCustomerLastIndex); + PrintJoinTimeoutInfo(customers_cnt, kHistoryTable); + } +} + +void DataGenerator::GenStocks(int32_t warehouse_id) { + IdSet original_ids = PickUniqueIdSet(rand_gen_, kItemCount / 10, 1, kItemCount); + event_.AddEventSources(kItemCount); + for (int id = 1; id <= kItemCount; ++id) { + bool is_original = original_ids.find(id) != 
original_ids.end(); + PushToInsertQueue(std::bind(&DataGenerator::GenStock, this, id, warehouse_id, is_original)); + } +} + +void DataGenerator::GenStock(int32_t id, int32_t warehouse_id, bool is_original) { + Stock s(id, warehouse_id, is_original, rand_gen_); + VLOG(12) << s.ToString(); + db_->InsertStock(s) ? states_[kStockTable].first.Inc() : states_[kStockTable].second.Inc(); + event_.Complete(); +} + +void DataGenerator::GenCustomers(int32_t district_id, int32_t warehouse_id) { + IdSet bad_credit_ids = PickUniqueIdSet(rand_gen_, + kCustomerCountPerDistrict / 10, 1, kCustomerCountPerDistrict); + event_.AddEventSources(kCustomerCountPerDistrict); + for (int c_id = 1; c_id <= kCustomerCountPerDistrict; ++c_id) { + bool is_bad_credit = bad_credit_ids.find(c_id) != bad_credit_ids.end(); + Customer c(c_id, district_id, warehouse_id, now_datatime_, is_bad_credit, rand_gen_); + VLOG(12) << c.ToString(); + db_->InsertCustomer(c) ? states_[kCustomerTable].first.Inc() : states_[kCustomerTable].second.Inc(); + } + event_.Complete(kCustomerCountPerDistrict); +} + +void DataGenerator::GenHistorys(int32_t district_id, int32_t warehouse_id) { + event_.AddEventSources(kCustomerCountPerDistrict); + for (int h_id = 1; h_id <= kCustomerCountPerDistrict; ++h_id) { + History h(h_id, district_id, warehouse_id, now_datatime_, rand_gen_); + VLOG(12) << h.ToString(); + db_->InsertHistory(h) ? states_[kHistoryTable].first.Inc() : states_[kHistoryTable].second.Inc(); + } + event_.Complete(kCustomerCountPerDistrict); +} + +void DataGenerator::GenOrderLines(int cnt, int32_t order_id, int32_t district_id, + int32_t warehouse_id, bool new_order) { + event_.AddEventSources(cnt); + for (int i = 1; i <= cnt; ++i) { + OrderLine ol(order_id, district_id, warehouse_id, i, new_order, now_datatime_, rand_gen_); + VLOG(12) << ol.ToString(); + db_->InsertOrderLine(ol) ? 
states_[kOrderLineTable].first.Inc() : states_[kOrderLineTable].second.Inc(); + } + event_.Complete(cnt); +} + +void DataGenerator::GenOrders(int32_t d_id, int32_t w_id) { + std::vector disorder_ids = rand_gen_->MakeDisOrderList(1, kCustomerCountPerDistrict); + event_.AddEventSources(kCustomerCountPerDistrict); + for (int o_id = 1; o_id <= kCustomerCountPerDistrict; ++o_id) { + bool new_order = (kCustomerCountPerDistrict - kInitNewOrderCountPerDistrict) < o_id; + int32_t c_id = disorder_ids[o_id]; + Order o(o_id, c_id, d_id, w_id, new_order, now_datatime_, rand_gen_); + // insert order line and new order first + // this use sync interface + GenOrderLines(o.o_ol_cnt, o_id, d_id, w_id, new_order); + if (new_order) { + event_.AddEventSources(1); + NewOrder no(o_id, d_id, w_id); + VLOG(12) << no.ToString(); + db_->InsertNewOrder(no) ? states_[kNewOrderTable].first.Inc() : states_[kNewOrderTable].second.Inc(); + event_.Complete(1); + } + // wait orderline and neworder insert done + VLOG(12) << o.ToString(); + db_->InsertOrder(o) ? states_[kOrderTable].first.Inc() : states_[kOrderTable].second.Inc(); + } + event_.Complete(kCustomerCountPerDistrict); +} + +void DataGenerator::GenDistricts(int32_t warehouse_id) { + event_.AddEventSources(kDistrictCountPerWarehouse); + for (int d_id = 1; d_id <= kDistrictCountPerWarehouse; ++d_id) { + District d(d_id, warehouse_id, rand_gen_); + VLOG(12) << d.ToString(); + db_->InsertDistrict(d) ? states_[kDistrictTable].first.Inc() : states_[kDistrictTable].second.Inc(); + GenCustomers(d_id, warehouse_id); + GenHistorys(d_id, warehouse_id); + + GenOrders(d_id, warehouse_id); + } + event_.Complete(kDistrictCountPerWarehouse); +} + +void DataGenerator::GenWarehouses() { + event_.AddEventSources(FLAGS_warehouses_count); + for (int32_t w_id = 1; w_id <= FLAGS_warehouses_count; ++w_id) { + GenStocks(w_id); + Warehouse w(w_id, rand_gen_); + VLOG(12) << w.ToString(); + db_->InsertWarehouse(w) ? 
states_[kWarehouseTable].first.Inc() : states_[kWarehouseTable].second.Inc(); + + GenDistricts(w_id); + } + event_.Complete(FLAGS_warehouses_count); +} + +void DataGenerator::GenItems() { + IdSet original_ids = PickUniqueIdSet(rand_gen_, kItemCount / 10, 1, kItemCount); + event_.AddEventSources(kItemCount); + for (int i_id = 1; i_id <= kItemCount; ++i_id) { + bool is_original = original_ids.find(i_id) != original_ids.end(); + PushToInsertQueue(std::bind(&DataGenerator::GenItem, this, i_id, is_original)); + } +} + +void DataGenerator::GenItem(int32_t item_id, bool is_original) { + Item item(item_id, is_original, rand_gen_); + VLOG(12) << item.ToString(); + db_->InsertItem(item) ? states_[kItemTable].first.Inc() : states_[kItemTable].second.Inc(); + event_.Complete(); +} + +void DataGenerator::PushToInsertQueue(const ThreadPool::Task& task) { + while(thread_pool_.PendingNum() > FLAGS_tpcc_thread_pool_size / 2) { + usleep(100); + } + thread_pool_.AddTask(task); + VLOG(12) << "thread_pool pending num = " << thread_pool_.PendingNum(); +} + +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/data_generator.h b/src/benchmark/tpcc/data_generator.h new file mode 100644 index 000000000..f5593b64c --- /dev/null +++ b/src/benchmark/tpcc/data_generator.h @@ -0,0 +1,61 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// +// Author: baorenyi@baidu.com + +#ifndef TERA_BENCHMARK_TPCC_DATA_GENERATOR_H +#define TERA_BENCHMARK_TPCC_DATA_GENERATOR_H + +#include +#include + +#include "benchmark/tpcc/random_generator.h" +#include "benchmark/tpcc/tpccdb.h" +#include "common/counter.h" +#include "common/event.h" +#include "common/thread_pool.h" + +namespace tera { +namespace tpcc { + + +class DataGenerator { +public: + DataGenerator(RandomGenerator* random_gen, TpccDb* db); + ~DataGenerator(){} + void GenWarehouses(); + void GenItems(); + void Join(); + +private: + void PrintJoinTimeoutInfo(int need_cnt, int table_enum_num); + + // for generate data + void GenStocks(int32_t warehouse_id); + void GenCustomers(int32_t district_id, int32_t warehouse_id); + void GenHistorys(int32_t district_id, int32_t warehouse_id); + void GenOrderLines(int cnt, int32_t order_id, int32_t district_id, + int32_t warehouse_id, bool new_order); + void GenOrders(int32_t district_id, int32_t warehouse_id); + void GenDistricts(int32_t warehouse_id); + + void GenItem(int32_t item_id, bool is_original); + void GenStock(int32_t id, int32_t warehouse_id, bool is_original); + + // for async insert + void PushToInsertQueue(const ThreadPool::Task& task); +private: + typedef std::vector> InsertStates; + CompletedEvent event_; + RandomGenerator* rand_gen_; + TpccDb* db_; + InsertStates states_; + std::string now_datatime_; + common::ThreadPool thread_pool_; +}; + +} // namespace tpcc +} // namespace tera + +#endif /* TERA_BENCHMARK_TPCC_DATA_GENERATOR_H */ diff --git a/src/benchmark/tpcc/driver.cc b/src/benchmark/tpcc/driver.cc new file mode 100644 index 000000000..aed2e6235 --- /dev/null +++ b/src/benchmark/tpcc/driver.cc @@ -0,0 +1,190 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// +// Author: baorenyi@baidu.com + +#include "benchmark/tpcc/driver.h" +#include "benchmark/tpcc/tpccdb.h" +#include "common/thread_pool.h" +#include "common/timer.h" + +DECLARE_int32(driver_wait_times); +DECLARE_int32(warehouses_count); +DECLARE_int32(tpcc_run_gtxn_thread_pool_size); +DECLARE_int64(transactions_count); + +namespace tera { +namespace tpcc { + +Driver::Driver(RandomGenerator* rand_gen, TpccDb* db) + : event_(), + rand_gen_(rand_gen), + db_(db), + now_datatime_(get_curtime_str()), + thread_pool_(FLAGS_tpcc_run_gtxn_thread_pool_size) { +} + +void Driver::PrintJoinTimeoutInfo(int need_cnt, int table_enum_num) { + if (need_cnt < states_[table_enum_num].first.Get() + states_[table_enum_num].second.Get()) { + LOG(ERROR) << "table:" << kTpccTables[table_enum_num] + << "[need/succ/fail]:[" + << need_cnt << "/" + << states_[table_enum_num].first.Get() << "/" + << states_[table_enum_num].first.Get() << "]"; + } +} + +void Driver::RunTransactions() { + for (int64_t i = 0; i < FLAGS_transactions_count; ++i) { + RunOneTransaction(); + } +} + +void Driver::Join() { + event_.Trigger(); + if (!event_.TimeWait(FLAGS_driver_wait_times)) { + // TODO + } +} + +void Driver::RunOneTransaction() { + int rand_num = rand_gen_->GetRandom(1, 100); + if (rand_num <= kTpccTransactionRatios[0]) { // %4 do stock_level + RunStockLevelTxn(); + } else if (rand_num <= kTpccTransactionRatios[1]) { // %4 do order_status + RunOrderStatusTxn(); + } else if (rand_num <= kTpccTransactionRatios[2]) { // %4 do delivery + RunDeliveryTxn(); + } else if (rand_num <= kTpccTransactionRatios[3]) { // %43 do payment + RunPaymentTxn(); + } else { // %45 do new_order + RunNewOrderTxn(); + } +} + +void Driver::RunStockLevelTxn() { + int32_t threshold = rand_gen_->GetRandom(kMinStockLevelThreshold, kMaxStockLevelThreshold); + StockLevelResult ret; + db_->StockLevelTxn(FindWareHouse(), FindDistrict(), threshold, &ret); +} + +void Driver::RunOrderStatusTxn() { + int x = rand_gen_->GetRandom(1, 100); + 
OrderStatusResult ret; + if (x <= 60) { + // 60% order_status by lastname + std::string last_name = GenLastName(rand_gen_, kCustomerCountPerDistrict); + db_->OrderStatusTxn(true, FindWareHouse(), FindDistrict(), + -1, last_name, &ret); + } else { + // 40% order_status by customer_id + db_->OrderStatusTxn(false, FindWareHouse(), FindDistrict(), + FindCustomerId(), "", &ret); + } +} + +void Driver::RunDeliveryTxn() { + int32_t carrier_id = rand_gen_->GetRandom(kMinCarrierId, kMaxCarrierId); + DeliveryResult ret;; + db_->DeliveryTxn(FindWareHouse(), carrier_id, get_curtime_str(), &ret); +} + +void Driver::RunPaymentTxn() { + int32_t warehouse_id = FindWareHouse(); + int32_t district_id = FindDistrict(); + + float h_amount = rand_gen_->MakeFloat(kRuntimeMinAmount, kRuntimeMaxAmount, + kRuntimeAmountDigits); + + int32_t customer_warehouse_id = -1; + int32_t customer_district_id = -1; + + int x = rand_gen_->GetRandom(1, 100); + + // set customer c_w_id and c_d_id + if (FLAGS_warehouses_count == 1 && x <= 85) { + // 85% payment through local warehouse (or only one warehouse) + customer_warehouse_id = warehouse_id; + customer_district_id = district_id; + } else { + // 15% payment through remote warehouse + customer_warehouse_id = + rand_gen_->GetRandom(1, FLAGS_warehouses_count, warehouse_id); + customer_district_id = FindDistrict(); + } + + x = rand_gen_->GetRandom(1, 100); + PaymentResult ret; + if (x <= 60) { + // 60% payment by lastname + std::string last_name = GenLastName(rand_gen_, kCustomerCountPerDistrict); + db_->PaymentTxn(true, warehouse_id, district_id, + customer_warehouse_id, customer_district_id, -1, + last_name, h_amount, &ret); + } else { + // 40% payment by customer_id + db_->PaymentTxn(false, warehouse_id, district_id, + customer_warehouse_id, customer_district_id, FindCustomerId(), + "", h_amount, &ret); + } +} + +void Driver::RunNewOrderTxn() { + int32_t warehouse_id = FindWareHouse(); + + // init NewOrderInfo + NewOrderInfo info; + // 1% of new_order 
transactions will be failed + info.need_failed = rand_gen_->GetRandom(1,100) == 1 ? true : false; + info.o_ol_cnt = rand_gen_->GetRandom(kMinOrderLineCnt, kMaxOrderLineCnt); + + info.ol_supply_w_ids.reserve(info.o_ol_cnt); + info.ol_i_ids.reserve(info.o_ol_cnt); + info.ol_quantities.reserve(info.o_ol_cnt); + info.o_all_local = 1; + for (int32_t i = 0; i < info.o_ol_cnt; ++i) { + // 1% of orderlines will be remote order + bool remote = rand_gen_->GetRandom(1, 100) == 1 ? true : false; + if (FLAGS_warehouses_count > 1 && remote) { + info.ol_supply_w_ids.emplace_back( + rand_gen_->GetRandom(1, FLAGS_warehouses_count, warehouse_id)); + info.o_all_local = 0; + } else { + info.ol_supply_w_ids.emplace_back(warehouse_id); + } + info.ol_i_ids.emplace_back(FindItemId()); + info.ol_quantities.emplace_back( + rand_gen_->GetRandom(1, kMaxOrderLineQuantity)); + } + + NewOrderResult ret; + db_->NewOrderTxn(warehouse_id, FindDistrict(), FindCustomerId(), info, &ret); +} + +void Driver::PushToInsertQueue(const ThreadPool::Task& task) { + while(thread_pool_.PendingNum() > FLAGS_tpcc_run_gtxn_thread_pool_size / 2) { + usleep(100); + } + thread_pool_.AddTask(task); + VLOG(12) << "thread_pool pending num = " << thread_pool_.PendingNum(); +} + +int32_t Driver::FindWareHouse() { + return rand_gen_->GetRandom(1, FLAGS_warehouses_count); +} + +int32_t Driver::FindDistrict() { + return rand_gen_->GetRandom(1, kDistrictCountPerWarehouse); +} + +int32_t Driver::FindCustomerId() { + return rand_gen_->NURand(1023, 1, kCustomerCountPerDistrict); +} + +int32_t Driver::FindItemId() { + return rand_gen_->NURand(8191, 1, kItemCount); +} + +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/driver.h b/src/benchmark/tpcc/driver.h new file mode 100644 index 000000000..56bf5a66f --- /dev/null +++ b/src/benchmark/tpcc/driver.h @@ -0,0 +1,68 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. 
All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com + +#ifndef TERA_BENCHMARK_TPCC_DRIVER_H +#define TERA_BENCHMARK_TPCC_DRIVER_H + +#include +#include + +#include "benchmark/tpcc/random_generator.h" +#include "benchmark/tpcc/tpccdb.h" +#include "common/counter.h" +#include "common/event.h" +#include "common/thread_pool.h" + +namespace tera { +namespace tpcc { + +class Driver { +public: + Driver(RandomGenerator* random_gen, TpccDb* db); + ~Driver(){} + void RunTransactions(); + void Join(); + +private: + void PrintJoinTimeoutInfo(int need_cnt, int table_enum_num); + + // for run transaction + void RunOneTransaction(); + // + void RunStockLevelTxn(); + + void RunOrderStatusTxn(); + + void RunDeliveryTxn(); + + void RunPaymentTxn(); + + void RunNewOrderTxn(); + + // for async run txn + void PushToInsertQueue(const ThreadPool::Task& task); + + int32_t FindWareHouse(); + + int32_t FindDistrict(); + + int32_t FindCustomerId(); + + int32_t FindItemId(); +private: + typedef std::vector> TxnStates; + CompletedEvent event_; + RandomGenerator* rand_gen_; + TpccDb* db_; + TxnStates states_; + std::string now_datatime_; + common::ThreadPool thread_pool_; +}; + +} // namespace tpcc +} // namespace tera + +#endif /* TERA_BENCHMARK_TPCC_DATA_GENERATOR_H */ diff --git a/src/benchmark/tpcc/mock_tpccdb.cc b/src/benchmark/tpcc/mock_tpccdb.cc new file mode 100644 index 000000000..ee8cce0d0 --- /dev/null +++ b/src/benchmark/tpcc/mock_tpccdb.cc @@ -0,0 +1,18 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// +// Author: baorenyi@baidu.com + +#include "benchmark/tpcc/mock_tpccdb.h" + +#include +#include + +namespace tera { +namespace tpcc { + +MockTpccDb::MockTpccDb() : flag_(true) {} + +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/mock_tpccdb.h b/src/benchmark/tpcc/mock_tpccdb.h new file mode 100644 index 000000000..0f29f0320 --- /dev/null +++ b/src/benchmark/tpcc/mock_tpccdb.h @@ -0,0 +1,98 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com + +#ifndef TERA_BENCHMARK_TPCC_MOCK_TPCCDB_H +#define TERA_BENCHMARK_TPCC_MOCK_TPCCDB_H + +#include "benchmark/tpcc/tpccdb.h" + +namespace tera { +namespace tpcc { + +class TpccDb; +class TxnResult; + +class MockTpccDb : public TpccDb { +public: + MockTpccDb(); + virtual ~MockTpccDb() {} + + virtual bool CreateTables() { return true; } + virtual bool CleanTables() { return true; } + + // init db + virtual bool InsertItem(const Item& i) { + return flag_; + } + + virtual bool InsertWarehouse(const Warehouse& w) { + return flag_; + } + + virtual bool InsertDistrict(const District& d) { + return flag_; + } + + virtual bool InsertCustomer(const Customer& c) { + return flag_; + } + + virtual bool InsertHistory(const History& h) { + return flag_; + } + + virtual bool InsertStock(const Stock& s) { + return flag_; + } + + virtual bool InsertOrder(const Order& o) { + return flag_; + } + + virtual bool InsertOrderLine(const OrderLine& ol) { + return flag_; + } + + virtual bool InsertNewOrder(const NewOrder& no) { + return flag_; + } + + virtual void StockLevelTxn(int32_t warehouse_id, int32_t district_id, + int32_t threshold, + StockLevelResult* ret) {} + + virtual void DeliveryTxn(int32_t warehouse_id, + int32_t carrier_id, + const std::string& delivery_datetime, + DeliveryResult* ret) {} + + virtual void OrderStatusTxn(bool by_last_name, + int32_t 
warehouse_id, int32_t district_id, + int32_t c_customer_id, + const std::string& last_name, + OrderStatusResult* ret) {} + + virtual void PaymentTxn(bool by_last_name, + int32_t warehouse_id, int32_t district_id, + int32_t c_warehouse_id, int32_t c_district_id, + int32_t c_customer_id, + const std::string& last_name, + int32_t h_amount, + PaymentResult* ret) {} + + virtual void NewOrderTxn(int32_t warehouse_id, + int32_t district_id, + int32_t customer_id, const NewOrderInfo& info, + NewOrderResult* ret) {} + +private: + bool flag_; +}; + +} // namespace tpcc +} // namespace tera + +#endif /* TERA_BENCHMARK_TPCC_MOCK_TPCCDB_H */ diff --git a/src/benchmark/tpcc/random_generator.cc b/src/benchmark/tpcc/random_generator.cc new file mode 100644 index 000000000..9308ec6e9 --- /dev/null +++ b/src/benchmark/tpcc/random_generator.cc @@ -0,0 +1,132 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com + +#include "benchmark/tpcc/random_generator.h" + +#include + +namespace tera { +namespace tpcc { + +RandomGenerator::RandomGenerator():c_({0,0,0}) { + InitRandomState(); +} + +void RandomGenerator::InitRandomState() { + memset(&rand_state_, 0, sizeof(rand_state_)); + int ret = initstate_r(static_cast(time(NULL)), + rand_state_buf_, + sizeof(rand_state_buf_), + &rand_state_); + assert(ret == 0); +} + +NURandConstant RandomGenerator::GetRandomConstant() const { + return c_; +} + +void RandomGenerator::SetRandomConstant() { + c_.c_last = GetRandom(0, 255); + c_.c_id = GetRandom(0, 1023); + c_.ol_i_id = GetRandom(0, 8191); +} + +inline bool VarfiyConstantAvailableForRun(int run_last, int load_last) { + int delta = run_last - load_last; + delta = delta > 0 ? 
delta : -1 * delta; + return 65 <=delta && delta <= 119 && delta != 96 && delta != 112; +} + +void RandomGenerator::SetRandomConstant(const NURandConstant& constant_for_load) { + c_.c_last = GetRandom(0, 255); + c_.c_id = GetRandom(0, 1023); + c_.ol_i_id = GetRandom(0, 8191); + while (!VarfiyConstantAvailableForRun(c_.c_last, constant_for_load.c_last)) { + c_.c_last = GetRandom(0, 255); + } +} + +int RandomGenerator::GetRandom(int lower, int upper) { + int ret = 0; + int err = random_r(&rand_state_, &ret); + assert(err == 0); + return lower <= upper ? (ret % (upper - lower + 1) + lower) : (ret % (lower - upper + 1) + upper); +} + +int RandomGenerator::GetRandom(int lower, int upper, int exclude) { + if (exclude > upper || exclude < lower) { + return GetRandom(lower, upper); + } else { + int rand = GetRandom(lower, upper - 1); + if (rand >= exclude) { + ++rand; + } + return rand; + } +} + +std::string RandomGenerator::MakeAString(int lower_len, int upper_len) { + int len = GetRandom(lower_len, upper_len); + std::string ret; + for (int i = 0; i < len; ++i) { + ret += (char)('a' + GetRandom(0, 25)); + } + return ret; +} + +std::string RandomGenerator::MakeNString(int lower_len, int upper_len) { + int len = GetRandom(lower_len, upper_len); + std::string ret; + for (int i = 0; i < len; ++i) { + ret += (char)('0' + GetRandom(0, 9)); + } + return ret; +} + +float RandomGenerator::MakeFloat(float lower, float upper, int digits) { + float num = 1.0; + for (int i = 0; i < digits; ++i) { + num *= 10; + } + return GetRandom(int(lower * num + 0.5), int(upper * num + 0.5)) / num; +} + +std::vector RandomGenerator::MakeDisOrderList(int lower, int upper) { + std::vector ret(upper - lower + 1, -1); + for (int i = 0; i < upper - lower + 1; ++i) { + int rand_pos = GetRandom(0, upper - lower); + while (true) { + if (ret[rand_pos] == -1) { + ret[rand_pos] = lower + i; + break; + } + rand_pos = GetRandom(0, upper - lower); + } + } + return ret; +} + +int RandomGenerator::NURand(int A, 
int x, int y) { + int C = 0; + switch(A) { + case 255: + C = c_.c_last; + break; + case 1023: + C = c_.c_id; + break; + case 8191: + C = c_.ol_i_id; + break; + default: + LOG(ERROR) << "NURand: A = " << A << " not available"; + abort(); + } + return (((GetRandom(0, A) | GetRandom(x, y)) + C) % (y - x + 1)) + x; +} + +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/random_generator.h b/src/benchmark/tpcc/random_generator.h new file mode 100644 index 000000000..c39070294 --- /dev/null +++ b/src/benchmark/tpcc/random_generator.h @@ -0,0 +1,64 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com + +#ifndef TERA_BENCHMARK_TPCC_RANDOM_GENERATOR_H +#define TERA_BENCHMARK_TPCC_RANDOM_GENERATOR_H + +#include +#include +#include + +#include "benchmark/tpcc/tpcc_types.h" + +namespace tera { +namespace tpcc { + +struct NURandConstant { + int c_last; + int c_id; + int ol_i_id; +}; + +class RandomGenerator { +public: + RandomGenerator(); + virtual ~RandomGenerator(){} + + NURandConstant GetRandomConstant() const; + void SetRandomConstant(); + void SetRandomConstant(const NURandConstant& constant_for_load); + + // make a string A len=rand[lower_len, upper_len] A[x] = set(a..z) + std::string MakeAString(int lower_len, int upper_len); + + // make a string N len=rand[lower_len, upper_len] N[x] = set(0..9) + std::string MakeNString(int lower_len, int upper_len); + + float MakeFloat(float lower, float upper, int digits); + + std::vector MakeDisOrderList(int lower, int upper); + + int NURand(int A, int lower, int upper); + + // get rand int from [lower, upper] + int GetRandom(int lower, int upper); + + int GetRandom(int lower, int upper, int exclude); +private: + void InitRandomState(); +private: + // for system call random_r and initstate_r + char rand_state_buf_[kRandomStateSize]; + struct random_data 
rand_state_; + + // for NURand, need a constant + NURandConstant c_; +}; + +} // namespace tpcc +} // namespace tera + +#endif /* TERA_BENCHMARK_TPCC_RANDOM_GENERATOR_H */ diff --git a/src/benchmark/tpcc/tera_tpccdb.cc b/src/benchmark/tpcc/tera_tpccdb.cc new file mode 100644 index 000000000..f35f4ed2a --- /dev/null +++ b/src/benchmark/tpcc/tera_tpccdb.cc @@ -0,0 +1,538 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com + +#include "benchmark/tpcc/tera_tpccdb.h" + +#include +#include + +#include "sdk/client_impl.h" +#include "sdk/sdk_utils.h" + +DECLARE_string(tera_client_flagfile); +DECLARE_string(tera_table_schema_dir); + +namespace tera { +namespace tpcc { + +TeraTpccDb::TeraTpccDb() : client_(NULL) { + ErrorCode error_code; + client_ = Client::NewClient(FLAGS_tera_client_flagfile, "tera_tpcc", &error_code); + if (client_ == NULL) { + LOG(ERROR) << "new client failed. 
err:" << error_code.ToString(); + _Exit(EXIT_FAILURE); + } +} + +TeraTpccDb::~TeraTpccDb() { + delete client_; +} + +bool TeraTpccDb::CreateTables() { + ErrorCode err; + for (auto table : kTpccTables) { + std::string schema_file = FLAGS_tera_table_schema_dir + table; + TableDescriptor* desc = new TableDescriptor(); + if (ParseTableSchemaFile(schema_file, desc, &err)) { + if (client_->CreateTable(*desc, &err) && err.GetType() == ErrorCode::kOK) { + LOG(INFO) << "create table " << table << " ok"; + Table* table_ptr = client_->OpenTable(table, &err); + if (table_ptr == NULL) { + LOG(ERROR) << "open table " << table << " failed"; + delete desc; + return false; + } else { + table_map_[table] = table_ptr; + LOG(INFO) << "open table " << table << " ok"; + } + } else { + LOG(ERROR) << "create table " << table << " failed"; + delete desc; + return false; + } + } else { + LOG(ERROR) << "load schema failed, schema_file:" << schema_file << "err:" << err.ToString(); + delete desc; + return false; + } + delete desc; + } + return true; +} + +bool TeraTpccDb::CleanTables() { + ErrorCode err; + for (auto table : kTpccTables) { + if (!client_->DisableTable(table, &err)) { + LOG(ERROR) << "fail to disable table : " << table << " err: " <(client_); + if (!client_impl->ShowTablesInfo(table, &table_meta, &tablet_list, &err)) { + LOG(ERROR) << "table not exist: " << table; + continue; + } + uint64_t tablet_num = tablet_list.meta_size(); + VLOG(11) << tablet_num; + int wait_times = 0; + while (true) { + if (!client_impl->ShowTablesInfo(table, &table_meta, &tablet_list, &err)) { + LOG(ERROR) << "table not exist: " << table; + break; + } + uint64_t tablet_cnt = 0; + for (int32_t i = 0; i < tablet_list.meta_size(); ++i) { + const TabletMeta& tablet = tablet_list.meta(i); + if (tablet.status() == kTabletDisable || tablet.status() == kTableOffLine) { + tablet_cnt++; + } + } + if (tablet_cnt == tablet_num) { + break; + } + if (wait_times < 20) { + sleep(1); + } else { + LOG(ERROR) << "disable 
table : " << table << " failed, try " << wait_times << " time(s)"; + break; + } + } + } + if (!client_->DeleteTable(table, &err)) { + LOG(ERROR) << "drop table: " << table << " failed. " << err.ToString(); + } else { + LOG(INFO) << "drop table: "<< table << " done."; + } + } + return true; +} + +// init db +bool TeraTpccDb::InsertItem(const Item& i) { + std::string tablename = "t_item"; + if ( table_map_.find(tablename) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Transaction* gtxn = client_->NewGlobalTransaction(); + RowMutation* mu = table->NewRowMutation(i.PrimaryKey()); + mu->Put("cf0", "i_id", std::to_string(i.i_id)); + mu->Put("cf0", "i_im_id", std::to_string(i.i_im_id)); + mu->Put("cf0", "i_price", std::to_string(i.i_price)); + mu->Put("cf0", "i_name", i.i_name); + mu->Put("cf0", "i_data", i.i_data); + gtxn->ApplyMutation(mu); + gtxn->Commit(); + delete mu; + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. 
err:" + << gtxn->GetError().ToString(); + delete gtxn; + return false; + } + delete gtxn; + return true; +} + +bool TeraTpccDb::InsertWarehouse(const Warehouse& w) { + std::string tablename = "t_warehouse"; + if ( table_map_.find(tablename) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Transaction* gtxn = client_->NewGlobalTransaction(); + RowMutation* mu = table->NewRowMutation(w.PrimaryKey()); + mu->Put("cf0", "w_id", std::to_string(w.w_id)); + mu->Put("cf0", "w_tax", std::to_string(w.w_tax)); + mu->Put("cf0", "w_ytd", std::to_string(w.w_ytd)); + mu->Put("cf0", "w_name", w.w_name); + mu->Put("cf0", "w_street_1", w.w_street_1); + mu->Put("cf0", "w_street_2", w.w_street_2); + mu->Put("cf0", "w_city", w.w_city); + mu->Put("cf0", "w_state", w.w_state); + mu->Put("cf0", "w_zip", w.w_zip); + gtxn->ApplyMutation(mu); + gtxn->Commit(); + delete mu; + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. 
err:" + << gtxn->GetError().ToString(); + delete gtxn; + return false; + } + delete gtxn; + return true; +} + +bool TeraTpccDb::InsertDistrict(const District& d) { + std::string tablename = "t_district"; + if ( table_map_.find(tablename) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Transaction* gtxn = client_->NewGlobalTransaction(); + RowMutation* mu = table->NewRowMutation(d.PrimaryKey()); + mu->Put("cf0", "d_id", std::to_string(d.d_id)); + mu->Put("cf0", "d_w_id", std::to_string(d.d_w_id)); + mu->Put("cf0", "d_tax", std::to_string(d.d_tax)); + mu->Put("cf0", "d_ytd", std::to_string(d.d_ytd)); + mu->Put("cf0", "d_next_o_id", std::to_string(d.d_next_o_id)); + mu->Put("cf0", "d_name", d.d_name); + mu->Put("cf0", "d_street_1", d.d_street_1); + mu->Put("cf0", "d_street_2", d.d_street_2); + mu->Put("cf0", "d_city", d.d_city); + mu->Put("cf0", "d_state", d.d_state); + mu->Put("cf0", "d_zip", d.d_zip); + gtxn->ApplyMutation(mu); + gtxn->Commit(); + delete mu; + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. 
err:" + << gtxn->GetError().ToString(); + delete gtxn; + return false; + } + delete gtxn; + return true; +} + +bool TeraTpccDb::InsertCustomer(const Customer& c) { + std::string tablename = "t_customer"; + std::string c_last_index_name = "t_customer_last_index"; + if ( table_map_.find(tablename) == table_map_.end() + || table_map_.find(c_last_index_name) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Table* t_index = table_map_[tablename]; + Transaction* gtxn = client_->NewGlobalTransaction(); + std::string key = std::to_string(c.c_w_id) + "_" + std::to_string(c.c_d_id) + + "_" + c.c_last + "_" + std::to_string(c.c_id); + RowMutation* index_mu = t_index->NewRowMutation(key); + index_mu->Put("cf0", "c_id", std::to_string(c.c_id)); + index_mu->Put("cf0", "c_d_id", std::to_string(c.c_d_id)); + index_mu->Put("cf0", "c_w_id", std::to_string(c.c_w_id)); + index_mu->Put("cf0", "c_last", c.c_last); + gtxn->ApplyMutation(index_mu); + delete index_mu; + + RowMutation* mu = table->NewRowMutation(c.PrimaryKey()); + mu->Put("cf0", "c_id", std::to_string(c.c_id)); + mu->Put("cf0", "c_d_id", std::to_string(c.c_d_id)); + mu->Put("cf0", "c_w_id", std::to_string(c.c_w_id)); + mu->Put("cf0", "c_credit_lim", std::to_string(c.c_credit_lim)); + mu->Put("cf0", "c_discount", std::to_string(c.c_discount)); + mu->Put("cf0", "c_balance", std::to_string(c.c_balance)); + mu->Put("cf0", "c_ytd_payment", std::to_string(c.c_ytd_payment)); + mu->Put("cf0", "c_payment_cnt", std::to_string(c.c_payment_cnt)); + mu->Put("cf0", "c_delivery_cnt", std::to_string(c.c_delivery_cnt)); + mu->Put("cf0", "c_first", c.c_first); + mu->Put("cf0", "c_middle", c.c_middle); + mu->Put("cf0", "c_last", c.c_last); + mu->Put("cf0", "c_street_1", c.c_street_1); + mu->Put("cf0", "c_street_2", c.c_street_2); + mu->Put("cf0", "c_city", c.c_city); + mu->Put("cf0", "c_state", c.c_state); + mu->Put("cf0", "c_zip", c.c_zip); + mu->Put("cf0", "c_phone", c.c_phone); + mu->Put("cf0", 
"c_since", c.c_since); + mu->Put("cf0", "c_credit", c.c_credit); + mu->Put("cf0", "c_data", c.c_data); + gtxn->ApplyMutation(mu); + gtxn->Commit(); + delete mu; + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. err:" + << gtxn->GetError().ToString(); + delete gtxn; + return false; + } + delete gtxn; + return true; +} + +bool TeraTpccDb::InsertHistory(const History& h) { + std::string tablename = "t_history"; + std::string history_index_name = "t_history_index"; + + if (table_map_.find(tablename) == table_map_.end() || + table_map_.find(history_index_name) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Table* t_history_index = table_map_[history_index_name]; + Transaction* gtxn = client_->NewGlobalTransaction(); + + RowReader* hindex_reader = t_history_index->NewRowReader("count"); + RetTuples hindex_ret; + int cnt = -1; + TxnResult ret; + if (hindex_reader->GetError().GetType() != ErrorCode::kNotFound + && !GetValues(&ret, gtxn, hindex_reader, + {"count"}, + &hindex_ret, + "@insert_history|hindex_reader|count")) { + return false; + } else if (hindex_reader->GetError().GetType() == ErrorCode::kNotFound) { + cnt = 0; + } else { + cnt = std::stoi(hindex_ret["count"]); + } + + RowMutation* hindex_mu = t_history_index->NewRowMutation("count"); + hindex_mu->Put("cf0", "count", std::to_string(++cnt)); + gtxn->ApplyMutation(hindex_mu); + delete hindex_mu; + + RowMutation* mu = table->NewRowMutation(std::to_string(cnt)); + mu->Put("cf0", "h_c_id", std::to_string(h.h_c_id)); + mu->Put("cf0", "h_c_d_id", std::to_string(h.h_c_d_id)); + mu->Put("cf0", "h_c_w_id", std::to_string(h.h_c_w_id)); + mu->Put("cf0", "h_d_id", std::to_string(h.h_d_id)); + mu->Put("cf0", "h_w_id", std::to_string(h.h_w_id)); + mu->Put("cf0", "h_amount", std::to_string(h.h_amount)); + mu->Put("cf0", "h_date", h.h_date); + mu->Put("cf0", "h_data", h.h_data); + gtxn->ApplyMutation(mu); + gtxn->Commit(); + 
delete mu; + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. err:" + << gtxn->GetError().ToString(); + delete gtxn; + return false; + } + delete gtxn; + return true; +} + +bool TeraTpccDb::InsertStock(const Stock& s) { + std::string tablename = "t_stock"; + if ( table_map_.find(tablename) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Transaction* gtxn = client_->NewGlobalTransaction(); + RowMutation* mu = table->NewRowMutation(s.PrimaryKey()); + + mu->Put("cf0", "s_i_id", std::to_string(s.s_i_id)); + mu->Put("cf0", "s_w_id", std::to_string(s.s_w_id)); + mu->Put("cf0", "s_quantity", std::to_string(s.s_quantity)); + mu->Put("cf0", "s_ytd", std::to_string(s.s_ytd)); + mu->Put("cf0", "s_order_cnt", std::to_string(s.s_order_cnt)); + mu->Put("cf0", "s_remote_cnt", std::to_string(s.s_remote_cnt)); + int i = 0; + for (auto dist : s.s_dist) { + mu->Put("cf0", "s_dist_" + std::to_string(++i), dist); + } + mu->Put("cf0", "s_data", s.s_data); + + gtxn->ApplyMutation(mu); + gtxn->Commit(); + delete mu; + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. 
err:" + << gtxn->GetError().ToString(); + delete gtxn; + return false; + } + delete gtxn; + return true; +} + +bool TeraTpccDb::InsertOrder(const Order& o) { + std::string tablename = "t_order"; + std::string indexname = "t_order_index"; + if ( table_map_.find(tablename) == table_map_.end() || + table_map_.find(indexname) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Table* index = table_map_[indexname]; + + Transaction* gtxn = client_->NewGlobalTransaction(); + RowMutation* mu = table->NewRowMutation(o.PrimaryKey()); + std::string index_key = o.ForeignKey() + "_" + std::to_string(o.o_id); + RowMutation* index_mu = index->NewRowMutation(index_key); + index_mu->Put("cf0", "o_id", std::to_string(o.o_id)); + index_mu->Put("cf0", "o_c_id", std::to_string(o.o_c_id)); + index_mu->Put("cf0", "o_d_id", std::to_string(o.o_d_id)); + index_mu->Put("cf0", "o_w_id", std::to_string(o.o_w_id)); + mu->Put("cf0", "o_id", std::to_string(o.o_id)); + mu->Put("cf0", "o_c_id", std::to_string(o.o_c_id)); + mu->Put("cf0", "o_d_id", std::to_string(o.o_d_id)); + mu->Put("cf0", "o_w_id", std::to_string(o.o_w_id)); + mu->Put("cf0", "o_carrier_id", std::to_string(o.o_carrier_id)); + mu->Put("cf0", "o_ol_cnt", std::to_string(o.o_ol_cnt)); + mu->Put("cf0", "o_all_local", std::to_string(o.o_all_local)); + mu->Put("cf0", "o_entry_d", o.o_entry_d); + gtxn->ApplyMutation(mu); + gtxn->ApplyMutation(index_mu); + delete mu; + delete index_mu; + gtxn->Commit(); + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. 
err:" + << gtxn->GetError().ToString(); + delete gtxn; + return false; + } + delete gtxn; + return true; +} + +bool TeraTpccDb::InsertOrderLine(const OrderLine& ol) { + std::string tablename = "t_orderline"; + if ( table_map_.find(tablename) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Transaction* gtxn = client_->NewGlobalTransaction(); + RowMutation* mu = table->NewRowMutation(ol.PrimaryKey()); + mu->Put("cf0", "ol_o_id", std::to_string(ol.ol_o_id)); + mu->Put("cf0", "ol_d_id", std::to_string(ol.ol_d_id)); + mu->Put("cf0", "ol_w_id", std::to_string(ol.ol_w_id)); + mu->Put("cf0", "ol_number", std::to_string(ol.ol_number)); + mu->Put("cf0", "ol_i_id", std::to_string(ol.ol_i_id)); + mu->Put("cf0", "ol_supply_w_id", std::to_string(ol.ol_supply_w_id)); + mu->Put("cf0", "ol_quantity", std::to_string(ol.ol_quantity)); + mu->Put("cf0", "ol_amount", std::to_string(ol.ol_amount)); + mu->Put("cf0", "ol_delivery_d", ol.ol_delivery_d); + mu->Put("cf0", "ol_dist_info", ol.ol_dist_info); + gtxn->ApplyMutation(mu); + gtxn->Commit(); + delete mu; + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. err:" + << gtxn->GetError().ToString(); + delete gtxn; + return false; + } + delete gtxn; + return true; +} + +bool TeraTpccDb::InsertNewOrder(const NewOrder& no) { + std::string tablename = "t_neworder"; + if ( table_map_.find(tablename) == table_map_.end()) { + return false; + } + Table* table = table_map_[tablename]; + Transaction* gtxn = client_->NewGlobalTransaction(); + RowMutation* mu = table->NewRowMutation(no.PrimaryKey()); + mu->Put("cf0", "no_o_id", std::to_string(no.no_o_id)); + mu->Put("cf0", "no_d_id", std::to_string(no.no_d_id)); + mu->Put("cf0", "no_w_id", std::to_string(no.no_w_id)); + gtxn->ApplyMutation(mu); + gtxn->Commit(); + delete mu; + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + LOG(ERROR) << "insert table:" << tablename << " failed. 
err:" + << gtxn->GetError().ToString(); + delete gtxn; + return false; + } + delete gtxn; + return true; +} + +void TeraTpccDb::SetTxnResult(TxnResult* ret, Transaction* gtxn, bool state, + const std::string& msg) { + ret->SetState(state); + if (msg != "") { + ret->SetReason(gtxn->GetError().GetReason() + " msg:" + msg); + } else { + ret->SetReason(gtxn->GetError().GetReason()); + } +} + +bool TeraTpccDb::GetValues(TxnResult* ret, Transaction* gtxn, RowReader* reader, + std::initializer_list qu_names_initlist, + RetTuples* ret_tuples, + const std::string& if_error_msg) { + std::vector qu_names(qu_names_initlist); + for (auto& qu_name : qu_names) { + reader->AddColumn("cf0", qu_name); + } + gtxn->Get(reader); + if (gtxn->GetError().GetType() != ErrorCode::kOK) { + SetTxnResult(ret, gtxn, false, if_error_msg); + delete reader; + return false; + } else { + RowReader::TRow row; + reader->ToMap(&row); + for (auto qu_name : qu_names) { + if (row["cf0"].find(qu_name) != row["cf0"].end()) { + for (auto k : row["cf0"][qu_name]) { + ret_tuples->insert({{qu_name, k.second}}); + break; + } + } + } + delete reader; + } + return true; +} + +bool TeraTpccDb::GetCustomer(TxnResult* ret, Transaction* gtxn, bool by_last_name, + const std::string& last_name, int32_t customer_id, + int32_t warehouse_id, int32_t district_id, + std::string* customer_key, RetTuples* customer_ret) { + // open table + Table* t_customer_last_index = table_map_[kTpccTables[kCustomerLastIndex]]; + Table* t_customer = table_map_[kTpccTables[kCustomerTable]]; + *customer_key = std::to_string(warehouse_id) + "_" + std::to_string(district_id) + "_"; + + if (by_last_name) { + ErrorCode error_code; + std::string start_key = *customer_key + last_name + "_"; + ScanDescriptor scan_desc(start_key); + scan_desc.SetEnd(start_key + "~"); + scan_desc.AddColumnFamily("cf0"); + ResultStream* scanner = t_customer_last_index->Scan(scan_desc, &error_code); + std::vector keys; + for (scanner->LookUp(start_key); !scanner->Done(); 
scanner->Next()) { + std::string row_key = scanner->RowName(); + if (row_key.find(start_key) == std::string::npos) { + break; + } + + RowReader* index_reader = t_customer_last_index->NewRowReader(row_key); + RetTuples index_ret; + if (!GetValues(ret, gtxn, index_reader, + {"c_id"}, + &index_ret, + "@get_customer|index_reader|" + row_key)) { + delete scanner; + return false; + } + keys.push_back(index_ret["c_id"]); + } + delete scanner; + size_t pos = keys.size(); + pos = pos % 2 == 0 ? (pos / 2 - 1) : (pos / 2); + *customer_key += keys.at(pos); + } else { + *customer_key += std::to_string(customer_id); + } + RowReader* customer_reader = t_customer->NewRowReader(*customer_key); + if (!GetValues(ret, gtxn, customer_reader, + {"c_id", "c_d_id", "c_w_id", "c_first", "c_middle", "c_last", + "c_balance", "c_ytd_payment", "c_payment_cnt", "c_credit", + "c_data", "c_street_1", "c_street_2", "c_city", "c_state", + "c_zip", "c_phone", "c_since", "c_credit_lim", "c_discount"}, + customer_ret, + "@get_customer|customer_reader" + *customer_key)) { + return false; + } + return true; +} + +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/tera_tpccdb.h b/src/benchmark/tpcc/tera_tpccdb.h new file mode 100644 index 000000000..a300166b0 --- /dev/null +++ b/src/benchmark/tpcc/tera_tpccdb.h @@ -0,0 +1,101 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// +// Author: baorenyi@baidu.com + +#ifndef TERA_BENCHMARK_TPCC_TERA_TPCCDB_H +#define TERA_BENCHMARK_TPCC_TERA_TPCCDB_H + +#include "tera.h" +#include "benchmark/tpcc/tpccdb.h" + +namespace tera { +namespace tpcc { + +class TpccDb; +class TxnResult; + +class TeraTpccDb : public TpccDb { +public: + TeraTpccDb(); + virtual ~TeraTpccDb(); + + virtual bool CreateTables(); + virtual bool CleanTables(); + + // init db + virtual bool InsertItem(const Item& i); + + virtual bool InsertWarehouse(const Warehouse& w); + + virtual bool InsertDistrict(const District& d); + + virtual bool InsertCustomer(const Customer& c); + + virtual bool InsertHistory(const History& h); + + virtual bool InsertStock(const Stock& s); + + virtual bool InsertOrder(const Order& o); + + virtual bool InsertOrderLine(const OrderLine& ol); + + virtual bool InsertNewOrder(const NewOrder& no); + + virtual void StockLevelTxn(int32_t warehouse_id, int32_t district_id, + int32_t threshold, + StockLevelResult* ret); + + virtual void DeliveryTxn(int32_t warehouse_id, + int32_t carrier_id, + const std::string& delivery_datetime, + DeliveryResult* ret); + + virtual void OrderStatusTxn(bool by_last_name, + int32_t warehouse_id, int32_t district_id, + int32_t c_customer_id, + const std::string& last_name, + OrderStatusResult* ret); + + virtual void PaymentTxn(bool by_last_name, + int32_t warehouse_id, int32_t district_id, + int32_t c_warehouse_id, int32_t c_district_id, + int32_t c_customer_id, + const std::string& last_name, + int32_t h_amount, + PaymentResult* ret); + + virtual void NewOrderTxn(int32_t warehouse_id, + int32_t district_id, + int32_t customer_id, const NewOrderInfo& info, + NewOrderResult* ret); + +private: + void SetTxnResult(TxnResult* ret, Transaction* gtxn, bool state = true, + const std::string& msg = ""); + + bool GetValues(TxnResult* ret, Transaction* gtxn, RowReader* reader, + std::initializer_list qu_names_initlist, + RetTuples* ret_tuples, + const std::string& if_error_msg); + + bool 
GetCustomer(TxnResult* ret, Transaction* gtxn, bool by_last_name, + const std::string& last_name, int32_t customer_id, + int32_t warehouse_id, int32_t district_id, + std::string* customer_key, RetTuples* customer_ret); +private: + void SetPaymentSingleLineRet(const RetTuples& warehouse_ret, + const RetTuples& district_ret, + const RetTuples& customer_ret, + const RetTuples& other_ret, + RetTuples* payment_ret); +private: + Client* client_; + std::unordered_map table_map_; +}; + +} // namespace tpcc +} // namespace tera + +#endif /* TERA_BENCHMARK_TPCC_TERA_TPCCDB_H */ diff --git a/src/benchmark/tpcc/tera_txn/delivery_txn.cc b/src/benchmark/tpcc/tera_txn/delivery_txn.cc new file mode 100644 index 000000000..d1a7a3e18 --- /dev/null +++ b/src/benchmark/tpcc/tera_txn/delivery_txn.cc @@ -0,0 +1,144 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com + +#include "benchmark/tpcc/tera_tpccdb.h" + +#include +#include + +#include "sdk/client_impl.h" +#include "sdk/sdk_utils.h" + +namespace tera { +namespace tpcc { + +void TeraTpccDb::DeliveryTxn(int32_t warehouse_id, + int32_t carrier_id, + const std::string& delivery_datetime, + DeliveryResult* ret) { + // open table + Table* t_neworder = table_map_[kTpccTables[kNewOrderTable]]; + Table* t_order = table_map_[kTpccTables[kOrderTable]]; + Table* t_orderline = table_map_[kTpccTables[kOrderLineTable]]; + Table* t_customer = table_map_[kTpccTables[kCustomerTable]]; + // begin transaction + Transaction* gtxn = client_->NewGlobalTransaction(); + for (int32_t district_id = 1; district_id <= kDistrictCountPerWarehouse; ++district_id) { + // The row in the NEW-ORDER table with matching NO_W_ID (equals W_ID) + // and NO_D_ID (equals D_ID) and with the lowest NO_O_ID value is selected. 
+ ErrorCode error_code; + std::string start_key = std::to_string(warehouse_id) + "_" + std::to_string(district_id) + "_"; + ScanDescriptor scan_desc(start_key); + scan_desc.SetEnd(start_key + "~"); + scan_desc.AddColumnFamily("cf0"); + tera::ResultStream* scanner = t_neworder->Scan(scan_desc, &error_code); + bool not_new_order = false; + int32_t order_id = INT32_MAX; + for (scanner->LookUp(start_key); !scanner->Done(); scanner->Next()) { + std::string row_key = scanner->RowName(); + if (row_key.find(start_key) == std::string::npos) { + not_new_order = true; + break; + } + std::size_t found = row_key.find_last_of("_"); + int32_t found_order_id = std::stoi(row_key.substr(found + 1)); + if (order_id > found_order_id) { + order_id = found_order_id; + } + } + delete scanner; + // If no matching row is found, then the delivery of an order + // for this district is skipped. + if (not_new_order || order_id == INT32_MAX) { + continue; + } + + // The selected row in the NEW-ORDER table is deleted + std::string no_primary_key = start_key + std::to_string(order_id); + RowReader* no_reader = t_neworder->NewRowReader(no_primary_key); + RetTuples no_ret; + if (!GetValues(ret, gtxn, no_reader, + {"no_o_id"}, + &no_ret, + "@delivery|no_reader|" + no_primary_key)) { + return; + } + + RowMutation* no_mu = t_neworder->NewRowMutation(no_primary_key); + no_mu->DeleteColumns("cf0", "no_o_id", gtxn->GetStartTimestamp()); + no_mu->DeleteColumns("cf0", "no_d_id", gtxn->GetStartTimestamp()); + no_mu->DeleteColumns("cf0", "no_w_id", gtxn->GetStartTimestamp()); + gtxn->ApplyMutation(no_mu); + delete no_mu; + + // The row in the ORDER table with matching + // O_W_ID (equals W_ID), O_D_ID (equals D_ID), and O_ID (equals NO_O_ID) + // is selected, O_C_ID, the customer number, is retrieved, + // and O_CARRIER_ID is updated. 
+ std::string order_primary_key = no_primary_key; + RowReader* order_reader = t_order->NewRowReader(order_primary_key); + RetTuples order_ret; + if (!GetValues(ret, gtxn, order_reader, + {"o_carrier_id", "o_ol_cnt", "o_c_id"}, + &order_ret, + "@delivery|order_reader|" + order_primary_key)) { + return; + } + RowMutation* order_mu = t_order->NewRowMutation(order_primary_key); + order_mu->Put("cf0", "o_carrier_id", std::to_string(carrier_id)); + gtxn->ApplyMutation(order_mu); + delete order_mu; + + int32_t o_ol_cnt = std::stoi(order_ret["o_ol_cnt"]); + // the sum of all OL_AMOUNT. + float amount = 0.0f; + // All rows in the ORDER-LINE table with matching + // OL_W_ID (= O_W_ID), OL_D_ID (= O_D_ID), and OL_O_ID (= O_ID) are selected. + for (int32_t ol_number = 1; ol_number <= o_ol_cnt; ++ ol_number) { + std::string ol_key = order_primary_key + "_" + std::to_string(ol_number); + RowReader* ol_reader = t_orderline->NewRowReader(ol_key); + RetTuples ol_ret; + if (!GetValues(ret, gtxn, ol_reader, + {"ol_amount", "ol_delivery_d"}, + &ol_ret, + "@delivery|ol_reader|" + ol_key)) { + return; + } + amount += std::stof(ol_ret["ol_amount"]); + RowMutation* ol_mu = t_orderline->NewRowMutation(ol_key); + // All OL_DELIVERY_D, the delivery dates, + // are updated to the current system time as returned by the OS + ol_mu->Put("cf0","ol_delivery_d",delivery_datetime); + gtxn->ApplyMutation(ol_mu); + delete ol_mu; + } + + // The row in the CUSTOMER table with matching + // C_W_ID (= W_ID), C_D_ID (= D_ID), and C_ID (= O_C_ID) is selected + std::string customer_key = start_key + order_ret["o_c_id"]; + RowReader* customer_reader = t_customer->NewRowReader(customer_key); + RetTuples customer_ret; + if (!GetValues(ret, gtxn, customer_reader, + {"c_balance", "c_delivery_cnt"}, + &customer_ret, + "@delivery|customer_reader" + customer_key)) { + return; + } + // and C_BALANCE + sum(OL_AMOUNT) previously retrieved. C_DELIVERY_CNT + 1. 
+ RowMutation* customer_mu = t_customer->NewRowMutation(customer_key); + customer_mu->Put("cf0", "c_balance", + std::to_string(std::stof(customer_ret["c_balance"]) + amount)); + customer_mu->Put("cf0", "c_delivery_cnt", + std::to_string(std::stoi(customer_ret["c_delivery_cnt"]) + 1)); + gtxn->ApplyMutation(customer_mu); + delete customer_mu; + } + gtxn->Commit(); + SetTxnResult(ret, gtxn, gtxn->GetError().GetType() == ErrorCode::kOK); +} + +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/tera_txn/new_order_txn.cc b/src/benchmark/tpcc/tera_txn/new_order_txn.cc new file mode 100644 index 000000000..df4100824 --- /dev/null +++ b/src/benchmark/tpcc/tera_txn/new_order_txn.cc @@ -0,0 +1,214 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com + +#include "benchmark/tpcc/tera_tpccdb.h" + +#include +#include + +#include "sdk/client_impl.h" +#include "sdk/sdk_utils.h" + +namespace tera { +namespace tpcc { + +void TeraTpccDb::NewOrderTxn(int32_t warehouse_id, + int32_t district_id, + int32_t customer_id, const NewOrderInfo& info, + NewOrderResult* ret) { + // open table + Table* t_warehouse = table_map_[kTpccTables[kWarehouseTable]]; + Table* t_district = table_map_[kTpccTables[kDistrictTable]]; + Table* t_customer = table_map_[kTpccTables[kCustomerTable]]; + Table* t_order = table_map_[kTpccTables[kOrderTable]]; + Table* t_order_index = table_map_[kTpccTables[kOrderIndex]]; + Table* t_neworder = table_map_[kTpccTables[kNewOrderTable]]; + Table* t_orderline = table_map_[kTpccTables[kOrderLineTable]]; + Table* t_item = table_map_[kTpccTables[kItemTable]]; + Table* t_stock = table_map_[kTpccTables[kStockTable]]; + // begin transaction + std::unique_ptr gtxn(client_->NewGlobalTransaction()); + std::string datetime = get_curtime_str(); + std::string warehouse_key = std::to_string(warehouse_id); + 
std::string district_key = warehouse_key + "_" + std::to_string(district_id); + std::string customer_key = district_key + "_" + std::to_string(customer_id); + + RowReader* warehouse_reader = t_warehouse->NewRowReader(warehouse_key); + RetTuples warehouse_ret; + if (!GetValues(ret, gtxn.get(), warehouse_reader, + {"w_tax"}, + &warehouse_ret, + "@new_order|warehouse_reader|" + warehouse_key)) { + return; + } + + RowReader* district_reader = t_district->NewRowReader(district_key); + RetTuples district_ret; + if (!GetValues(ret, gtxn.get(), district_reader, + {"d_next_o_id", "d_tax"}, + &district_ret, + "@new_order|district_reader|" + district_key)) { + return; + } + std::string d_next_o_id_str = std::to_string(std::stoi(district_ret["d_next_o_id"]) + 1); + + RowReader* customer_reader = t_customer->NewRowReader(customer_key); + RetTuples customer_ret; + if (!GetValues(ret, gtxn.get(), customer_reader, + {"c_discount", "c_credit", "c_last"}, + &customer_ret, + "@new_order|customer_reader|" + customer_key)) { + return; + } + + RowMutation* district_mu = t_district->NewRowMutation(district_key); + district_mu->Put("cf0", "d_next_o_id", d_next_o_id_str); + gtxn->ApplyMutation(district_mu); + delete district_mu; + + std::string order_key = district_key + "_" + d_next_o_id_str; + RowMutation* order_mu = t_order->NewRowMutation(order_key); + std::string order_index_key = customer_key + "_" + d_next_o_id_str; + RowMutation* order_index_mu = t_order_index->NewRowMutation(order_index_key); + order_index_mu->Put("cf0", "o_id", d_next_o_id_str); + order_index_mu->Put("cf0", "o_c_id", std::to_string(customer_id)); + order_index_mu->Put("cf0", "o_d_id", std::to_string(district_id)); + order_index_mu->Put("cf0", "o_w_id", warehouse_key); + order_mu->Put("cf0", "o_id", d_next_o_id_str); + order_mu->Put("cf0", "o_c_id", std::to_string(customer_id)); + order_mu->Put("cf0", "o_d_id", std::to_string(district_id)); + order_mu->Put("cf0", "o_w_id", warehouse_key); + order_mu->Put("cf0", 
"o_carrier_id", std::to_string(0)); + order_mu->Put("cf0", "o_ol_cnt", std::to_string(info.o_ol_cnt)); + order_mu->Put("cf0", "o_all_local", std::to_string(info.o_all_local)); + order_mu->Put("cf0", "o_entry_d", datetime); + gtxn->ApplyMutation(order_mu); + gtxn->ApplyMutation(order_index_mu); + delete order_mu; + delete order_index_mu; + + RowMutation* no_mu = t_neworder->NewRowMutation(order_key); + no_mu->Put("cf0", "no_o_id", d_next_o_id_str); + no_mu->Put("cf0", "no_d_id", std::to_string(district_id)); + no_mu->Put("cf0", "no_w_id", warehouse_key); + gtxn->ApplyMutation(no_mu); + delete no_mu; + + std::string ol_dist_info_key; + if (district_id == kDistrictCountPerWarehouse) { + ol_dist_info_key = "s_dist_10"; + } else { + ol_dist_info_key = "s_dist_0" + std::to_string(district_id); + } + + float ol_amount_sum = 0; + for (int32_t i = 0; i < info.o_ol_cnt; ++i) { + int32_t i_id = info.ol_i_ids[i]; + std::string item_key = std::to_string(i_id); + RowReader* item_reader = t_item->NewRowReader(item_key); + RetTuples item_ret; + if (!GetValues(ret, gtxn.get(), item_reader, + {"i_price", "i_name", "i_data"}, + &item_ret, + "@new_order|item_reader|" + item_key)) { + return; + } + + std::string ol_supply_w_id_str = std::to_string(info.ol_supply_w_ids[i]); + std::string stock_key = ol_supply_w_id_str+ "_" + item_key; + RowReader* stock_reader = t_item->NewRowReader(stock_key); + RetTuples stock_ret; + if (!GetValues(ret, gtxn.get(), stock_reader, + {"s_quantity", "s_ytd", "s_order_cnt", "s_remote_cnt", "s_data", ol_dist_info_key}, + &stock_ret, + "@new_order|stock_reader|" + stock_key)) { + return; + } + + int32_t ol_quantity = info.ol_quantities[i]; + float ol_amount = std::stof(item_ret["i_price"]) * ol_quantity; + ol_amount_sum += ol_amount; + std::string ol_number_str = std::to_string(i + 1); + std::string ol_key = order_key + "_" + ol_number_str; + RowMutation* ol_mu = t_orderline->NewRowMutation(ol_key); + ol_mu->Put("cf0", "ol_o_id", d_next_o_id_str); + 
ol_mu->Put("cf0", "ol_d_id", std::to_string(district_id)); + ol_mu->Put("cf0", "ol_w_id", warehouse_key); + ol_mu->Put("cf0", "ol_number", ol_number_str); + ol_mu->Put("cf0", "ol_i_id", item_key); + ol_mu->Put("cf0", "ol_supply_w_id", ol_supply_w_id_str); + ol_mu->Put("cf0", "ol_delivery_d", ""); + ol_mu->Put("cf0", "ol_quantity", std::to_string(ol_quantity)); + ol_mu->Put("cf0", "ol_amount", std::to_string(ol_amount)); + ol_mu->Put("cf0", "ol_dist_info", stock_ret[ol_dist_info_key]); + gtxn->ApplyMutation(ol_mu); + delete ol_mu; + // update stock + int32_t s_quantity = std::stoi(stock_ret["s_quantity"]); + if (s_quantity > ol_quantity + 10) { + s_quantity -= ol_quantity; + } else { + s_quantity = (s_quantity - ol_quantity) + 91; + } + float s_ytd = std::stof(stock_ret["s_quantity"]) + ol_quantity; + int32_t s_order_cnt = std::stoi(stock_ret["s_order_cnt"]) + 1; + int32_t s_remote_cnt = std::stoi(stock_ret["s_remote_cnt"]); + if (info.ol_supply_w_ids[i] != warehouse_id) { + ++s_remote_cnt; + } + RowMutation* stock_mu = t_stock->NewRowMutation(stock_key); + stock_mu->Put("cf0", "s_quantity", std::to_string(s_quantity)); + stock_mu->Put("cf0", "s_ytd", std::to_string(s_ytd)); + stock_mu->Put("cf0", "s_order_cnt", std::to_string(s_order_cnt)); + stock_mu->Put("cf0", "s_remote_cnt", std::to_string(s_remote_cnt)); + gtxn->ApplyMutation(stock_mu); + delete stock_mu; + + // set result + RetTuples line; + line["ol_supply_w_id"] = ol_supply_w_id_str; + line["ol_i_id"] = item_key; + line["i_name"] = item_ret["i_name"]; + line["ol_quantity"] = std::to_string(ol_quantity); + line["s_quantity"] = std::to_string(s_quantity); + line["i_price"] = item_ret["i_price"]; + line["ol_amount"] = std::to_string(ol_amount); + std::string i_data = item_ret["i_data"]; + std::string s_data = item_ret["s_data"]; + if (i_data.find("ORIGINAL") != std::string::npos && + s_data.find("ORIGINAL") != std::string::npos) { + line["brand_generic"] = "B"; + } else { + line["brand_generic"] = "G"; + } + 
ret->AddLine(line); + } + if (!info.need_failed) { + RetTuples single_line; + single_line["o_id"] = d_next_o_id_str; + single_line["o_ol_cnt"] = std::to_string(info.o_ol_cnt); + single_line["c_last"] = customer_ret["c_last"]; + single_line["c_credit"] = customer_ret["c_credit"]; + single_line["c_discount"] = customer_ret["c_discount"]; + single_line["w_tax"] = warehouse_ret["w_tax"]; + single_line["d_tax"] = district_ret["d_tax"]; + single_line["o_entry_d"] = datetime; + float c_discount = std::stof(customer_ret["c_discount"]); + float w_tax = std::stof(warehouse_ret["w_tax"]); + float d_tax = std::stof(district_ret["d_tax"]); + float total_amount = ol_amount_sum * ( 1 - c_discount) * (1 + w_tax + d_tax); + single_line["total_amount"] = std::to_string(total_amount); + ret->SetSingleLine(single_line); + gtxn->Commit(); + SetTxnResult(ret, gtxn.get()); + } else { + // set commit failed + SetTxnResult(ret, gtxn.get(), false, "@new_order|rowback simulation"); + } +} + +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/tera_txn/order_status_txn.cc b/src/benchmark/tpcc/tera_txn/order_status_txn.cc new file mode 100644 index 000000000..a88fe7e0c --- /dev/null +++ b/src/benchmark/tpcc/tera_txn/order_status_txn.cc @@ -0,0 +1,89 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// +// Author: baorenyi@baidu.com + +#include "benchmark/tpcc/tera_tpccdb.h" + +#include +#include + +#include "sdk/client_impl.h" +#include "sdk/sdk_utils.h" + +namespace tera { +namespace tpcc { + +void TeraTpccDb::OrderStatusTxn(bool by_last_name, + int32_t warehouse_id, int32_t district_id, + int32_t c_customer_id, + const std::string& last_name, + OrderStatusResult* ret) { + // open table + Table* t_order_index = table_map_[kTpccTables[kOrderIndex]]; + Table* t_orderline = table_map_[kTpccTables[kOrderLineTable]]; + Table* t_order = table_map_[kTpccTables[kOrderTable]]; + // begin transaction + std::unique_ptr gtxn(client_->NewGlobalTransaction()); + std::string customer_key = ""; + RetTuples customer_ret; + if (!GetCustomer(ret, gtxn.get(), by_last_name, last_name, c_customer_id, + warehouse_id, district_id, &customer_key, &customer_ret)) { + return; + } + + // find newest order from order index + ErrorCode error_code; + std::string prefix_key = std::to_string(warehouse_id) + "_" + + std::to_string(district_id) + "_"; + std::string start_key = prefix_key + customer_ret["c_id"] + "_"; + ScanDescriptor scan_desc(start_key); + scan_desc.SetEnd(start_key + "~"); + scan_desc.AddColumnFamily("cf0"); + ResultStream* scanner = t_order_index->Scan(scan_desc, &error_code); + int32_t max_order_id = -1; + for (scanner->LookUp(start_key); !scanner->Done(); scanner->Next()) { + std::string row_key = scanner->RowName(); + RowReader* index_reader = t_order_index->NewRowReader(row_key); + RetTuples index_ret; + if (!GetValues(ret, gtxn.get(), index_reader, + {"o_id"}, + &index_ret, + "@order_status|order_index_reader|" + row_key)) { + break; + } + if ( max_order_id < std::stoi(index_ret["o_id"])) { + max_order_id = std::stoi(index_ret["o_id"]); + } + } + delete scanner; + if (max_order_id == -1) { + SetTxnResult(ret, gtxn.get(), false, "not found order|" + start_key); + return; + } + std::string order_key = prefix_key + std::to_string(max_order_id); + RowReader* order_reader 
= t_order->NewRowReader(order_key); + RetTuples order_ret; + if (!GetValues(ret, gtxn.get(), order_reader, + {"o_ol_cnt", "o_id"}, + &order_ret, + "@order_status|order_reader|" + order_key)) { + return; + } + for (int32_t i = 1; i <= std::stoi(order_ret["o_ol_cnt"]); ++i) { + std::string ol_key = prefix_key + order_ret["o_id"] + "_" + std::to_string(i); + RowReader* ol_reader = t_orderline->NewRowReader(ol_key); + RetTuples ol_ret; + if (!GetValues(ret, gtxn.get(), ol_reader, + {}, // TODO + &ol_ret, + "@order_status|ol_reader|" + ol_key)) { + return; + } + } + SetTxnResult(ret, gtxn.get()); +} + +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/tera_txn/payment_txn.cc b/src/benchmark/tpcc/tera_txn/payment_txn.cc new file mode 100644 index 000000000..c45d371bd --- /dev/null +++ b/src/benchmark/tpcc/tera_txn/payment_txn.cc @@ -0,0 +1,194 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// +// Author: baorenyi@baidu.com + +#include "benchmark/tpcc/tera_tpccdb.h" + +#include +#include + +#include "sdk/client_impl.h" +#include "sdk/sdk_utils.h" + +namespace tera { +namespace tpcc { + +void TeraTpccDb::PaymentTxn(bool by_last_name, + int32_t warehouse_id, int32_t district_id, + int32_t customer_warehouse_id, int32_t customer_district_id, + int32_t c_customer_id, + const std::string& last_name, + int32_t h_amount, + PaymentResult* ret) { + // open table + Table* t_warehouse = table_map_[kTpccTables[kWarehouseTable]]; + Table* t_district = table_map_[kTpccTables[kDistrictTable]]; + Table* t_customer = table_map_[kTpccTables[kCustomerTable]]; + Table* t_history = table_map_[kTpccTables[kHistoryTable]]; + Table* t_history_index = table_map_[kTpccTables[kHistoryIndex]]; + + // begin transaction + Transaction* gtxn = client_->NewGlobalTransaction(); + + // read customer + std::string customer_key = ""; + RetTuples customer_ret; + if (!GetCustomer(ret, gtxn, by_last_name, last_name, c_customer_id, + customer_warehouse_id, customer_district_id, &customer_key, &customer_ret)) { + return; + } + + // read warehouse + std::string warehouse_key = std::to_string(warehouse_id); + RowReader* warehouse_reader = t_warehouse->NewRowReader(warehouse_key); + RetTuples warehouse_ret; + if (!GetValues(ret, gtxn, warehouse_reader, + {"w_ytd", "w_name", "w_street_1", "w_street_2", "w_city", "w_state", "w_zip"}, + &warehouse_ret, + "@payment|warehouse_reader|" + warehouse_key)) { + return; + } + + // update warehouse + RowMutation* warehouse_mu = t_warehouse->NewRowMutation(warehouse_key); + // add amount of this payment to the ytd balance of current warehouse. 
+ float w_ytd = std::stof(warehouse_ret["w_ytd"]) + h_amount; + warehouse_mu->Put("cf0", "w_ytd", std::to_string(w_ytd)); + gtxn->ApplyMutation(warehouse_mu); + delete warehouse_mu; + + // read district + std::string district_id_str = std::to_string(district_id); + std::string district_key = warehouse_key + "_" + district_id_str; + RowReader* district_reader = t_district->NewRowReader(district_key); + RetTuples district_ret; + if (!GetValues(ret, gtxn, district_reader, + {"d_ytd", "d_name", "d_street_1", "d_street_2", "d_city", "d_state", "d_zip"}, + &district_ret, + "@payment|district_reader|" + district_key)) { + return; + } + + // update district + RowMutation* district_mu = t_district->NewRowMutation(district_key); + // add amount of this payment to the ytd balance of current district. + float d_ytd = std::stof(district_ret["d_ytd"]) + h_amount; + district_mu->Put("cf0", "d_ytd", std::to_string(d_ytd)); + gtxn->ApplyMutation(district_mu); + delete district_mu; + + // update customer + // [Revision 5.11 - Page 34] see Clause 2.5.2.2 + // C_BALANCE is decreased by H_AMOUNT. + // C_YTD_PAYMENT is increased by H_AMOUNT. + // C_PAYMENT_CNT is incremented by 1. 
+ RowMutation* customer_mu = t_customer->NewRowMutation(customer_key); + std::string c_balance_str = std::to_string(std::stof(customer_ret["c_balance"]) - h_amount); + customer_mu->Put("cf0", "c_balance", c_balance_str); + customer_mu->Put("cf0", "c_ytd_payment", + std::to_string(std::stof(customer_ret["c_ytd_payment"]) + h_amount)); + customer_mu->Put("cf0", "c_payment_cnt", + std::to_string(std::stof(customer_ret["c_payment_cnt"]) + h_amount)); + + if (customer_ret["c_credit"] == "BC") { + std::string data_info = customer_key + "_" + district_key + "_" + std::to_string(h_amount); + customer_ret["c_data"].insert(0, data_info); + if (customer_ret["c_data"].size() > kCustomerDataUpperLen) { + customer_ret["c_data"].substr(0, kCustomerDataUpperLen); + } + customer_mu->Put("cf0", "c_data", customer_ret["c_data"]); + } + gtxn->ApplyMutation(customer_mu); + delete customer_mu; + + // read history_index (find newest history) + std::string history_data = warehouse_ret["w_name"] + " " + district_ret["d_name"]; + RowReader* hindex_reader = t_history_index->NewRowReader("count"); + RetTuples hindex_ret; + if (!GetValues(ret, gtxn, hindex_reader, + {"count"}, + &hindex_ret, + "@payment|hindex_reader|count")) { + return; + } + int cnt = std::stoi(hindex_ret["count"]); + + // update history_index + RowMutation* hindex_mu = t_history_index->NewRowMutation("count"); + hindex_mu->Put("cf0", "count", std::to_string(++cnt)); + gtxn->ApplyMutation(hindex_mu); + delete hindex_mu; + + // update history use now newest count as the primary key(row_key) of history + // default t_history don't have priamry key in tpcc + std::string history_key = std::to_string(cnt); + RowMutation* mu = t_history->NewRowMutation(history_key); + mu->Put("cf0", "h_c_id", customer_ret["c_id"]); + mu->Put("cf0", "h_c_d_id", customer_ret["c_d_id"]); + mu->Put("cf0", "h_c_w_id", customer_ret["c_w_id"]); + mu->Put("cf0", "h_d_id", district_id_str); + mu->Put("cf0", "h_w_id", warehouse_key); + mu->Put("cf0", 
"h_amount", std::to_string(h_amount)); + // The payment date (H_DATE) in generated within the SUT + // by using the current system date and time + std::string datetime = get_curtime_str(); + mu->Put("cf0", "h_date", datetime); + mu->Put("cf0", "h_data", history_data); + gtxn->ApplyMutation(mu); + delete mu; + + gtxn->Commit(); + RetTuples single_line; + RetTuples other_ret = { + {"w_id", warehouse_key}, + {"d_id", district_id_str}, + {"h_amount", std::to_string(h_amount)}, + {"h_date", datetime}, + {"c_balance", c_balance_str}, + {"c_data", customer_ret["c_data"].substr(0,200)} + }; + SetPaymentSingleLineRet(warehouse_ret, district_ret, customer_ret, other_ret, + &single_line); + + SetTxnResult(ret, gtxn); +} + +void TeraTpccDb::SetPaymentSingleLineRet(const RetTuples& warehouse_ret, + const RetTuples& district_ret, + const RetTuples& customer_ret, + const RetTuples& other_ret, + RetTuples* payment_ret) { + // The following fields are displayed: + // W_ID, D_ID, C_ID, C_D_ID, C_W_ID, + // W_STREET_1, W_STREET_2, W_CITY, W_STATE, W_ZIP, + // D_STREET_1, D_STREET_2, D_CITY, D_STATE, D_ZIP, + // C_FIRST, C_MIDDLE, C_LAST, C_STREET_1, C_STREET_2, C_CITY, C_STATE, + // C_ZIP, C_PHONE, C_SINCE, C_CREDIT, C_CREDIT_LIM, C_DISCOUNT, C_BALANCE, + // the first 200 characters of C_DATA (only if C_CREDIT = "BC"), + // H_AMOUNT, and H_DATE. 
+ payment_ret->insert(other_ret.begin(), other_ret.end()); + for (auto t : warehouse_ret) { + if (t.first != "w_ytd" && t.first != "w_name") { + payment_ret->insert(t); + } + } + for (auto t : district_ret) { + if (t.first != "d_ytd" && t.first != "w_name") { + payment_ret->insert(t); + } + } + std::unordered_set c_names = {"c_id", "c_d_id", "c_w_id", + "c_first", "c_middle", "c_last", "c_street_1", "c_street_2", "c_city", + "c_state", "c_zip", "c_phone", "c_since", "c_credit", "c_credit_lim", + "c_discount"}; + for (auto t : customer_ret) { + if (c_names.find(t.first) != c_names.end()) { + payment_ret->insert(t); + } + } +} + +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/tera_txn/stocklevel_txn.cc b/src/benchmark/tpcc/tera_txn/stocklevel_txn.cc new file mode 100644 index 000000000..eeb7bb06d --- /dev/null +++ b/src/benchmark/tpcc/tera_txn/stocklevel_txn.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// +// Author: baorenyi@baidu.com + +#include "benchmark/tpcc/tera_tpccdb.h" + +#include +#include + +#include "sdk/client_impl.h" +#include "sdk/sdk_utils.h" + +namespace tera { +namespace tpcc { + +void TeraTpccDb::StockLevelTxn(int32_t warehouse_id, int32_t district_id, + int32_t threshold, + StockLevelResult* ret) { + // open table + Table* t_district = table_map_[kTpccTables[kDistrictTable]]; + Table* t_order = table_map_[kTpccTables[kOrderTable]]; + Table* t_orderline = table_map_[kTpccTables[kOrderLineTable]]; + Table* t_stock = table_map_[kTpccTables[kStockTable]]; + // begin transaction + std::unique_ptr gtxn(client_->NewGlobalTransaction()); + std::string district_primary_key = std::to_string(warehouse_id) + + "_" + std::to_string(district_id); + RowReader* district_reader = t_district->NewRowReader(district_primary_key); + RetTuples district_ret; + if (!GetValues(ret, gtxn.get(), district_reader, {"d_next_o_id"}, &district_ret, + "@stock_level|district_reader|" + district_primary_key)) { + return; + } + int32_t order_id = std::stoi(district_ret["d_next_o_id"]); + + int32_t cnt = 0; + for (int32_t ol_o_id = order_id - 20; ol_o_id <= order_id; ++ol_o_id) { + std::string order_primary_key = std::to_string(warehouse_id) + + "_" + std::to_string(district_id) + "_" + std::to_string(ol_o_id); + RowReader* order_reader = t_order->NewRowReader(order_primary_key); + RetTuples order_ret; + if (!GetValues(ret, gtxn.get(), order_reader, {"o_ol_cnt"}, &order_ret, + "@stock_level|order_reader|" + order_primary_key)) { + return; + } + int32_t o_ol_cnt = std::stoi(order_ret["o_ol_cnt"]); + for (int32_t ol_number = 1; ol_number <= o_ol_cnt; ++ ol_number) { + std::string ol_primary_key = order_primary_key + "_" + std::to_string(ol_number); + RowReader* ol_reader = t_orderline->NewRowReader(ol_primary_key); + RetTuples ol_ret; + ol_reader->AddColumn("cf0", "ol_i_id"); + if (!GetValues(ret, gtxn.get(), ol_reader, {"ol_i_id"}, &ol_ret, + "@stock_level|ol_reader|" + 
ol_primary_key)) { + return; + } + int32_t ol_i_id = std::stoi(ol_ret["ol_i_id"]); + std::string stock_key = std::to_string(warehouse_id) + + "_" + std::to_string(ol_i_id); + RowReader* stock_reader = t_stock->NewRowReader(stock_key); + RetTuples stock_ret; + if (!GetValues(ret, gtxn.get(), stock_reader, {"s_quantity"}, &stock_ret, + "@stock_level|stock_reader|" + stock_key)) { + return; + } + int32_t s_quantity = std::stoi(stock_ret["s_quantity"]); + if (s_quantity < threshold) { + ++cnt; + } + } + } + // only read not need commit + ret->SetLowStock(cnt); + SetTxnResult(ret, gtxn.get()); +} + +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/test/data_generator_test.cc b/src/benchmark/tpcc/test/data_generator_test.cc new file mode 100644 index 000000000..6c5b71fe7 --- /dev/null +++ b/src/benchmark/tpcc/test/data_generator_test.cc @@ -0,0 +1,88 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// +// Author: baorenyi@baidu.com + +#include + +#include "benchmark/tpcc/data_generator.h" +#include "benchmark/tpcc/mock_tpccdb.h" +#include "benchmark/tpcc/random_generator.h" +#include "benchmark/tpcc/tpccdb.h" + +#include "gflags/gflags.h" +#include "gtest/gtest.h" + +DECLARE_int32(warehouses_count); + +namespace tera { +namespace tpcc { + +class DataGeneratorTest : public ::testing::Test { +public: + DataGeneratorTest() { + random_gen_.SetRandomConstant(); + TpccDb* db_ = (TpccDb*)(&mdb_); + data_gen_ = new DataGenerator(&random_gen_, db_); + } + + void CleanStateCounter(int table_enum_num = -1) { + if (table_enum_num == -1) { + for (int i = 0; i < kTpccTableCnt; ++i) { + data_gen_->states_[i].first.Set(0); + data_gen_->states_[i].second.Set(0); + } + } else if (table_enum_num > -1 && table_enum_num < kTpccTableCnt) { + data_gen_->states_[table_enum_num].first.Set(0); + data_gen_->states_[table_enum_num].second.Set(0); + } + } + + ~DataGeneratorTest() { + delete data_gen_; + } +private: + RandomGenerator random_gen_; + TpccDb* db_; + MockTpccDb mdb_; + DataGenerator* data_gen_; + +}; + +TEST_F(DataGeneratorTest, GenItem) { + CleanStateCounter(); + mdb_.flag_ = true; + data_gen_->GenItem(1, false); + EXPECT_TRUE(data_gen_->states_[kItemTable].first.Get() == 1); + data_gen_->GenItem(1, false); + EXPECT_TRUE(data_gen_->states_[kItemTable].first.Get() == 2); + mdb_.flag_ = false; + data_gen_->GenItem(1, false); + EXPECT_TRUE(data_gen_->states_[kItemTable].second.Get() == 1); +} + +TEST_F(DataGeneratorTest, GenStock) { + CleanStateCounter(); + mdb_.flag_ = true; + data_gen_->GenStock(1, 2, false); + EXPECT_TRUE(data_gen_->states_[kStockTable].first.Get() == 1); + data_gen_->GenStock(1, 2, false); + EXPECT_TRUE(data_gen_->states_[kStockTable].first.Get() == 2); + mdb_.flag_ = false; + data_gen_->GenStock(1, 3, false); + EXPECT_TRUE(data_gen_->states_[kStockTable].second.Get() == 1); +} + +TEST_F(DataGeneratorTest, GenStocks) { + CleanStateCounter(); + mdb_.flag_ = 
true; + for (int i = 1; i <=FLAGS_warehouses_count; ++i) { + data_gen_->GenStocks(i); + } + data_gen_->Join(); + EXPECT_TRUE(data_gen_->states_[kStockTable].first.Get() == FLAGS_warehouses_count * kItemCount); +} + +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/test/random_generator_test.cc b/src/benchmark/tpcc/test/random_generator_test.cc new file mode 100644 index 000000000..978521739 --- /dev/null +++ b/src/benchmark/tpcc/test/random_generator_test.cc @@ -0,0 +1,81 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com + +#include "benchmark/tpcc/random_generator.h" + +#include "gtest/gtest.h" + +namespace tera { +namespace tpcc { + +class RandomGenerator; + +class RandomGeneratorTest : public ::testing::Test, public RandomGenerator { +public: + RandomGeneratorTest() : RandomGenerator() { + SetRandomConstant(); + } + + ~RandomGeneratorTest() {} +}; + +TEST_F(RandomGeneratorTest, MakeFloat) { + EXPECT_EQ(MakeFloat(1.0, 1.0, 1), 1.0); + float f = MakeFloat(0, 1.0, 2); + std::cout << std::to_string(f) << std::endl; + EXPECT_TRUE(f >= 0 && f <= 1); +} + +TEST_F(RandomGeneratorTest, MakeAString) { + EXPECT_TRUE(MakeAString(0, 0) == ""); + EXPECT_TRUE((MakeAString(1, 1)).length() == 1); + std::string a_str = MakeAString(1,10); + EXPECT_TRUE(a_str.length() <= 10 && a_str.length() >= 1); + std::string a_str1 = MakeAString(26,27); + int cnt = 0; + for (int i = 0; i < a_str1.length(); ++i) { + for (int j = i + 1; j < a_str1.length(); ++j) { + if (a_str1[i] == a_str1[j]) { + ++cnt; + } + } + } + EXPECT_TRUE(cnt > 0); +} + +TEST_F(RandomGeneratorTest, MakeNString) { + EXPECT_TRUE(MakeNString(0, 0) == ""); + EXPECT_TRUE((MakeNString(1, 1)).length() == 1); + std::string n_str = MakeNString(1,10); + EXPECT_TRUE(n_str.length() <= 10 && n_str.length() >= 1); +} + +TEST_F(RandomGeneratorTest, 
MakeDisOrderList) { + std::vector dis_order_list = MakeDisOrderList(10,20); + sort(dis_order_list.begin(),dis_order_list.end()); + for (int i = 10; i <= 20; ++i) { + EXPECT_EQ(dis_order_list[i-10], i); + } +} + +TEST_F(RandomGeneratorTest, SetRandomConstant) { + SetRandomConstant(); + NURandConstant c = GetRandomConstant(); + EXPECT_TRUE(c.c_last >= 0 && c.c_last <= 255); + EXPECT_TRUE(c.c_last >= 0 && c.c_last <= 1023); + EXPECT_TRUE(c.c_last >= 0 && c.c_last <= 8191); +} + +TEST_F(RandomGeneratorTest, GetRandom) { + EXPECT_EQ(GetRandom(1, 1) , 1); + int rand_num = GetRandom(0, 1); + int rand_num1 = GetRandom(1, 0); + EXPECT_TRUE(rand_num == 0 || rand_num == 1); + EXPECT_TRUE(rand_num == 0 || rand_num == 1); +} + +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/test/tpcc_test.cc b/src/benchmark/tpcc/test/tpcc_test.cc new file mode 100644 index 000000000..04d5b4890 --- /dev/null +++ b/src/benchmark/tpcc/test/tpcc_test.cc @@ -0,0 +1,21 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +namespace tera { +namespace tpcc { + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + + +} // namespace tpcc +} // namespace tera diff --git a/src/benchmark/tpcc/tpcc_flags.cc b/src/benchmark/tpcc/tpcc_flags.cc new file mode 100644 index 000000000..4de8b300e --- /dev/null +++ b/src/benchmark/tpcc/tpcc_flags.cc @@ -0,0 +1,17 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// +// Author: baorenyi@baidu.com + +#include "gflags/gflags.h" + +DEFINE_int64(transactions_count, 200, "the count of transactions"); +DEFINE_int32(warehouses_count, 2, "the count of warsehouses"); +DEFINE_int32(tpcc_thread_pool_size, 20, "size of tpcc thread pool"); +DEFINE_int32(tpcc_run_gtxn_thread_pool_size, 20, "size of tpcc run global transactions thread pool"); +DEFINE_string(db_type, "tera", "test db type"); +DEFINE_string(tera_client_flagfile, "./tera.flag", "the flag file path of tera client"); +DEFINE_string(tera_table_schema_dir, "./tpcc_schemas/", "table schema directory"); +DEFINE_int32(generate_data_wait_times, 36000000, "generate data wait times, default 1h"); +DEFINE_int32(driver_wait_times, 36000000, "driver wait times, default 1h"); diff --git a/src/benchmark/tpcc/tpcc_main.cc b/src/benchmark/tpcc/tpcc_main.cc new file mode 100644 index 000000000..2e2df8e26 --- /dev/null +++ b/src/benchmark/tpcc/tpcc_main.cc @@ -0,0 +1,78 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// +// Author: baorenyi@baidu.com + +#include + +#include +#include + +#include "benchmark/tpcc/data_generator.h" +#include "benchmark/tpcc/driver.h" +#include "benchmark/tpcc/random_generator.h" +#include "benchmark/tpcc/tpccdb.h" +#include "benchmark/tpcc/tpcc_types.h" +#include "types.h" +#include "common/timer.h" +#include "version.h" + +DECLARE_int64(transactions_count); +DECLARE_int32(warehouses_count); +DECLARE_string(db_type); + +int main(int argc, char *argv[]) { + // load conf from flags + ::google::ParseCommandLineFlags(&argc, &argv, true); + + if (argc > 1 && strcmp(argv[1], "version") == 0) { + PrintSystemVersion(); + return 0; + } + if (FLAGS_warehouses_count > tera::tpcc::kMaxWarehouseId + && FLAGS_warehouses_count <= 0) { + LOG(ERROR) << "--warehouses_count=" << FLAGS_warehouses_count << " is not availability"; + return -1; + } + + tera::tpcc::RandomGenerator random_gen; + random_gen.SetRandomConstant(); + + tera::tpcc::TpccDb* db = tera::tpcc::TpccDb::NewTpccDb(FLAGS_db_type); + // do clean tables + if (argc == 2 && strcmp(argv[1], "clean") == 0) { + if(!db->CleanTables()) { + LOG(ERROR) << "clean tables failed, exit"; + _Exit(EXIT_FAILURE); + } + delete db; + return 0; + } + + if (!db->CreateTables()) { + LOG(ERROR) << "create tables failed, exit"; + _Exit(EXIT_FAILURE); + } + + tera::tpcc::DataGenerator data_gen(&random_gen, db); + int64_t beg_ts = tera::get_micros(); + data_gen.GenItems(); + data_gen.GenWarehouses(); + data_gen.Join(); + int64_t cost_t = tera::get_micros() - beg_ts; + LOG(INFO) << "Generate Tables Cost:" << cost_t << "us"; + + // init driver + tera::tpcc::NURandConstant constant = random_gen.GetRandomConstant(); + random_gen.SetRandomConstant(constant); + tera::tpcc::Driver driver(&random_gen, db); + // run test + int64_t beg_txn_ts = tera::get_micros(); + driver.RunTransactions(); + driver.Join(); + int64_t cost_txn_t = tera::get_micros() - beg_txn_ts; + LOG(INFO) << "RunTransactions Cost:" << cost_txn_t << "us"; + delete db; 
+ return 0; +} diff --git a/src/benchmark/tpcc/tpcc_schemas/t_customer b/src/benchmark/tpcc/tpcc_schemas/t_customer new file mode 100644 index 000000000..7b8c7ddfd --- /dev/null +++ b/src/benchmark/tpcc/tpcc_schemas/t_customer @@ -0,0 +1,5 @@ +t_customer { + lg0 { + cf0 + } +} diff --git a/src/benchmark/tpcc/tpcc_schemas/t_customer_last_index b/src/benchmark/tpcc/tpcc_schemas/t_customer_last_index new file mode 100644 index 000000000..e7990ca13 --- /dev/null +++ b/src/benchmark/tpcc/tpcc_schemas/t_customer_last_index @@ -0,0 +1,5 @@ +t_customer_last_index { + lg0 { + cf0 + } +} diff --git a/src/benchmark/tpcc/tpcc_schemas/t_district b/src/benchmark/tpcc/tpcc_schemas/t_district new file mode 100644 index 000000000..2a6cbe3a3 --- /dev/null +++ b/src/benchmark/tpcc/tpcc_schemas/t_district @@ -0,0 +1,5 @@ +t_district { + lg0 { + cf0 + } +} diff --git a/src/benchmark/tpcc/tpcc_schemas/t_history b/src/benchmark/tpcc/tpcc_schemas/t_history new file mode 100644 index 000000000..a21f40001 --- /dev/null +++ b/src/benchmark/tpcc/tpcc_schemas/t_history @@ -0,0 +1,5 @@ +t_history { + lg0 { + cf0 + } +} diff --git a/src/benchmark/tpcc/tpcc_schemas/t_history_index b/src/benchmark/tpcc/tpcc_schemas/t_history_index new file mode 100644 index 000000000..205b3aa23 --- /dev/null +++ b/src/benchmark/tpcc/tpcc_schemas/t_history_index @@ -0,0 +1,5 @@ +t_history_index { + lg0 { + cf0 + } +} diff --git a/src/benchmark/tpcc/tpcc_schemas/t_item b/src/benchmark/tpcc/tpcc_schemas/t_item new file mode 100644 index 000000000..02bf1ff5a --- /dev/null +++ b/src/benchmark/tpcc/tpcc_schemas/t_item @@ -0,0 +1,5 @@ +t_item { + lg0 { + cf0 + } +} diff --git a/src/benchmark/tpcc/tpcc_schemas/t_neworder b/src/benchmark/tpcc/tpcc_schemas/t_neworder new file mode 100644 index 000000000..e7ef005e0 --- /dev/null +++ b/src/benchmark/tpcc/tpcc_schemas/t_neworder @@ -0,0 +1,5 @@ +t_neworder { + lg0 { + cf0 + } +} diff --git a/src/benchmark/tpcc/tpcc_schemas/t_order b/src/benchmark/tpcc/tpcc_schemas/t_order new 
file mode 100644 index 000000000..4e7d0139f --- /dev/null +++ b/src/benchmark/tpcc/tpcc_schemas/t_order @@ -0,0 +1,5 @@ +t_order { + lg0 { + cf0 + } +} diff --git a/src/benchmark/tpcc/tpcc_schemas/t_order_index b/src/benchmark/tpcc/tpcc_schemas/t_order_index new file mode 100644 index 000000000..6d2a47528 --- /dev/null +++ b/src/benchmark/tpcc/tpcc_schemas/t_order_index @@ -0,0 +1,5 @@ +t_order_index { + lg0 { + cf0 + } +} diff --git a/src/benchmark/tpcc/tpcc_schemas/t_orderline b/src/benchmark/tpcc/tpcc_schemas/t_orderline new file mode 100644 index 000000000..d075e7918 --- /dev/null +++ b/src/benchmark/tpcc/tpcc_schemas/t_orderline @@ -0,0 +1,5 @@ +t_orderline { + lg0 { + cf0 + } +} diff --git a/src/benchmark/tpcc/tpcc_schemas/t_stock b/src/benchmark/tpcc/tpcc_schemas/t_stock new file mode 100644 index 000000000..a35115aa0 --- /dev/null +++ b/src/benchmark/tpcc/tpcc_schemas/t_stock @@ -0,0 +1,5 @@ +t_stock { + lg0 { + cf0 + } +} diff --git a/src/benchmark/tpcc/tpcc_schemas/t_warehouse b/src/benchmark/tpcc/tpcc_schemas/t_warehouse new file mode 100644 index 000000000..9102544ff --- /dev/null +++ b/src/benchmark/tpcc/tpcc_schemas/t_warehouse @@ -0,0 +1,5 @@ +t_warehouse { + lg0 { + cf0 + } +} diff --git a/src/benchmark/tpcc/tpcc_types.h b/src/benchmark/tpcc/tpcc_types.h new file mode 100644 index 000000000..c73e9f489 --- /dev/null +++ b/src/benchmark/tpcc/tpcc_types.h @@ -0,0 +1,139 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
namespace tera {
namespace tpcc {

const int kTpccTableCnt = 12;

// Names of the benchmark tables (kTpccTableCnt entries).
// t_customer_last_index is the index of t_customer
const char* const kTpccTables[] = {"t_item", "t_warehouse", "t_district",
                                   "t_customer", "t_history", "t_stock",
                                   "t_order", "t_orderline", "t_neworder",
                                   "t_customer_last_index", "t_order_index",
                                   "t_history_index"};

// Cumulative percentage thresholds of the transaction mix:
// StockLevel     4%    4
// OrderStatus    4%    8
// Delivery       4%    12
// Payment       43%    55
// NewOrder      45%    100
const int kTpccTransactionRatios[] = {4, 8, 12, 55, 100};

// http://www.man7.org/linux/man-pages/man3/initstate.3.html
// Current "optimal" values for the size of the state array n
// are 8, 32, 64, 128, and 256 bytes;
const int kRandomStateSize = 64;

// YTD
const float kInitYTD = 300000.00f;

// tax
const float kTaxMax = 0.20f;
const float kTaxMin = 0.10f;
const int kTaxDigits = 2;

// address
const int kStreetLowerLen = 10;
const int kStreetUpperLen = 20;
const int kCityLowerLen = 10;
const int kCityUpperLen = 20;
const int kStateLen = 2;
const int kZipLen = 9;

// warehouse
const int kMaxWarehouseId = 100;
const int kWareHouseNameLowerLen = 6;
const int kWareHouseNameUpperLen = 10;

// stock
const int kMaxQuantity = 100;
const int kMinQuantity = 10;
const int kDistLen = 24;
const int kStockDataLowerLen = 26;
const int kStockDataUpperLen = 50;
const int kMinStockLevelThreshold = 10;
const int kMaxStockLevelThreshold = 20;

// item
const int kItemCount = 100000;
const int kItemMaxIm = 10000;
const int kItemMinIm = 1;
const float kItemMaxPrice = 100.00;
const float kItemMinPrice = 1.00;
const int kItemPriceDigits = 2;
const int kItemMaxNameLen = 24;
const int kItemMinNameLen = 14;
const int kItemMaxDataLen = 50;
const int kItemMinDataLen = 26;

// district
const int kDistrictCountPerWarehouse = 10;
const int kDistrictNameLowerLen = 6;
const int kDistrictNameUpperLen = 10;

// customer
const int kCustomerCountPerDistrict = 3000;
const float kInitCreditLimit = 5000.00;
// Discount range. The two values were previously assigned the wrong way
// round (max = 0.0, min = 0.5).
const float kMaxDisCount = 0.5;
const float kMinDisCount = 0.0;
const int kDisCountDigits = 2;
const float kInitBalance = -10.00;
const float kInitYTDPayment = 10.00;
const int kInitPaymentCnt = 1;
const int kInitDeliveryCnt = 0;
const int kFirstLowerLen = 6;
const int kFirstUpperLen = 10;
const int kMiddleLen = 2;
const int kLastLen = 16;
const int kPhoneLen = 16;
const int kCreditLen = 2;
const int kCustomerDataUpperLen = 500;
const int kCustomerDataLowerLen = 300;

// order
const int kInitOrdersPerDistrict = 3000;
const int kInitAllLocal = 1;
const int kMaxCarrierId = 10;
const int kMinCarrierId = 1;
const int kMaxOrderLineCnt = 15;
const int kMinOrderLineCnt = 5;

// new order
const int kInitNewOrderCountPerDistrict = 900;

// order line
const int kMaxItemId = 100000;
const int kMinItemId = 1;
const int kInitQuantity = 5;
const int kMaxOrderLineQuantity = 10;
const float kOrderLineMinAmount = 0.01f;
const float kOrderLineMaxAmount = 9999.99f;
const int kOrderLineAmountDigits = 2;

// history
const float kInitHistoryAmount = 10.00f;
const int kHistoryDataLowerLen = 12;
const int kHistoryDataUpperLen = 24;

// runtime h_amount
const float kRuntimeMaxAmount = 5000.00f;
const float kRuntimeMinAmount = 1.00f;
const int kRuntimeAmountDigits = 2;

} // namespace tpcc
} // namespace tera
+// +// Author: baorenyi@baidu.com + +#include + +#include "benchmark/tpcc/mock_tpccdb.h" +#include "benchmark/tpcc/tera_tpccdb.h" +#include "benchmark/tpcc/tpccdb.h" + +namespace tera { +namespace tpcc { + +class TeraTpccDb; +class MockTpccDb; + +/// ------------------------- [begin item table] -------------------------- /// +std::string Item::ToString() const { + std::stringstream ss; + ss << "i_id = " << i_id + << ",i_im_id = " << i_im_id + << ",i_price = " << i_price + << ",i_name = " << i_name + << ",i_data = " << i_data; + return ss.str(); +} + +/// ------------------------- [begin warehouse table] --------------------- /// +std::string Warehouse::ToString() const { + std::stringstream ss; + ss << "w_id = " << w_id + << ",w_tax = " << w_tax + << ",w_ytd = " << w_ytd + << ",w_name = " << w_name + << ",w_street_1 = " << w_street_1 + << ",w_street_2 = " << w_street_2 + << ",w_city = " << w_city + << ",w_state = " << w_state + << ",w_zip = " << w_zip; + return ss.str(); +} + +/// ------------------------- [begin district table] ---------------------- /// + +District::District(int32_t id, int32_t w_id, RandomGenerator* rand_gen) + : d_id(id), d_w_id(w_id), d_ytd(kInitYTD), d_next_o_id(kCustomerCountPerDistrict + 1) { + d_tax = GenTax(rand_gen); + d_name = rand_gen->MakeAString(kDistrictNameLowerLen, kDistrictNameUpperLen); + d_street_1 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); + d_street_2 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); + d_city = rand_gen->MakeAString(kCityLowerLen, kCityUpperLen); + d_state = rand_gen->MakeAString(kStateLen,kStateLen); + d_zip = GenZip(rand_gen); +} + +std::string District::PrimaryKey() const { + return std::to_string(d_w_id) + "_" + + std::to_string(d_id); +} + +std::string District::ForeignKey() const { + return std::to_string(d_w_id); +} + +std::string District::ToString() const { + std::stringstream ss; + ss << "d_id = " << d_id + << ",d_w_id = " << d_w_id + << ",d_tax = " << d_tax + << ",d_ytd = 
" << d_ytd + << ",d_next_o_id = " << d_next_o_id + << ",d_name = " << d_name + << ",d_street_1 = " << d_street_1 + << ",d_street_2 = " << d_street_2 + << ",d_city = " << d_city + << ",d_state = " << d_state + << ",d_zip = " << d_zip; + return ss.str(); +} + +/// ------------------------- [begin stock table] ------------------------- /// + +Stock::Stock(int32_t id, int32_t w_id, bool is_original, RandomGenerator* rand_gen) + : s_i_id (id), s_w_id(w_id) { + s_quantity = rand_gen->GetRandom(kMinQuantity, kMaxQuantity); + s_ytd = 0; + s_order_cnt = 0; + s_remote_cnt = 0; + for (int i = 0; i < kDistrictCountPerWarehouse; ++i) { + s_dist.push_back(rand_gen->MakeAString(kDistLen, kDistLen)); + } + s_data = GenData(rand_gen, kStockDataLowerLen, kStockDataUpperLen, is_original); +} + +std::string Stock::PrimaryKey() const { + return std::to_string(s_w_id) + "_" + std::to_string(s_i_id); +} + +std::string Stock::ForeignKey() const { + return std::to_string(s_i_id); +} + +std::string Stock::ToString() const { + std::stringstream ss; + ss << "s_w_id = " << s_w_id + << ",s_quantity = " << s_quantity + << ",s_ytd = " << s_ytd + << ",s_order_cnt = " << s_order_cnt + << ",s_remote_cnt = " << s_remote_cnt + << ",s_data = " << s_data + << ",s_dist = ["; + for (auto d : s_dist) { + ss << d << ","; + } + ss << "]"; + return ss.str(); +} + +/// ------------------------- [begin order table] ------------------------- /// + +Order::Order(int32_t id, int32_t c_id, int32_t d_id, int32_t w_id, + bool new_order, const std::string& datetime, + RandomGenerator* rand_gen) + : o_id(id), o_c_id(c_id), o_d_id(d_id), o_w_id(w_id), + o_carrier_id(0), o_all_local(kInitAllLocal), + o_entry_d(datetime) { + + if (!new_order) { + o_carrier_id = rand_gen->GetRandom(kMinCarrierId, kMaxCarrierId); + } + o_ol_cnt = rand_gen->GetRandom(kMinOrderLineCnt, kMaxOrderLineCnt); +} + +std::string Order::PrimaryKey() const { + return std::to_string(o_w_id) + "_" + + std::to_string(o_d_id) + "_" + + 
std::to_string(o_id); +} + +std::string Order::ForeignKey() const { + return std::to_string(o_w_id) + "_" + + std::to_string(o_d_id) + "_" + + std::to_string(o_c_id); +} + +std::string Order::ToString() const { + std::stringstream ss; + ss << "o_id = " << o_id + << ",o_c_id = " << o_c_id + << ",o_d_id = " << o_d_id + << ",o_w_id = " << o_w_id + << ",o_carrier_id = " << o_carrier_id + << ",o_ol_cnt = " << o_ol_cnt + << ",o_all_local = " << o_all_local + << ",o_entry_d = " << o_entry_d; + return ss.str(); +} + +/// ------------------------- [begin neworder table] ---------------------- /// + + +NewOrder::NewOrder(int32_t o_id, int32_t d_id, int32_t w_id) + : no_o_id(o_id), no_d_id(d_id), no_w_id(w_id) { +} + +std::string NewOrder::ToString() const { + std::stringstream ss; + ss << "no_o_id = " << no_o_id + << ",no_d_id = " << no_d_id + << ",no_w_id = " << no_w_id; + return ss.str(); +} + +std::string NewOrder::PrimaryKey() const { + return std::to_string(no_w_id) + + "_" + std::to_string(no_d_id) + + "_" + std::to_string(no_o_id); +} + +std::string NewOrder::ForeignKey() const { + return std::to_string(no_w_id) + + "_" + std::to_string(no_d_id) + + "_" + std::to_string(no_o_id); +} + +/// ------------------------- [begin orderline table] --------------------- /// + +OrderLine::OrderLine(int32_t o_id, int32_t d_id, int32_t w_id, int32_t number, + bool new_order, const std::string& datetime, + RandomGenerator* rand_gen) + : ol_o_id(o_id), ol_d_id(d_id), ol_w_id(w_id), ol_number(number), + ol_supply_w_id(w_id), ol_quantity(kInitQuantity), + ol_amount(0.00f), ol_delivery_d(datetime) { + + ol_i_id = rand_gen->GetRandom(kMinItemId, kMaxItemId); + if (new_order) { + ol_amount = rand_gen->MakeFloat(kOrderLineMinAmount, + kOrderLineMaxAmount, + kOrderLineAmountDigits); + ol_delivery_d = ""; + } + ol_dist_info = rand_gen->MakeAString(kDistLen, kDistLen); +} + +std::string OrderLine::PrimaryKey() const { + return std::to_string(ol_w_id) + "_" + + std::to_string(ol_d_id) + "_" + 
+ std::to_string(ol_o_id) + "_" + + std::to_string(ol_number); +} + +ForeignKeyMap OrderLine::ForeignKeys() const { + ForeignKeyMap foreign_keys; + std::string order_index = std::to_string(ol_w_id) + "_" + + std::to_string(ol_d_id) + "_" + + std::to_string(ol_o_id); + std::string item_index = std::to_string(ol_supply_w_id) + "_" + + std::to_string(ol_i_id); + foreign_keys["order_index"] = order_index; + foreign_keys["item_index"] = item_index; + return foreign_keys; +} + +std::string OrderLine::ToString() const { + std::stringstream ss; + ss << "ol_o_id = " << ol_o_id + << ",ol_d_id = " << ol_d_id + << ",ol_w_id = " << ol_w_id + << ",ol_number = " << ol_number + << ",ol_i_id = " << ol_i_id + << ",ol_supply_w_id = " << ol_supply_w_id + << ",ol_quantity = " << ol_quantity + << ",ol_amount = " << ol_amount + << ",ol_delivery_d = " << ol_delivery_d + << ",ol_dist_info = " << ol_dist_info; + return ss.str(); +} + +/// ------------------------- [begin customer table] ---------------------- /// + +Customer::Customer(int32_t id, int32_t d_id, int32_t w_id, const std::string& datetime, + bool bad_credit, RandomGenerator* rand_gen) + : c_id(id), + c_d_id(d_id), + c_w_id(w_id), + c_credit_lim(kInitCreditLimit), + c_balance(kInitBalance), + c_ytd_payment(kInitYTDPayment), + c_payment_cnt(kInitPaymentCnt), + c_delivery_cnt(kInitDeliveryCnt), + c_middle("OE"), + c_since(datetime) { + c_discount = rand_gen->MakeFloat(kMinDisCount, kMaxDisCount, kDisCountDigits); + c_first = rand_gen->MakeAString(kFirstLowerLen, kFirstUpperLen); + c_last = GenLastName(rand_gen, (id <= 1000 ? id : kCustomerCountPerDistrict)); + c_street_1 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); + c_street_2 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); + c_city = rand_gen->MakeAString(kCityLowerLen, kCityUpperLen); + c_state = rand_gen->MakeAString(kStateLen,kStateLen); + c_zip = GenZip(rand_gen); + c_phone = rand_gen->MakeNString(kPhoneLen,kPhoneLen); + c_credit = bad_credit ? 
"BC" : "GC"; + c_data = GenData(rand_gen, kCustomerDataLowerLen, kCustomerDataUpperLen, false); +} + +std::string Customer::PrimaryKey() const { + return std::to_string(c_w_id) + "_" + std::to_string(c_d_id) + + "_" + std::to_string(c_id); +} + +std::string Customer::ForeignKey() const { + return std::to_string(c_w_id) + "_" + std::to_string(c_d_id); +} + +std::string Customer::ToString() const { + std::stringstream ss; + ss << "c_id = " << c_id + << ",c_d_id = " << c_d_id + << ",c_w_id = " << c_w_id + << ",c_credit_lim = " << c_credit_lim + << ",c_discount = " << c_discount + << ",c_balance = " << c_balance + << ",c_ytd_payment = " << c_ytd_payment + << ",c_payment_cnt = " << c_payment_cnt + << ",c_delivery_cnt = " << c_delivery_cnt + << ",c_name = [" << c_first << "," << c_middle << "," << c_last << "]" + << ",c_street_1 = " << c_street_1 + << ",c_street_2 = " << c_street_2 + << ",c_city = " << c_city + << ",c_state = " << c_state + << ",c_zip = " << c_zip + << ",c_phone = " << c_phone + << ",c_since = " << c_since + << ",c_credit = " << c_credit + << ",c_data = " << c_data; + return ss.str(); +} + +/// ------------------------- [begin history table] ----------------------- /// +std::string History::ToString() const { + std::stringstream ss; + ss << "h_c_id = " << h_c_id + << ",h_c_d_id = " << h_c_d_id + << ",h_c_w_id = " << h_c_w_id + << ",h_d_id = " << h_d_id + << ",h_w_id = " << h_w_id + << ",h_amount = " << h_amount + << ",h_date = " << h_date + << ",h_data = " << h_data; + return ss.str(); +} + +/// ------------------------- [end tables] -------------------------------- /// + +bool TxnResult::State() const { + return status_; +} + +void TxnResult::SetState(bool status) { + status_ = status; +} + +void TxnResult::SetReason(const std::string& reason) { + reason_ = reason; +} + +void StockLevelResult::SetLowStock(int low_stock) { + low_stock_ = low_stock; +} + +int StockLevelResult::LowStock() const { + return low_stock_; +} + +void 
PaymentResult::SetSingleLine(const RetTuples& single_line) { + single_line_ = single_line; +} + +void NewOrderResult::AddLine(const RetTuples& line) { + lines_.push_back(line); +} + +void NewOrderResult::SetSingleLine(const RetTuples& single_line) { + single_line_ = single_line; +} + +TpccDb* TpccDb::NewTpccDb(const std::string& db_type) { + if (db_type == "tera") { + return new TeraTpccDb(); + } else { + LOG(ERROR) << "not support db:" << db_type; + } + return NULL; +} + +} // namespace tpcc +} // namespace tera + diff --git a/src/benchmark/tpcc/tpccdb.h b/src/benchmark/tpcc/tpccdb.h new file mode 100644 index 000000000..93b3c32f3 --- /dev/null +++ b/src/benchmark/tpcc/tpccdb.h @@ -0,0 +1,471 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com + +#ifndef TERA_BENCHMARK_TPCC_TPCCDB_H +#define TERA_BENCHMARK_TPCC_TPCCDB_H + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "benchmark/tpcc/random_generator.h" +#include "benchmark/tpcc/tpcc_types.h" + +namespace tera { +namespace tpcc { + +typedef std::unordered_set IdSet; +typedef std::unordered_map ForeignKeyMap; +typedef std::unordered_map RetTuples; + + +inline float GenTax(RandomGenerator* rand_gen) { + return rand_gen->MakeFloat(kTaxMax, kTaxMin, kTaxDigits); +} + +inline std::string GenZip(RandomGenerator* rand_gen) { + return rand_gen->MakeNString(kZipLen, kZipLen); +} + +inline std::string GenData(RandomGenerator* rand_gen, + int lower_len, + int upper_len, + bool is_original) { + std::string ret = rand_gen->MakeAString(lower_len, upper_len); + if (is_original) { + int pos = rand_gen->GetRandom(0, ret.size() - 8); + ret = ret.replace(pos, 8, "ORIGINAL"); + } + return ret; +} + +inline std::string GenLastName(RandomGenerator* rand_gen, int id) { + if (id > 999) { + id = rand_gen->NURand(255, 0, std::min(999, id - 
1)); + } + std::vector labels = {"BAR", "OUGHT", "ABLE", "PRI", "PRES", + "ESE", "ANTI", "CALLY", "ATION", "EING"}; + return labels[id / 100] + labels[(id / 10) % 10] + labels[id % 10]; +} + +inline IdSet PickUniqueIdSet(RandomGenerator* rand_gen, size_t cnt, int lower_id, int upper_id) { + IdSet ids; + while(ids.size() < cnt) { + int tmp_id = rand_gen->GetRandom(lower_id, upper_id); + if (ids.find(tmp_id) == ids.end()) { + ids.insert(tmp_id); + } + } + return ids; +} + +struct Item { + int32_t i_id; + int32_t i_im_id; + float i_price; + std::string i_name; + std::string i_data; + + Item(int32_t id, bool is_original, RandomGenerator* rand_gen) : i_id(id) { + i_im_id = rand_gen->GetRandom(kItemMinIm, kItemMaxIm); + i_price = rand_gen->MakeFloat(kItemMinPrice, kItemMaxPrice, kItemPriceDigits); + i_name = rand_gen->MakeAString(kItemMinNameLen, kItemMaxNameLen); + i_data = GenData(rand_gen, kItemMinDataLen, kItemMaxDataLen, is_original); + } + + std::string PrimaryKey() const { return std::to_string(i_id); } + std::string ToString() const; +}; + +struct Warehouse { + int32_t w_id; + float w_tax; + float w_ytd; + std::string w_name; + std::string w_street_1; + std::string w_street_2; + std::string w_city; + std::string w_state; + std::string w_zip; + Warehouse(int32_t id, RandomGenerator* rand_gen) : w_id(id) { + w_tax = GenTax(rand_gen); + w_ytd = kInitYTD; + w_name = rand_gen->MakeAString(kWareHouseNameLowerLen, kWareHouseNameUpperLen); + w_street_1 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); + w_street_2 = rand_gen->MakeAString(kStreetLowerLen, kStreetUpperLen); + w_city = rand_gen->MakeAString(kCityLowerLen, kCityUpperLen); + w_state = rand_gen->MakeAString(kStateLen,kStateLen); + w_zip = GenZip(rand_gen); + } + std::string PrimaryKey() const { return std::to_string(w_id); } + std::string ToString() const; +}; + +struct District { + int32_t d_id; + int32_t d_w_id; + float d_tax; + float d_ytd; + int32_t d_next_o_id; + std::string d_name; + std::string 
d_street_1; + std::string d_street_2; + std::string d_city; + std::string d_state; + std::string d_zip; + + District(int32_t id, int32_t w_id, RandomGenerator* rand_gen); + std::string PrimaryKey() const; + std::string ForeignKey() const; + std::string ToString() const; +}; + +struct Stock { +int32_t s_i_id; + int32_t s_w_id; + int32_t s_quantity; + int32_t s_ytd; + int32_t s_order_cnt; + int32_t s_remote_cnt; + std::vector s_dist; + std::string s_data; + + Stock(int32_t id, int32_t w_id, bool is_original, RandomGenerator* rand_gen); + std::string PrimaryKey() const; + std::string ForeignKey() const; + std::string ToString() const; +}; + +struct Customer { + int32_t c_id; + int32_t c_d_id; + int32_t c_w_id; + float c_credit_lim; + float c_discount; + float c_balance; + float c_ytd_payment; + int32_t c_payment_cnt; + int32_t c_delivery_cnt; + std::string c_first; + std::string c_middle; + std::string c_last; + std::string c_street_1; + std::string c_street_2; + std::string c_city; + std::string c_state; + std::string c_zip; + std::string c_phone; + std::string c_since; + std::string c_credit; + std::string c_data; + Customer(int32_t id, int32_t d_id, int32_t w_id, const std::string& datetime, + bool bad_credit, RandomGenerator* rand_gen); + std::string PrimaryKey() const; + std::string ForeignKey() const; + std::string ToString() const; +}; + +struct Order { + int32_t o_id; + int32_t o_c_id; + int32_t o_d_id; + int32_t o_w_id; + int32_t o_carrier_id; + int32_t o_ol_cnt; + + // If the order includes only home order-lines, + // then O_ALL_LOCAL is set to 1, otherwise O_ALL_LOCAL is set to 0. 
+ int32_t o_all_local; + std::string o_entry_d; + + Order(int32_t id, int32_t c_id, int32_t d_id, int32_t w_id, bool new_order, + const std::string& datetime, RandomGenerator* rand_gen); + std::string PrimaryKey() const; + std::string ForeignKey() const; + std::string ToString() const; +}; + +// An order-line is said to be 'home' if it is supplied by the home warehouse +// (i.e., when OL_SUPPLY_W_ID equals O_W_ID). +// +// An order-line is said to be remote when it is supplied by a remote warehouse +// (i.e., when OL_SUPPLY_W_ID does not equal O_W_ID). +// +struct OrderLine { + int32_t ol_o_id; + int32_t ol_d_id; + int32_t ol_w_id; + int32_t ol_number; + int32_t ol_i_id; + int32_t ol_supply_w_id; + int32_t ol_quantity; + float ol_amount; + std::string ol_delivery_d; + std::string ol_dist_info; + + OrderLine(int32_t o_id, int32_t d_id, int32_t w_id, int32_t number, + bool new_order, const std::string& datetime, + RandomGenerator* rand_gen); + std::string PrimaryKey() const; + ForeignKeyMap ForeignKeys() const; + std::string ToString() const; +}; + +struct NewOrder { + int32_t no_o_id; + int32_t no_d_id; + int32_t no_w_id; + + NewOrder(int32_t o_id, int32_t d_id, int32_t w_id); + std::string PrimaryKey() const; + std::string ForeignKey() const; + std::string ToString() const; +}; + +struct History { + int32_t h_c_id; + int32_t h_c_d_id; + int32_t h_c_w_id; + int32_t h_d_id; + int32_t h_w_id; + float h_amount; + std::string h_date; + std::string h_data; + + History(int32_t c_id, int32_t d_id, int32_t w_id, const std::string& datetime, + RandomGenerator* rand_gen) + : h_c_id(c_id), h_c_d_id(d_id), h_c_w_id(w_id), h_d_id(d_id), h_w_id(w_id), + h_amount(kInitHistoryAmount), h_date(datetime) { + h_data = rand_gen->MakeAString(kHistoryDataLowerLen, kHistoryDataUpperLen); + } + std::string PrimaryKey() const { return std::to_string(h_c_id); } + std::string ToString() const; +}; + +struct NewOrderInfo { + bool need_failed; + int32_t o_all_local; + int32_t o_ol_cnt; + 
std::vector ol_supply_w_ids; + std::vector ol_i_ids; + std::vector ol_quantities; +}; + +enum TpccTables +{ + kItemTable = 0, + kWarehouseTable = 1, + kDistrictTable = 2, + kCustomerTable = 3, + kHistoryTable = 4, + kStockTable = 5, + kOrderTable = 6, + kOrderLineTable = 7, + kNewOrderTable = 8, + + // the index of table + kCustomerLastIndex = 9, + kOrderIndex = 10, + kHistoryIndex = 11 +}; + +/// ------------------------- transaction result ---------------------------/// + +class TxnResult { +public: + void SetState(bool status); + bool State() const; + void SetReason(const std::string& reason); + const std::string& Reason() const; +private: + bool status_; + std::string reason_; +}; + +class StockLevelResult : public TxnResult { +public: + void SetLowStock(int low_stock); + int LowStock() const; +private: + int low_stock_; +}; + +class PaymentResult : public TxnResult { +public: + void SetSingleLine(const RetTuples& single_line); +private: + RetTuples single_line_; +}; + +class NewOrderResult : public TxnResult { +public: + void AddLine(const RetTuples& line); + void SetSingleLine(const RetTuples& single_line); +private: + std::vector lines_; + RetTuples single_line_; +}; + +class OrderStatusResult : public TxnResult { + +}; + +class DeliveryResult : public TxnResult { + +}; + +class TpccDb { +public: + TpccDb(){} + virtual ~TpccDb(){} + + // init db + virtual bool CreateTables() = 0; + virtual bool CleanTables() = 0; + + // for insert table + virtual bool InsertItem(const Item& i) = 0; + + virtual bool InsertWarehouse(const Warehouse& w) = 0; + + virtual bool InsertDistrict(const District& d) = 0; + + virtual bool InsertCustomer(const Customer& c) = 0; + + virtual bool InsertHistory(const History& h) = 0; + + virtual bool InsertStock(const Stock& s) = 0; + + virtual bool InsertOrder(const Order& o) = 0; + + virtual bool InsertOrderLine(const OrderLine& ol) = 0; + + virtual bool InsertNewOrder(const NewOrder& no) = 0; + + // for transaction + + // The Stock-Level 
Transaction [Revision 5.11 - Page 44] + // + // (warehouse_id, district_id) + // is the primarykey of t_district + // Each terminal must use a unique value of (W_ID, D_ID) that is constant + // over the whole measurement, i.e., D_IDs cannot be re-used within a warehouse + // + // threshold + // The threshold of minimum quantity in stock (threshold) is selected + // at random within [10 .. 20]. + // + virtual void StockLevelTxn(int32_t warehouse_id, int32_t district_id, + int32_t threshold, + StockLevelResult* ret) = 0; + + // The Delivery Transaction [Revision 5.11 - Page 40] + // + // warehouse_id + // For any given terminal, the home warehouse number (W_ID) is constant + // over the whole measurement interval + // + // carrier_id + // The carrier number (O_CARRIER_ID) is randomly selected within [1 .. 10]. + // + // delivery_datetime + // The delivery date (OL_DELIVERY_D) is generated within the + // SUT by using the current system date and time. + // + virtual void DeliveryTxn(int32_t warehouse_id, + int32_t carrier_id, + const std::string& delivery_datetime, + DeliveryResult* ret) = 0; + + // The Order-Status Transaction [Revision 5.11 - Page 37] + // + // warehouse_id + // For any given terminal, the home warehouse number (W_ID) is constant + // over the whole measurement interval + // + // district_id + // The district number (D_ID) is randomly selected within [1 .. 10] + // from the home warehouse (D_W_ID = W_ID). + // + // c_warehouse_id, c_district_id, last_name + // customer is randomly selected + // 60% of the time by last name (C_W_ID, C_D_ID, C_LAST) + // from the selected district (C_D_ID = D_ID) + // and the home warehouse number (C_W_ID = W_ID). + // + // c_warehouse_id, c_district_id, customer_id + // 40% of the time by number (C_W_ID, C_D_ID, C_ID) + // from the selected district (C_D_ID = D_ID) + // and the home warehouse number (C_W_ID = W_ID). 
+ // + virtual void OrderStatusTxn(bool by_last_name, + int32_t warehouse_id, int32_t district_id, + int32_t c_customer_id, + const std::string& last_name, + OrderStatusResult* ret) = 0; + + // The Payment Transaction [Revision 5.11 - Page 33] + // + // warehouse_id + // For any given terminal, the home warehouse number (W_ID) is constant + // over the whole measurement interval + // + // district_id + // The district number (D_ID) is randomly selected within [1 .. 10] + // from the home warehouse (D_W_ID = W_ID). + // + // c_warehouse_id, c_district_id, last_name + // The customer is randomly selected + // 1) 60% of the time by last name (C_W_ID , C_D_ID, C_LAST) + // c_warehouse_id, c_district_id, customer_id + // The customer is randomly selected + // 2) 40% of the time by number (C_W_ID , C_D_ID , C_ID). + // + // h_amount + // The payment amount (H_AMOUNT) is randomly selected within + // [1.00 .. 5,000.00]. + // + virtual void PaymentTxn(bool by_last_name, + int32_t warehouse_id, int32_t district_id, + int32_t c_warehouse_id, int32_t c_district_id, + int32_t c_customer_id, + const std::string& last_name, + int32_t h_amount, + PaymentResult* ret) = 0; + + + // The New-Order Transaction [Revision 5.11 - Page 28] + // warehouse_id + // For any given terminal, the home warehouse number (W_ID) is constant + // over the whole measurement interval + // + // district_id + // The district number (D_ID) is randomly selected within [1 .. 10] + // from the home warehouse (D_W_ID = W_ID). + // + // customer_id + // The non-uniform random customer number (C_ID) is selected using + // the NURand(1023,1,3000) function from the selected district + // number (C_D_ID = D_ID) and the home warehouse number (C_W_ID = W_ID). 
+ // + virtual void NewOrderTxn(int32_t warehouse_id, + int32_t district_id, + int32_t customer_id, const NewOrderInfo& info, + NewOrderResult* ret) = 0; + + static TpccDb* NewTpccDb(const std::string& db_type); +}; + +} // namespace tpcc +} // namespace tera + +#endif /* TERA_BENCHMARK_TPCC_TPCCDB_H */ diff --git a/src/common/atomic.h b/src/common/atomic.h index 6837cb302..195a7b0da 100644 --- a/src/common/atomic.h +++ b/src/common/atomic.h @@ -1,11 +1,10 @@ +#pragma once // Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. +#include -#ifndef TERA_COUNTER_ATOMIC_H_ -#define TERA_COUNTER_ATOMIC_H_ - -namespace common { +namespace tera { static inline int atomic_add(volatile int *mem, int add) { @@ -106,5 +105,4 @@ static inline int64_t atomic_comp_swap64(volatile void *mem, int64_t xchg, int64 return cmp; } -} // namespace common -#endif // TERA_COMMON_ATOMIC_H_ +} diff --git a/src/common/counter.h b/src/common/counter.h index c9869f633..d4687bfd8 100644 --- a/src/common/counter.h +++ b/src/common/counter.h @@ -7,10 +7,10 @@ #include -#include "atomic.h" -#include "timer.h" +#include "common/atomic.h" +#include "common/timer.h" -namespace common { +namespace tera { class Counter { public: @@ -47,19 +47,19 @@ class AutoCounter { : counter_(counter), msg1_(msg1), msg2_(msg2) { - start_ = timer::get_micros(); + start_ = get_micros(); counter_->Inc(); } ~AutoCounter() { - int64_t end = timer::get_micros(); + int64_t end = get_micros(); if (end - start_ > 5000000) { int64_t t = (end - start_) / 1000000; if (!msg2_) { fprintf(stderr, "%s [AutoCounter] %s hang for %ld s\n", - timer::get_curtime_str().data(), msg1_, t); + get_curtime_str().data(), msg1_, t); } else { fprintf(stderr, "%s [AutoCounter] %s %s hang for %ld s\n", - timer::get_curtime_str().data(), msg1_, msg2_, t); + get_curtime_str().data(), msg1_, msg2_, t); } } counter_->Dec(); diff --git 
a/src/common/cpu_profiler.cc b/src/common/cpu_profiler.cc new file mode 100644 index 000000000..758ed674d --- /dev/null +++ b/src/common/cpu_profiler.cc @@ -0,0 +1,50 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include + +#include "common/cpu_profiler.h" + +namespace tera { + +CpuProfiler::CpuProfiler() + : exit_(false), + thread_(&CpuProfiler::run, this) {} + +CpuProfiler::~CpuProfiler() { + exit_ = true; + cv_.notify_one(); + thread_.join(); + ProfilerState ps; + ProfilerGetCurrentState(&ps); + if (ps.enabled) { + ProfilerStop(); + } +} + +void CpuProfiler::run() { + while (!exit_.load()) { + if (enable_) { + ProfilerState ps; + ProfilerGetCurrentState(&ps); + if (ps.enabled == 0) { + ProfilerStart(profiler_file_.c_str()); + } + + ProfilerFlush(); + LOG(INFO) << "[Cpu Profiler] Cpu Profiler Dumped"; + } else { + ProfilerState ps; + ProfilerGetCurrentState(&ps); + if (ps.enabled) { + ProfilerStop(); + } + } + std::unique_lock lock(lock_); + cv_.wait_for(lock, interval_); + } +} + +} // namespace tera \ No newline at end of file diff --git a/src/common/cpu_profiler.h b/src/common/cpu_profiler.h new file mode 100644 index 000000000..ccf0686ab --- /dev/null +++ b/src/common/cpu_profiler.h @@ -0,0 +1,68 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_CPU_PROFILER_H +#define TERA_CPU_PROFILER_H + +#include +#include +#include +#include +#include + +#include +#include + +namespace tera { + +class CpuProfiler { +public: + /** + * @brief Init CpuProfiler and the detect thread will start + **/ + CpuProfiler(); + + ~CpuProfiler(); + + CpuProfiler& SetEnable(bool enable) { + enable_ = enable; + if (enable_) { + LOG(INFO) << "[Cpu Profiler] Cpu Profiler Enabled"; + } else { + LOG(INFO) << "[Cpu Profiler] Cpu Profiler Disabled"; + } + cv_.notify_one(); + return *this; + } + + CpuProfiler& SetInterval(int second) { + interval_ = std::chrono::seconds(second); + cv_.notify_one(); + return *this; + } + + CpuProfiler& SetProfilerFile(const std::string& file) { + profiler_file_ = file; + cv_.notify_one(); + return *this; + } + +private: + void run(); + +private: + std::atomic exit_; + bool enable_{false}; + std::chrono::seconds interval_{10}; + std::string profiler_file_; + std::thread thread_; + std::mutex lock_; + std::condition_variable cv_; +}; + +} // namespace tera + +#endif //TERA_CPU_PROFILER_H + +/* vim: set ts=4 sw=4 sts=4 tw=100 */ diff --git a/src/common/event.h b/src/common/event.h index a289d0278..9a6770ece 100644 --- a/src/common/event.h +++ b/src/common/event.h @@ -46,8 +46,75 @@ class AutoResetEvent { bool signaled_; }; +class CompletedEvent { +public: + CompletedEvent() + : cv_(&mutex_), cnt_(0), triggered_(false) {} + + CompletedEvent(int64_t task_cnt) + : cv_(&mutex_), cnt_(task_cnt), triggered_(false) {} + + // add event source, + // tasks maybe add while others finished or doing, like a task queue + void AddEventSources(int64_t task_cnt) { + MutexLock lock(&mutex_); + if (!triggered_) { + cnt_ += task_cnt; + } + } + + // call after all tasks added to EventSource, + // trigger other thread's Wait() function take effect. 
+ void Trigger() { + MutexLock lock(&mutex_); + triggered_ = true; + if (cnt_ <= 0) { + cv_.Signal(); + } + } + + // wait until cnt_ == 0 and triggered_ == true + void Wait() { + MutexLock lock(&mutex_); + // cnt_ > 0 + while (cnt_ > 0 || !triggered_) { + cv_.Wait(); + } + } + + // wait for 'timeout' ms, don't careful cnt_ and triggered_ + // if last event source completed, this will returned early 'timeout' + bool TimeWait(int64_t timeout) { + MutexLock lock(&mutex_); + if (cnt_ > 0 || !triggered_) { + cv_.TimeWait(timeout); + } + return cnt_ > 0 ? false : true; + } + + // last event source complated and triggered_ == true, will be notify + // Wait or TimeWait + void Complete(int64_t task_cnt = 1) { + MutexLock lock(&mutex_); + cnt_ -= task_cnt; + // use 'triggered_' to make sure all tasks call 'AddEventSources' + if (cnt_ <= 0 && triggered_) { + cv_.Signal(); + } + } + +private: + CompletedEvent(const CompletedEvent&) = delete; + CompletedEvent &operator=(const CompletedEvent&) = delete; + Mutex mutex_; + CondVar cv_; + int64_t cnt_; + bool triggered_; +}; + } // namespace common using common::AutoResetEvent; +using common::CompletedEvent; #endif // TERA_COMMON_EVENT_H_ diff --git a/src/common/file/file_path.cc b/src/common/file/file_path.cc index 44738117f..ea3a8ef08 100644 --- a/src/common/file/file_path.cc +++ b/src/common/file/file_path.cc @@ -146,6 +146,33 @@ bool ListCurrentDir(const std::string& dir_path, return true; } +bool ListCurrentDirWithStat(const std::string& dir_path, + std::vector* file_list) { + DIR *dir = NULL; + struct dirent *ptr = NULL; + dir = opendir(dir_path.c_str()); + if (dir == NULL) { + return false; + } + bool stat_all_succ = true; + while ((ptr = readdir(dir)) != NULL) { + if (strcmp(ptr->d_name, ".") != 0 && strcmp(ptr->d_name, "..") != 0) { + struct stat st; + std::string file_name(ptr->d_name); + file_name = dir_path + "/" + file_name; + if (lstat(file_name.c_str(), &st) == 0) { + file_list->push_back(std::make_pair(file_name, 
st)); + } else { + // break if stat fail and return false later + stat_all_succ = false; + break; + } + } + } + closedir(dir); + return stat_all_succ; +} + bool IsExist(const std::string& path) { return access(path.c_str(), R_OK) == 0; } diff --git a/src/common/file/file_path.h b/src/common/file/file_path.h index e0ab5d002..d5e04ea99 100644 --- a/src/common/file/file_path.h +++ b/src/common/file/file_path.h @@ -8,6 +8,8 @@ #include #include #include +#include +#include void SplitStringPath(const std::string& full_path, std::string* dir_part, @@ -28,6 +30,11 @@ std::string UidToName(uid_t uid); bool ListCurrentDir(const std::string& dir_path, std::vector* file_list); +typedef std::pair FileStateInfo; + +bool ListCurrentDirWithStat(const std::string& dir_path, + std::vector* file_list); + bool IsExist(const std::string& path); bool IsDir(const std::string& path); diff --git a/src/common/heap_profiler.cc b/src/common/heap_profiler.cc new file mode 100644 index 000000000..386e314e9 --- /dev/null +++ b/src/common/heap_profiler.cc @@ -0,0 +1,51 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include +#include +#include +#include + +#include "common/heap_profiler.h" + +namespace tera { + +HeapProfiler::HeapProfiler() + : exit_(false), + thread_(&HeapProfiler::run, this) {} + +HeapProfiler::~HeapProfiler() { + exit_ = true; + cv_.notify_one(); + thread_.join(); + if (IsHeapProfilerRunning()) { + HeapProfilerStop(); + } +} + +void HeapProfiler::run() { + while (!exit_.load()) { + if (enable_) { + // "reason" is time + std::time_t t = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); + char ts[128]; + ctime_r(&t, ts); + ts[strlen(ts) - 1] = '\0'; // erase \n + + if (IsHeapProfilerRunning() == 0) { + HeapProfilerStart(profiler_file_.c_str()); + } + HeapProfilerDump(ts); + LOG(INFO) << "[Heap Profiler] Heap Profiler Dumped"; + } else { + if (IsHeapProfilerRunning()) { + HeapProfilerStop(); + } + } + std::unique_lock lock(lock_); + cv_.wait_for(lock, interval_); + } +} + +} // namespace tera \ No newline at end of file diff --git a/src/common/heap_profiler.h b/src/common/heap_profiler.h new file mode 100644 index 000000000..f5ffa9c6b --- /dev/null +++ b/src/common/heap_profiler.h @@ -0,0 +1,90 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_HEAP_PROFILER_H +#define TERA_HEAP_PROFILER_H + +#include +#include +#include +#include +#include +#include + +#include +#include + +DECLARE_int64(heap_profile_allocation_interval); +DECLARE_int64(heap_profile_inuse_interval); + +namespace tera { + +class HeapProfiler { +public: + + /** + * @brief Init HeapProfiler and the detect thread will start + **/ + HeapProfiler(); + /** + * @brief: the heap profiler will stop after descontrucor called + * + **/ + ~HeapProfiler(); + + HeapProfiler& SetEnable(bool enable) { + enable_ = enable; + + if (enable_) { + setenv("HEAP_PROFILE_ALLOCATION_INTERVAL", + std::to_string(FLAGS_heap_profile_allocation_interval).c_str(), + 1); + + setenv("HEAP_PROFILE_INUSE_INTERVAL", + std::to_string(FLAGS_heap_profile_inuse_interval).c_str(), + 1); + + LOG(INFO) << "[Heap Profiler] HEAP_PROFILE_ALLOCATION_INTERVAL: " + << getenv("HEAP_PROFILE_ALLOCATION_INTERVAL"); + LOG(INFO) << "[Heap Profiler] HEAP_PROFILE_INUSE_INTERVAL: " + << getenv("HEAP_PROFILE_INUSE_INTERVAL"); + LOG(INFO) << "[Heap Profiler] Heap Profiler Enabled"; + } else { + unsetenv("HEAP_PROFILE_ALLOCATION_INTERVAL"); + unsetenv("HEAP_PROFILE_INUSE_INTERVAL"); + LOG(INFO) << "[Heap Profiler] Heap Profiler Disabled"; + } + cv_.notify_one(); + return *this; + } + + HeapProfiler& SetInterval(int second) { + interval_ = std::chrono::seconds(second); + cv_.notify_one(); + return *this; + } + + HeapProfiler& SetProfilerFile(const std::string& file) { + profiler_file_ = file; + cv_.notify_one(); + return *this; + } + +private: + void run(); +private: + std::atomic exit_; + bool enable_{false}; + std::chrono::seconds interval_{10}; + std::string profiler_file_; + std::thread thread_; + std::mutex lock_; + std::condition_variable cv_; +}; + +} // namespace tera + +#endif //TERA_HEAP_PROFILER + +/* vim: set ts=4 sw=4 sts=4 tw=100 */ \ No newline at end of file diff --git a/src/common/log/log_cleaner.cc b/src/common/log/log_cleaner.cc new file mode 100644 index 
000000000..6b5474a1d --- /dev/null +++ b/src/common/log/log_cleaner.cc @@ -0,0 +1,322 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "common/log/log_cleaner.h" + +#include +#include +#include +#include + +#include + +#include "common/file/file_path.h" +#include "common/timer.h" + +DECLARE_string(log_dir); +DECLARE_string(tera_log_prefix); +DECLARE_string(tera_leveldb_log_path); +DECLARE_int64(tera_info_log_clean_period_second); +DECLARE_int64(tera_info_log_expire_second); +DECLARE_string(ins_log_file); + +namespace common { + +static const int64_t kMinCleanPeriodMs = 1000; // 1s +static const int64_t kMinInfoLogExpireSec = 1; // 1s +static const size_t kPathMaxLen = 64; + +Mutex LogCleaner::inst_init_mutex_; +LogCleaner* LogCleaner::singleton_instance_ = NULL; + +static std::string GetProcFdPath() { + char path_buf[kPathMaxLen]; + snprintf(path_buf, kPathMaxLen, "/proc/%d/fd", getpid()); + return std::string(path_buf); +} + +static std::string GetFileNameFromPath(const std::string& path) { + std::string::size_type pos = path.rfind("/"); + if (pos == std::string::npos) { + return path; + } else { + return path.substr(pos + 1); + } +} + + +LogCleaner* LogCleaner::GetInstance(ThreadPool *thread_pool) { + if (singleton_instance_ == NULL) { + singleton_instance_ = new LogCleaner(FLAGS_log_dir, + FLAGS_tera_info_log_clean_period_second, + FLAGS_tera_info_log_expire_second, + thread_pool); + singleton_instance_->AddPrefix(FLAGS_tera_log_prefix); + singleton_instance_->AddPrefix(GetFileNameFromPath(FLAGS_tera_leveldb_log_path)); + singleton_instance_->AddPrefix(GetFileNameFromPath(FLAGS_ins_log_file)); + } + return singleton_instance_; +} + +bool LogCleaner::StartCleaner(ThreadPool *thread_pool) { + return GetInstance()->Start(); +} + +void LogCleaner::StopCleaner() { + MutexLock l(&inst_init_mutex_, "Destroy log cleaner"); + if 
(singleton_instance_ != NULL) { + singleton_instance_->Stop(); + delete singleton_instance_; + singleton_instance_ = NULL; + } +} + +LogCleaner::LogCleaner(const std::string& log_dir, + int64_t period_second, + int64_t expire_second, + ThreadPool *thread_pool) + : thread_pool_(thread_pool), + thread_pool_own_(false), + mutex_(), + info_log_dir_(log_dir), + log_prefix_list_(), + info_log_clean_period_ms_(std::max(period_second * 1000, kMinCleanPeriodMs)), + info_log_expire_sec_(std::max(expire_second, kMinInfoLogExpireSec)), + stop_(false), + bg_exit_(false), + bg_cond_(&mutex_), + bg_func_(std::bind(&LogCleaner::CleanTaskWrap, this)), + bg_task_id_(-1), + proc_fd_path_(GetProcFdPath()) {} + +LogCleaner::~LogCleaner() { + DestroyOwnThreadPool(); +} + +static bool CheckDirPath(const std::string &dir_path) { + return !dir_path.empty() && IsDir(dir_path); +} + +bool LogCleaner::CheckOptions() const { + return CheckDirPath(info_log_dir_) && + info_log_clean_period_ms_ > 0 && + info_log_expire_sec_ > 0; +} + +bool LogCleaner::Start() { + if (!CheckOptions()) { + return false; + } + + MutexLock l(&mutex_, "Start info log cleaner"); + + // double check + if (IsRunning()) { + return true; + } + + stop_ = false; + bg_exit_ = false; + if (nullptr == thread_pool_) { + NewThreadPool(); + } + + if (bg_task_id_ <= 0) { + // start immediately + bg_task_id_ = thread_pool_->DelayTask(0, bg_func_); + } + return true; +} + +void LogCleaner::Stop() { + MutexLock l(&mutex_, "Stop info log cleaner"); + stop_ = true; + bool is_running = false; + if (bg_task_id_ > 0) { + bg_exit_ = thread_pool_->CancelTask(bg_task_id_, true, &is_running); + } else { + bg_exit_ = true; + } + + CHECK(is_running || bg_exit_); + while(!bg_exit_) { + bg_cond_.Wait(); + } + bg_task_id_ = -1; +} + +void LogCleaner::CleanTaskWrap() { + MutexLock l(&mutex_); + DoCleanLocalLogs(); + if (stop_) { + bg_task_id_ = -1; + bg_exit_ = true; + } else { + bg_task_id_ = thread_pool_->DelayTask(info_log_clean_period_ms_, 
bg_func_); + } + bg_cond_.Signal(); +} + +bool LogCleaner::CheckLogPrefix(const std::string& filename) const { + std::set::const_iterator prefix_iter = log_prefix_list_.begin(); + for (; prefix_iter != log_prefix_list_.end(); ++prefix_iter) { + const std::string& prefix = *prefix_iter; + if (filename.size() < prefix.size()) { + // do not need to compare + continue; + } + + if (strncmp(prefix.c_str(), filename.c_str(), prefix.size()) == 0) { + // return true if match any prefix + return true; + } + } + return false; +} + +bool LogCleaner::DoCleanLocalLogs() { + if (log_prefix_list_.empty()) { + LOG(WARNING) << "[LogCleaner] Log prefix is not set yet."; + return false; + } + if (!CheckDirPath(info_log_dir_) || IsEmpty(info_log_dir_)) { + LOG(WARNING) << "[LogCleaner] Log dir " << info_log_dir_ << " not exsit logs."; + return false; + } + int64_t now_time = tera::get_millis() / 1000; + int64_t clean_time = now_time - info_log_expire_sec_; + LOG(INFO) << "[LogCleaner] Start clean log dir: " << info_log_dir_ + << ", now_time = " << now_time + << ", clean_time = " << clean_time; + + long path_maxlen = pathconf(info_log_dir_.c_str(), _PC_PATH_MAX); + std::vector log_file_list; + if (!ListCurrentDir(info_log_dir_, &log_file_list)) { + // list failed + LOG(WARNING) << "[LogCleaner] List log dir " << info_log_dir_ + << " failed. Cancel clean."; + return false; + } + + // reserved_set: filenames that should not to be clean + std::set reserved_set; + if (!GetCurrentOpendLogs(&reserved_set)) { + LOG(WARNING) << "[LogCleaner] GetCurrentOpendLogs failed. 
Cancel clean."; + return false; + } + + std::vector::const_iterator it = log_file_list.begin(); + for (; it != log_file_list.end(); ++it) { + if (reserved_set.find(*it) != reserved_set.end()) { + // already reserved + continue; + } + + const std::string& file_name = *it; + + // check if filename start with log_prefix_ + // if leveldb_log_prefix_ is not empty, check also + if (!CheckLogPrefix(file_name)) { + VLOG(16) << "[LogCleaner] Reserve log file: " << file_name + << ", which not match prefix."; + reserved_set.insert(file_name); + continue; + } + + // get file stat + std::string file_path = info_log_dir_ + "/" + file_name; + struct stat file_st; + if (lstat(file_path.c_str(), &file_st) != 0) { + // cancel clean if any file stat failed + LOG(WARNING) << "[LogCleaner] Stat log file: " << file_path << " fail. Cancel log clean."; + return false; + } + + if (S_ISLNK(file_st.st_mode)) { + // handle symbolic link + VLOG(16) << "[LogCleaner] Reserve symbolic link log: " << file_name; + reserved_set.insert(file_name); + char path_buf[path_maxlen]; + int ret = readlink(file_path.c_str(), path_buf, path_maxlen); + if (ret < 0 || ret >= path_maxlen) { + continue; + } else { + // reserve link target + path_buf[ret] = '\0'; + std::string target_filename = GetFileNameFromPath(path_buf); + VLOG(16) << "[LogCleaner] Reserve link target: " << target_filename + << " for link: " << file_path; + reserved_set.insert(target_filename); + } + } else if (!S_ISREG(file_st.st_mode)) { + VLOG(16) << "[LogCleaner] Reserve not regular file: " << file_name; + reserved_set.insert(file_name); + } else if (file_st.st_mtime >= clean_time) { + VLOG(16) << "[LogCleaner] Reserve not expire log: " << file_name + << ", mtime: " << file_st.st_mtime << ", clean_time: " << clean_time; + reserved_set.insert(file_name); + } + VLOG(16) << "stat filename: " << file_name + << ", is_symbolic_link: " << S_ISLNK(file_st.st_mode) + << ", is_dir: " << S_ISDIR(file_st.st_mode) + << ", is_regular_file: " << 
S_ISREG(file_st.st_mode) + << ", last mod time: " << file_st.st_mtime + << ", link number: " << file_st.st_nlink + << ", reserve: " << (reserved_set.find(file_name) != reserved_set.end()); + } + + // clean log + size_t clean_cnt = 0; + it = log_file_list.begin(); + for (; it != log_file_list.end(); ++it) { + const std::string &file_name = *it; + std::string file_path = info_log_dir_ + "/" + file_name; + if (reserved_set.find(file_name) == reserved_set.end()) { + LOG(INFO) << "[LogCleaner] log: " << file_path << " will be clean"; + if (!RemoveLocalFile(file_path)){ + LOG(WARNING) << "[LogCleaner] log clean fail: " << file_path; + } else { + ++clean_cnt; + } + } + } + LOG(INFO) << "[LogCleaner] Found log: " << log_file_list.size() + << ", clean: " << clean_cnt; + return true; +} + +bool LogCleaner::GetCurrentOpendLogs(std::set* opend_logs) { + long path_maxlen = pathconf(proc_fd_path_.c_str(), _PC_PATH_MAX); + if (path_maxlen < 0) { + LOG(ERROR) << "[LogCleaner] Get Path Max Len Failed"; + return false; + } + std::vector opend_logs_list; + VLOG(16) << "[LogCleaner] Search fd_path: " << proc_fd_path_; + if (!ListCurrentDirWithStat(proc_fd_path_, &opend_logs_list)) { + VLOG(16) << "[LogCleaner] list fd_path: " << proc_fd_path_ << " failed."; + return false; + } + + std::vector::const_iterator it = opend_logs_list.begin(); + for (; it != opend_logs_list.end(); ++it) { + const std::string& filename = it->first; + const struct stat& st = it->second; + if (S_ISLNK(st.st_mode)) { + char path_buf[path_maxlen]; + int ret = readlink(filename.c_str(), path_buf, path_maxlen); + if (ret > 0 && ret < path_maxlen && path_buf[0] == '/') { + path_buf[ret] = '\0'; + std::string target_filename = GetFileNameFromPath(path_buf); + VLOG(16) << "[LogCleaner] Reserve log in use: " << target_filename; + opend_logs->insert(target_filename); + } + } + } + return true; +} + +} // end namespace common + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/log/log_cleaner.h 
b/src/common/log/log_cleaner.h new file mode 100644 index 000000000..53830a733 --- /dev/null +++ b/src/common/log/log_cleaner.h @@ -0,0 +1,114 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_COMMON_LOG_CLEANER_H_ +#define TERA_COMMON_LOG_CLEANER_H_ + +#include +#include +#include + +#include "common/mutex.h" +#include "common/thread_pool.h" + +namespace common { + +class LogCleaner { +private: + // set private since singleton + LogCleaner(const std::string& log_dir, + int64_t period_second, + int64_t expire_second, + ThreadPool* thread_pool); + ~LogCleaner(); + // disallow copy + LogCleaner(const LogCleaner& other) = delete; + LogCleaner & operator = (const LogCleaner& other) = delete; + +public: + bool CheckOptions() const; + bool Start(); + void Stop(); + bool IsRunning() const { return bg_task_id_ > 0; } + + bool AddPrefix(const std::string& prefix) { + if (prefix.empty()) { + // empty prefix is not allowed + return false; + } else { + MutexLock l(&mutex_); + log_prefix_list_.insert(prefix); + return true; + } + } + + void RemovePrefix(const std::string& prefix) { + MutexLock l(&mutex_); + log_prefix_list_.erase(prefix); + } + +private: + // singleton + static Mutex inst_init_mutex_; + static LogCleaner* singleton_instance_; + + // get singleton instance but not start + // for unittest + static LogCleaner* GetInstance(ThreadPool *thread_pool = NULL); + +public: + static bool StartCleaner(ThreadPool *thread_pool = NULL); + static void StopCleaner(); + +private: + // do under lock + void NewThreadPool() { + if (NULL == thread_pool_) { + thread_pool_ = new ThreadPool(1); + thread_pool_own_ = true; + } + } + void DestroyOwnThreadPool() { + if (thread_pool_own_ && NULL != thread_pool_) { + thread_pool_->Stop(true); + delete thread_pool_; + thread_pool_ = NULL; + thread_pool_own_ = false; + } + } + + void CleanTaskWrap(); + + bool 
CheckLogPrefix(const std::string& filename) const; + + bool DoCleanLocalLogs(); + + bool GetCurrentOpendLogs(std::set* opend_logs); + +private: + ThreadPool* thread_pool_; + bool thread_pool_own_; + mutable Mutex mutex_; + + // options + std::string info_log_dir_; + std::set log_prefix_list_; + int64_t info_log_clean_period_ms_; // milli second + int64_t info_log_expire_sec_; // second + + bool stop_; + bool bg_exit_; + CondVar bg_cond_; + const ThreadPool::Task bg_func_; + int64_t bg_task_id_; + + std::string proc_fd_path_; +}; + +} // end namespace common + +#endif // TERA_COMMON_LOG_CLEANER_H_ + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/metric/cache_collector.h b/src/common/metric/cache_collector.h new file mode 100644 index 000000000..ae415b0d8 --- /dev/null +++ b/src/common/metric/cache_collector.h @@ -0,0 +1,108 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_COMMOM_METRIC_CACHE_COLLECTOR_H_ +#define TERA_COMMOM_METRIC_CACHE_COLLECTOR_H_ + +#include +#include + +#include "common/metric/collector_report_publisher.h" +#include "common/metric/collector.h" +#include "db/table_cache.h" +#include "leveldb/cache.h" + +namespace tera { + +enum class CacheCollectType { + kHitRate, + kEntries, + kCharge, +}; + +class BaseCacheCollector : public Collector { +public: + explicit BaseCacheCollector(CacheCollectType cache_type) : cache_type_(cache_type) {} + virtual ~BaseCacheCollector() {} + + virtual int64_t Collect() { + switch (cache_type_) { + case CacheCollectType::kHitRate: + return HitRate(); + case CacheCollectType::kEntries: + return Entries(); + case CacheCollectType::kCharge: + return TotalCharge(); + default: + return 0; + } + } + +protected: + virtual int64_t HitRate() = 0; + virtual int64_t Entries() = 0; + virtual int64_t TotalCharge() = 0; + +protected: + CacheCollectType cache_type_; +}; + +class LRUCacheCollector : public BaseCacheCollector { +public: + LRUCacheCollector(leveldb::Cache* cache, + CacheCollectType cache_type): + BaseCacheCollector(cache_type), + cache_(cache) {} + + virtual ~LRUCacheCollector() {} + +protected: + int64_t HitRate() override { + if (cache_ == NULL) { + return 0; + } + + double hit_rate = cache_->HitRate(true); + return isnan(hit_rate) ? -1 : static_cast(hit_rate * 100.0d); + } + + int64_t Entries() override { return cache_ == NULL ? 0 : static_cast(cache_->Entries()); } + + int64_t TotalCharge() override { return cache_ == NULL ? 
0 : static_cast(cache_->TotalCharge()); } +private: + leveldb::Cache* cache_; +}; + +class TableCacheCollector : public BaseCacheCollector { +public: + TableCacheCollector(leveldb::TableCache* cache, + CacheCollectType cache_type): + BaseCacheCollector(cache_type), + cache_(cache) {} + + virtual ~TableCacheCollector() {} + +protected: + int64_t HitRate() override { + if (cache_ == NULL) { + return 0; + } + + double hit_rate = cache_->HitRate(true); + return isnan(hit_rate) ? -1 : static_cast(hit_rate * 100.0d); + } + + int64_t Entries() override { return cache_ == NULL ? 0 : static_cast(cache_->TableEntries()); } + + int64_t TotalCharge() override { return cache_ == NULL ? 0 : static_cast(cache_->ByteSize()); } +private: + leveldb::TableCache* cache_; +}; + +} // end namespace tera + +#endif // TERA_COMMOM_METRIC_CACHE_COLLECTOR_H_ + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/metric/collector.h b/src/common/metric/collector.h new file mode 100644 index 000000000..0b31bb446 --- /dev/null +++ b/src/common/metric/collector.h @@ -0,0 +1,15 @@ +#pragma once +// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#include +#include + +namespace tera{ +class Collector { +public: + virtual ~Collector() {} + // return a instant value of the metric for tera to dump log and other usage + virtual int64_t Collect() = 0; +}; +} diff --git a/src/common/metric/collector_report.h b/src/common/metric/collector_report.h new file mode 100644 index 000000000..8c453dcaa --- /dev/null +++ b/src/common/metric/collector_report.h @@ -0,0 +1,49 @@ +#pragma once +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+#include +#include +#include + +#include "common/metric/metric_id.h" +#include "common/mutex.h" +#include "common/metric/collector.h" +#include "common/metric/subscriber.h" + +namespace tera { + +using CollectorReportMap = std::unordered_map; + +// Snapshot of collector values (metric id -> value) together with the time it was taken. +struct CollectorReport { + int64_t timestamp_ms; // timestamp of the report + int64_t interval_ms; // time interval since last report + + // interval_ms starts at 0 so that a report which has not been filled in yet + // never exposes an indeterminate value to readers. + CollectorReport() : timestamp_ms(get_millis()), interval_ms(0) {} + + // find methods, return 0 if not found + int64_t FindMetricValue(const MetricId& metric_id) const { + auto iter = report.find(metric_id); + return iter == report.end() ? 0 : iter->second; + } + + int64_t FindMetricValue(const std::string& metric_name) const { + return FindMetricValue(MetricId(metric_name)); + } + + // label_str is parsed together with metric_name; an unparsable pair yields 0 + int64_t FindMetricValue(const std::string& metric_name, const std::string& label_str) const { + MetricId metric_id; + if (!MetricId::ParseFromString(metric_name, label_str, &metric_id)) { + return 0; + } else { + return FindMetricValue(metric_id); + } + } + + // metric_id to metric snapshot + CollectorReportMap report; +}; +} // end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/metric/collector_report_publisher.cc b/src/common/metric/collector_report_publisher.cc new file mode 100644 index 000000000..620cc4107 --- /dev/null +++ b/src/common/metric/collector_report_publisher.cc @@ -0,0 +1,150 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "common/metric/collector_report_publisher.h" + +#include "glog/logging.h" + +#include "common/metric/hardware_collectors.h" +#include "common/timer.h" +#include "common/metric/collector.h" +#include "common/metric/prometheus_subscriber.h" + +namespace tera { + +CollectorReportPublisher& CollectorReportPublisher::GetInstance() { + static CollectorReportPublisher instance; + return instance; +} + +CollectorReportPublisher::CollectorReportPublisher(): + last_report_timestamp_(get_millis()), + last_collector_report_(new CollectorReport) { + AddHardwareCollectors(); +} + +CollectorReportPublisher::~CollectorReportPublisher() {} + +std::shared_ptr CollectorReportPublisher::GetSubscriberReport() { + std::lock_guard lock(mutex_); + + std::shared_ptr new_report(new SubscriberReport()); + int64_t start_ts = get_millis(); + // do collect + for (auto& subscriber_pair : subscribers_) { + const MetricId& metric_id = subscriber_pair.first; + new_report->insert(std::make_pair(metric_id, subscriber_pair.second->Collect())); + } + + int64_t end_ts = get_millis(); + VLOG(12) << "[Metric] Get Subscriber Summary Cost: " << (end_ts - start_ts) << " ms."; + return new_report; +} + +std::shared_ptr CollectorReportPublisher::GetCollectorReport() { + std::lock_guard lock(mutex_); + return last_collector_report_; +} + +void CollectorReportPublisher::Refresh() { + std::lock_guard lock(mutex_); + + std::shared_ptr new_report(new CollectorReport()); + int64_t start_ts = new_report->timestamp_ms; + new_report->interval_ms = new_report->timestamp_ms - last_report_timestamp_; + + // do collect + for (auto& metric_pair : collectors_) { + const MetricId& metric_id = metric_pair.first; + int64_t value = metric_pair.second->Collect(); + new_report->report[metric_id] = value; + } + + last_report_timestamp_ = start_ts; + int64_t end_ts = get_millis(); + VLOG(12) << "[Metric] Refresh Collectors Cost: " << (end_ts - start_ts) << " ms."; + last_collector_report_ = new_report; + 
NotifySubscribers(); +} + +bool CollectorReportPublisher::AddCollector(const MetricId& metric_id, + std::unique_ptr&& metric_collector, + SubscriberTypeList type_list) { + if (!metric_id.IsValid() || !metric_collector) { + return false; + } + + std::lock_guard lock(mutex_); + auto insert_ret = collectors_.insert(std::make_pair(metric_id, std::move(metric_collector))); + if (!insert_ret.second) { + return false; + } + + for (auto type : type_list) { + if (!AddSubscriber(std::unique_ptr(new PrometheusSubscriber(metric_id, type)))) { + LOG(ERROR) << "[METRIC] Add Subscriber For " << metric_id.ToString() << " Failed!"; + } + } + + return true; +} + +bool CollectorReportPublisher::AddSubscriber(std::unique_ptr&& prometheus_subscriber_ptr) { + if (!prometheus_subscriber_ptr || + !prometheus_subscriber_ptr->GetMetricId().IsValid()) { + // invalid arguments + return false; + } + + std::lock_guard lock(mutex_); + subscribers_.insert(std::make_pair(prometheus_subscriber_ptr->GetMetricId(), + std::move(prometheus_subscriber_ptr))); + + return true; +} + +void CollectorReportPublisher::NotifySubscribers() { + std::lock_guard lock(mutex_); + for (auto& subscriber_pair : subscribers_) { + subscriber_pair.second->OnUpdate(last_collector_report_); + } +} + +bool CollectorReportPublisher::HasCollector(const MetricId& metric_id) const { + std::lock_guard lock(mutex_); + return collectors_.find(metric_id) != collectors_.end(); +} + +bool CollectorReportPublisher::DeleteCollector(const MetricId& metric_id) { + std::lock_guard lock(mutex_); + DeleteSubscriber(metric_id); + return collectors_.erase(metric_id) > 0; +} + +bool CollectorReportPublisher::DeleteSubscriber(const MetricId& metric_id) { + std::lock_guard lock(mutex_); + return subscribers_.erase(metric_id) > 0; +} + +void CollectorReportPublisher::DeleteSubscribers() { + subscribers_.clear(); +} + +void CollectorReportPublisher::AddHardwareCollectors() { + // register hardware metrics + 
AddCollector(MetricId(kInstCpuMetricName), std::unique_ptr(new CpuUsageCollector())); + AddCollector(MetricId(kInstMemMetricName), std::unique_ptr(new MemUsageCollector())); + + AddCollector(MetricId(kInstNetRXMetricName), + std::unique_ptr(new NetUsageCollector(RECEIVE)), + {SubscriberType::MAX}); + + AddCollector(MetricId(kInstNetTXMetricName), + std::unique_ptr(new NetUsageCollector(TRANSMIT)), + {SubscriberType::MAX}); +} +} // end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/metric/collector_report_publisher.h b/src/common/metric/collector_report_publisher.h new file mode 100644 index 000000000..1290f2000 --- /dev/null +++ b/src/common/metric/collector_report_publisher.h @@ -0,0 +1,162 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_COMMON_METRIC_METRICS_H_ +#define TERA_COMMON_METRIC_METRICS_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/metric/metric_id.h" +#include "common/metric/collector_report.h" +#include "common/metric/collector.h" +#include "common/metric/subscriber.h" + +namespace tera { +// Base class for metric value collector + +using SubscriberTypeList = std::initializer_list; + +class CollectorReportPublisher { +private: + // set private for singleton + CollectorReportPublisher(); + ~CollectorReportPublisher(); + + // disallow copy + CollectorReportPublisher(const CollectorReportPublisher&) = delete; + CollectorReportPublisher& operator = (const CollectorReportPublisher&) = delete; + +public: + static CollectorReportPublisher& GetInstance(); + + void Refresh(); + /// report the instant values of collectors + std::shared_ptr GetCollectorReport(); + std::shared_ptr GetSubscriberReport(); + + /// Add a collector with a given metric_id + /// collector should be a right value reference of std::unique_ptr + /// 
return true if register success, + /// retrun false if argument is invalid or metric_id name has been registered already. + bool AddCollector(const MetricId& metric_id, + std::unique_ptr&& metric_collector, + SubscriberTypeList type_list = {SubscriberType::LATEST}); + + + /// weather a collector has been Added + bool HasCollector(const MetricId& metric_id) const; + /// Delete a collector + bool DeleteCollector(const MetricId& metric_id); + + + /// Add a subscriber to a given metricId. + /// Different type of subscribers can be registered to a same metricId. + bool AddSubscriber(std::unique_ptr&& subscriber); + /// Delete a subscriber + bool DeleteSubscriber(const MetricId& metric_id); + void DeleteSubscribers(); + +private: + void NotifySubscribers(); + void AddHardwareCollectors(); + +private: + mutable std::recursive_mutex mutex_; + + using CollectorMap = std::unordered_map>; + + using SubscriberMap = std::unordered_multimap>; + CollectorMap collectors_; + SubscriberMap subscribers_; + + int64_t last_report_timestamp_; + + std::shared_ptr last_collector_report_; +}; + +class AutoCollectorRegister { +public: + AutoCollectorRegister(const MetricId& id, + std::unique_ptr&& collector, + SubscriberTypeList type_list = {SubscriberType::LATEST}): + registered_(false), + id_(id) { + registered_ = CollectorReportPublisher::GetInstance().AddCollector(id_, std::move(collector), type_list); + } + + // create a metric with empty label + AutoCollectorRegister(const std::string& name, + std::unique_ptr&& collector, + SubscriberTypeList type_list = {SubscriberType::LATEST}): + registered_(false), + id_(name) { + if (name.empty()) { + throw std::invalid_argument("name"); + } + registered_ = CollectorReportPublisher::GetInstance().AddCollector(id_, std::move(collector), type_list); + } + + // create a metric with name and label + // label_str format: k1:v1,k2:v2,... 
+ // can build by LabelStringBuilder().Append("k1", "v1").Append("k2","v2").ToString(); + AutoCollectorRegister(const std::string& name, + const std::string& label_str, + std::unique_ptr&& collector, + SubscriberTypeList type_list = {SubscriberType::LATEST}): + registered_(false) { + // parse metric id + MetricId::ParseFromStringWithThrow(name, label_str, &id_); + registered_ = CollectorReportPublisher::GetInstance().AddCollector(id_, std::move(collector), type_list); + } + + ~AutoCollectorRegister() { + if (registered_) { + CollectorReportPublisher::GetInstance().DeleteCollector(id_); + } + } + + const MetricId& GetId() const { + return id_; + } + + bool IsRegistered() const { + return registered_; + } + +private: + bool registered_; + MetricId id_; +}; + + +class AutoSubscriberRegister { +public: + AutoSubscriberRegister(std::unique_ptr&& subscriber_ptr):registered_(false) { + if (subscriber_ptr) { + metric_id_ = subscriber_ptr->GetMetricId(); + registered_ = CollectorReportPublisher::GetInstance().AddSubscriber(std::move(subscriber_ptr)); + } + } + ~AutoSubscriberRegister(){ + if (registered_) { + CollectorReportPublisher::GetInstance().DeleteSubscriber(metric_id_); + } + } +private: + bool registered_; + MetricId metric_id_; +}; +} // end namespace tera + +#endif // TERA_COMMON_METRIC_METRICS_H_ + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/metric/counter_collector.h b/src/common/metric/counter_collector.h new file mode 100644 index 000000000..1a5ea981b --- /dev/null +++ b/src/common/metric/counter_collector.h @@ -0,0 +1,41 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_COMMON_METRIC_COUNTER_COLLECTOR_H_ +#define TERA_COMMON_METRIC_COUNTER_COLLECTOR_H_ + +#include "common/metric/collector.h" +#include "common/counter.h" + +namespace tera { + +class CounterCollector : public Collector { +public: + /// if is_periodic is true, the counter will be cleared when collect + /// this parameter is usually true, but it's false with some instantaneous value + /// Eg: read_pending_count, scan_pending_count, which can't be clear during collect. + explicit CounterCollector(Counter* counter, + bool is_periodic = true): + counter_(counter), + is_periodic_(is_periodic) {} + + ~CounterCollector() override {} + + int64_t Collect() override { + if (counter_ == NULL) { + return -1; + } else { + return is_periodic_ ? counter_->Clear() : counter_->Get(); + } + } +private: + Counter* const counter_; + const bool is_periodic_; +}; +} // end namespace tera + +#endif // TERA_COMMON_METRIC_COUNTER_COLLECTOR_H_ + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/metric/hardware_collectors.cc b/src/common/metric/hardware_collectors.cc new file mode 100644 index 000000000..cddfd6ee6 --- /dev/null +++ b/src/common/metric/hardware_collectors.cc @@ -0,0 +1,250 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+#include +#include +#include +#include +#include +#include +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" + +#include "common/timer.h" + +#include "common/metric/hardware_collectors.h" + +DECLARE_int64(tera_hardware_collect_period_second); + +namespace tera { + +// return number of cpu(cores) +static uint32_t GetCpuCount() { +#if defined(_SC_NPROCESSORS_ONLN) + return sysconf(_SC_NPROCESSORS_ONLN); +#else + FILE *fp = fopen("/proc/stat", "r"); + if (fp == NULL) { + LOG(ERROR) << "[HardWare Metric] open /proc/stat failed."; + return 1; + } + static const size_t kLineMaxLen = 256; // enough in here + std::unique_ptr aline(new char[kLineMaxLen]); + if (!aline) { + LOG(ERROR) << "[HardWare Metric] malloc failed."; + return 1; + } + static const size_t kHeaderMaxLen = 10; + char header[kHeaderMaxLen]; + uint32_t i = 0; + size_t len = 0; + char* line_ptr = aline.get(); + getline(&line_ptr, &len, fp); // drop the first line + while (getline(&line_ptr, &len, fp)) { + i++; + sscanf(line_ptr, "%s", header); + if (!strncmp(header, "intr", kHeaderMaxLen)) { + break; + } + } + fclose(fp); + return std::max(i - 1, 1); +#endif +} + +// return the number of ticks(jiffies) that this process +// has been scheduled in user and kernel mode. 
+static bool ProcessCpuTick(const std::string& stat_path, int64_t* tick) { + if (tick == NULL) { + return false; + } + FILE *fp = fopen(stat_path.c_str(), "r"); + if (fp == NULL) { + LOG(ERROR) << "[HardWare Metric] open " << stat_path << " failed."; + return false; + } + long long utime = 0; + long long stime = 0; + if (fscanf(fp, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %lld %lld", + &utime, &stime) < 2) { + fclose(fp); + LOG(ERROR) << "[HardWare Metric] get cpu tick from " << stat_path << " failed."; + return false; + } + fclose(fp); + *tick = utime + stime; + return true; +} + +CpuUsageCollector::CpuUsageCollector(): + pid_(getpid()), + cpu_core_num_(GetCpuCount()), + cpu_hertz_(sysconf(_SC_CLK_TCK)), + stat_path_(std::string("/proc/") + std::to_string(pid_) + "/stat"), + last_check_time_ms_(get_millis()), + last_tick_total_(0), + cpu_usage_(0) {} + +CpuUsageCollector::~CpuUsageCollector() {} + +int64_t CpuUsageCollector::Collect() { + int64_t cur_ts = get_millis(); + int64_t collect_period_ms = FLAGS_tera_hardware_collect_period_second * 1000; + if (collect_period_ms > 0 && cur_ts < last_check_time_ms_ + collect_period_ms) { + return cpu_usage_; + } else { + return CheckCpuUsage(cur_ts, false); + } +} + +int64_t CpuUsageCollector::CheckCpuUsage(int64_t cur_ts, bool is_irix_on) { + int64_t new_tick_total = 0; + if (!ProcessCpuTick(stat_path_, &new_tick_total)) { + // read proc file failed. 
+ return 0; + } + + float interval_sec = static_cast(cur_ts - last_check_time_ms_) / 1000.0f; + // percentage per tick during time interval + float interval_total_ticks = static_cast(cpu_hertz_) * interval_sec; + if (!is_irix_on) { + interval_total_ticks *= cpu_core_num_; + } + + float usage_percentage = static_cast(new_tick_total - last_tick_total_) * 100.0f / interval_total_ticks; + usage_percentage = std::min(usage_percentage, 99.9f); + + // update + last_tick_total_ = new_tick_total; + cpu_usage_ = static_cast(usage_percentage); + last_check_time_ms_ = cur_ts; + VLOG(15) << "[Hardware Metric] %CPU: " << usage_percentage; + return cpu_usage_; +} + +MemUsageCollector::MemUsageCollector(): + pid_(getpid()), + stat_path_(std::string("/proc/") + std::to_string(pid_) + "/statm"), + last_check_time_ms_(get_millis()), + mem_usage_(0) {} + + +MemUsageCollector::~MemUsageCollector() {} + +int64_t MemUsageCollector::Collect() { + int64_t cur_ts = get_millis(); + int64_t collect_period_ms = FLAGS_tera_hardware_collect_period_second * 1000; + if (collect_period_ms > 0 && cur_ts < last_check_time_ms_ + collect_period_ms) { + return mem_usage_; + } else { + return CheckMemUsage(cur_ts); + } +} + +int64_t MemUsageCollector::CheckMemUsage(int64_t cur_ts) { + FILE* stat_file = fopen(stat_path_.c_str(), "r"); + if (stat_file == NULL) { + LOG(ERROR) << "[Hardware Metric] open " << stat_path_ << " failed."; + return false; + } + + int64_t mem_pages = 0; + fscanf(stat_file, "%*d %ld", &mem_pages); + fclose(stat_file); + + mem_usage_ = mem_pages * 4 * 1024; + last_check_time_ms_ = cur_ts; + VLOG(15) << "[Hardware Metric] Memory: " << mem_usage_; + return mem_usage_; +} + +NetUsageCollector::NetInfoChecker NetUsageCollector::net_info_checker_; + +NetUsageCollector::NetUsageCollector(NetUsageType n_type): + net_usage_type_(n_type) {} + +NetUsageCollector::~NetUsageCollector() {} + +int64_t NetUsageCollector::Collect() { + int64_t cur_ts = get_millis(); + int64_t collect_period_ms = 
FLAGS_tera_hardware_collect_period_second * 1000; + if (collect_period_ms > 0 && + cur_ts < net_info_checker_.last_check_time_ms_ + collect_period_ms) { + return net_usage_type_ == RECEIVE ? net_info_checker_.net_rx_usage_ : net_info_checker_.net_tx_usage_; + } else { + int64_t value = 0; + if (net_usage_type_ == RECEIVE) { + // check net info and get receive usage + net_info_checker_.CheckNetUsage(cur_ts, &value, NULL); + } else { + // check net info and get transmit usage + net_info_checker_.CheckNetUsage(cur_ts, NULL, &value); + } + return value; + } +} + +NetUsageCollector::NetInfoChecker::NetInfoChecker() + : pid_(getpid()), + stat_path_(std::string("/proc/") + std::to_string(pid_) + "/net/dev"), + last_check_time_ms_(get_millis()), + last_rx_total_(0), + last_tx_total_(0), + net_rx_usage_(0), + net_tx_usage_(0) { + GetCurrentTotal(&last_rx_total_, &last_tx_total_); +} + +bool NetUsageCollector::NetInfoChecker::GetCurrentTotal(int64_t *rx_total, int64_t *tx_total) { + FILE* stat_file = fopen(stat_path_.c_str(), "r"); + if (stat_file == NULL) { + LOG(ERROR) << "[Hardware Metric] open " << stat_path_ << "failed."; + return false; + } + int ret = fseek(stat_file, 327, SEEK_SET); + CHECK_EQ(ret, 0); + for (int i = 0; i < 10; i++) { + while (':' != fgetc(stat_file)); + ret = fscanf(stat_file, "%ld%*d%*d%*d%*d%*d%*d%*d%ld", rx_total, tx_total); + if (ret >= 2 && rx_total > 0 && tx_total > 0) { + break; + } + } + fclose(stat_file); + + return true; +} + +bool NetUsageCollector::NetInfoChecker::CheckNetUsage(int64_t cur_ts, int64_t* rx_usage, int64_t *tx_usage) { + int64_t new_rx_total = 0; + int64_t new_tx_total = 0; + + if (!GetCurrentTotal(&new_rx_total, &new_tx_total)) { + return false; + } + int64_t interval_ms = cur_ts - last_check_time_ms_; + // update + net_rx_usage_ = (new_rx_total - last_rx_total_) * 1000 / interval_ms; + net_tx_usage_ = (new_tx_total - last_tx_total_) * 1000 / interval_ms; + last_rx_total_ = new_rx_total; + last_tx_total_ = new_tx_total; + 
last_check_time_ms_ = cur_ts; + + if (rx_usage) { + *rx_usage = net_rx_usage_; + } + + if (tx_usage) { + *tx_usage = net_tx_usage_; + } + + VLOG(15) << "[Hardware Metric] Network RX/TX: " << last_rx_total_ << " / " << last_tx_total_; + return true; +} + +} // end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/metric/hardware_collectors.h b/src/common/metric/hardware_collectors.h new file mode 100644 index 000000000..be04e4165 --- /dev/null +++ b/src/common/metric/hardware_collectors.h @@ -0,0 +1,104 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_COMMON_METRIC_HARDWARE_METRICS_H_ +#define TERA_COMMON_METRIC_HARDWARE_METRICS_H_ + +#include + +#include "common/metric/collector_report_publisher.h" +#include "common/metric/collector.h" + +namespace tera { + +const char* const kInstCpuMetricName = "tera_instance_cpu_usage_percent"; +const char* const kInstMemMetricName = "tera_instance_mem_usage_bytes"; +const char* const kInstNetRXMetricName = "tera_instance_net_receive_bytes"; +const char* const kInstNetTXMetricName = "tera_instance_net_transmit_bytes"; + +class CpuUsageCollector : public Collector { +public: + CpuUsageCollector(); + virtual ~CpuUsageCollector(); + + virtual int64_t Collect(); +private: + int64_t CheckCpuUsage(int64_t cur_ts, bool is_irix_on); + +private: + // proc info + int pid_; + uint32_t cpu_core_num_; + int64_t cpu_hertz_; + std::string stat_path_; + + // last check info + int64_t last_check_time_ms_; + int64_t last_tick_total_; // cpu total ticks at last check + int64_t cpu_usage_; // (new_tick_total - last_tick_total_) / (total ticks in interval) +}; + +class MemUsageCollector : public Collector { +public: + MemUsageCollector(); + virtual ~MemUsageCollector(); + + virtual int64_t Collect(); +private: + int64_t CheckMemUsage(int64_t cur_ts); + +private: + // proc 
info + int pid_; + std::string stat_path_; + + // last check info + int64_t last_check_time_ms_; + int64_t mem_usage_; +}; + +enum NetUsageType { + RECEIVE, // net_rx + TRANSMIT, // net_tx +}; + +class NetUsageCollector : public Collector { +public: + explicit NetUsageCollector(NetUsageType n_type); + virtual ~NetUsageCollector(); + + virtual int64_t Collect(); +private: + struct NetInfoChecker { + // proc info + int pid_; + std::string stat_path_; + + // last check info + int64_t last_check_time_ms_; + int64_t last_rx_total_; // total rx bytes at last check + int64_t last_tx_total_; // total tx bytes at last check + + // metric value cache + int64_t net_rx_usage_; // (new_rx_total - last_rx_total_) / check_interval + int64_t net_tx_usage_; // (new_tx_total - last_tx_total_) / check_interval + + NetInfoChecker(); + + bool GetCurrentTotal(int64_t*, int64_t*); + bool CheckNetUsage(int64_t cur_ts, int64_t* rx_usage, int64_t *tx_usage); + }; + + static NetInfoChecker net_info_checker_; + +private: + NetUsageType net_usage_type_; +}; + +} // end namespace tera + +#endif // TERA_COMMON_METRIC_HARDWARE_METRICS_H_ + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/metric/metric_counter.h b/src/common/metric/metric_counter.h new file mode 100644 index 000000000..55b4c59fe --- /dev/null +++ b/src/common/metric/metric_counter.h @@ -0,0 +1,93 @@ +#pragma once +// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include +#include + +#include "common/metric/collector_report_publisher.h" +#include "common/metric/counter_collector.h" +#include "common/counter.h" + +namespace tera{ +class MetricCounter : public Counter { +public: + // create a metric with empty label + explicit MetricCounter(const std::string& name, + SubscriberTypeList type_list = {SubscriberType::LATEST}, + bool is_periodic = true): + Counter(), + registered_(false), + metric_id_(name), + type_list_(type_list), + is_periodic_(is_periodic) { + if (name.empty()) { + // throw a exception and make process exit with coredump + throw std::invalid_argument("metric name is empty"); + } + registered_ = CollectorReportPublisher::GetInstance().AddCollector( + metric_id_, + std::unique_ptr(new CounterCollector(this, is_periodic_)), + type_list_); + } + + // create a metric with name and label + // label_str format: k1:v1,k2:v2,... + // can build by LabelStringBuilder().Append("k1", "v1").Append("k2","v2").ToString(); + MetricCounter(const std::string& name, + const std::string& label_str, + SubscriberTypeList type_list = {SubscriberType::LATEST}, + bool is_periodic = true): + Counter(), + registered_(false), + type_list_(type_list), + is_periodic_(is_periodic) { + // parse metric id + MetricId::ParseFromStringWithThrow(name, label_str, &metric_id_); + // legal label str format, do register + registered_ = CollectorReportPublisher::GetInstance().AddCollector( + metric_id_, + std::unique_ptr(new CounterCollector(this, is_periodic_)), + type_list); + } + + MetricCounter(MetricCounter&& counter) { + // parse metric id + if (counter.registered_) { + CollectorReportPublisher::GetInstance().DeleteCollector(counter.metric_id_); + } + registered_ = counter.registered_; + metric_id_ = counter.metric_id_; + is_periodic_ = counter.is_periodic_; + type_list_ = counter.type_list_; + Set(counter.Get()); + counter.registered_ = false; + registered_ = CollectorReportPublisher::GetInstance().AddCollector( + metric_id_, + 
std::unique_ptr(new CounterCollector(this, is_periodic_)), + type_list_); + } + + virtual ~MetricCounter() { + if (registered_) { + // do unregister + CollectorReportPublisher::GetInstance().DeleteCollector(metric_id_); + } + } + + bool IsRegistered() const { + return registered_; + } + + //Never copyied + MetricCounter(const MetricCounter&) = delete; + MetricCounter& operator=(const MetricCounter&) = delete; + +private: + bool registered_; + MetricId metric_id_; + SubscriberTypeList type_list_; + bool is_periodic_; +}; +} diff --git a/src/common/metric/metric_http_server.cc b/src/common/metric/metric_http_server.cc new file mode 100644 index 000000000..fdb01910c --- /dev/null +++ b/src/common/metric/metric_http_server.cc @@ -0,0 +1,232 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "common/metric/metric_http_server.h" + +#include +#include +#include + +#include "glog/logging.h" + +#include "common/timer.h" +#include "common/metric/collector_report.h" + +using std::string; + +namespace tera { + +void ResponseBodyBuilder::BuildType(string* body, const string& metric_name, const string& type) { + body->append("# TYPE " + metric_name + " " + type + "\n"); +} + +void ResponseBodyBuilder::BuildHelp(string* body, const string& metric_name, const string& help_info) { + body->append("# HELP " + metric_name + " " + help_info + "\n"); +} + +void ResponseBodyBuilder::BuildMetricItem(string* body, const MetricId& metric_id, const ReportItem& report_item) { + + VLOG(12) << "[Building Metric] name: " << metric_id.GetName() + << "\tValue: " << static_cast(report_item.Value()) + << "\tTimeStamp: " << report_item.Time() + << "\tType: " << report_item.Type(); + + if (report_item.Time() == -1) { + return; + } + + body->append(metric_id.GetName() + "{"); + const auto& label_map = metric_id.GetLabelMap(); + auto iter = label_map.begin(); + bool has_label 
= false; + if (iter != label_map.end()) { + body->append(iter->first + "=" + "\"" + iter->second + "\""); + has_label = true; + ++iter; + } + while (iter != label_map.end()) { + body->append("," + iter->first + "=" + "\"" + iter->second + "\""); + ++iter; + } + + if (has_label) { + body->append(",value_type=\"" + report_item.Type() + "\""); + } else { + body->append("value_type=\"" + report_item.Type() + "\""); + } + + body->append("} " + std::to_string(report_item.Value()) + " " + std::to_string(report_item.Time())); + body->append("\n"); +} + +static const int kMongoosePollTimeoutMs = 1000; + +static void LogRequest(struct http_message *request) { + VLOG(16) << "[MetricHttpServer] Recv http request." + << " method [" << std::string(request->method.p, request->method.len) << "]" + << " uri [" << std::string(request->uri.p, request->uri.len) << "]" + << " proto [" << std::string(request->proto.p, request->proto.len) << "]" + << " query [" << std::string(request->query_string.p, request->query_string.len) << "]" + << " body [" << std::string(request->body.p, request->body.len) << "]"; +} + +void MetricHttpServer::EventHandler(struct mg_connection *conn, int event, void *p_data) { + if (event == MG_EV_HTTP_REQUEST) { + if (conn == NULL || conn->mgr == NULL || p_data == NULL) { + LOG(WARNING) << "[MetricHttpServer] handle invalid request."; + return; + } + + // get user data + void* user_data = conn->mgr->user_data; + if (user_data == NULL) { + LOG(WARNING) << "[MetricHttpServer] Connection missing user data."; + return; + } + + MetricHttpServer *server = reinterpret_cast(user_data); + struct http_message *request = reinterpret_cast(p_data); + server->HandleHttpRequest(conn, request); + } + // ignore other events +} + +MetricHttpServer::MetricHttpServer(): + is_running_(false), + stop_(false), + listen_port_(-1) {} + +MetricHttpServer::~MetricHttpServer() {} + +bool MetricHttpServer::Start(int32_t listen_port) { + if (listen_port <= 0) { + LOG(WARNING) << 
"[MetricHttpServer] Start got invalid listen port: " << listen_port; + return false; + } + + MutexLock lock(&mutex_); + if (IsRunning()) { + LOG(WARNING) << "[MetricHttpServer] Server is already running, listening: " << listen_port_; + return false; + } + + // init mongoose use this as user_data + mg_mgr_init(&mongoose_mgr_, this); + + // bind listen port + std::string bind_addr = std::to_string(listen_port); + struct mg_connection *conn = mg_bind(&mongoose_mgr_, bind_addr.c_str(), &MetricHttpServer::EventHandler); + + if (conn == NULL) { + LOG(WARNING) << "[MetricHttpServer] Bind port [" << listen_port << "] failed."; + mg_mgr_free(&mongoose_mgr_); + return false; + } + + mg_set_protocol_http_websocket(conn); + LOG(INFO) << "[MetricHttpServer] Bind port [" << listen_port << "] success."; + + stop_.store(false); + if (!bg_thread_.Start(std::bind(&MetricHttpServer::BackgroundWorkWrapper, this))) { + mg_mgr_free(&mongoose_mgr_); + LOG(WARNING) << "[MetricHttpServer] Start background thread failed."; + return false; + } + return true; +} + +void MetricHttpServer::Stop() { + MutexLock lock(&mutex_); + if (!IsRunning()) { + return; + } + + stop_.store(true); + bg_thread_.Join(); + listen_port_ = -1; +} + +void MetricHttpServer::BackgroundWorkWrapper() { + LOG(INFO) << "[MetricHttpServer] Start background work"; + is_running_.store(true); + while (!stop_.load()) { + mg_mgr_poll(&mongoose_mgr_, kMongoosePollTimeoutMs); + } + is_running_.store(false); + mg_mgr_free(&mongoose_mgr_); + LOG(INFO) << "[MetricHttpServer] Exit background work"; +} + +void MetricHttpServer::HandleHttpRequest(struct mg_connection *conn, struct http_message *request) { + int64_t start_ts = get_micros(); + LogRequest(request); + + // select real handler based on uri + std::string uri(request->uri.p, request->uri.len); + if (uri == "/metrics") { + HandleMetrics(conn, request); + } else { + HandleUnknowUri(conn, request); + } + int64_t end_ts = get_micros(); + VLOG(16) << "[MetricHttpServer] Handle 
uri [" << uri << "] cost [" << (end_ts - start_ts) << "] us."; +} + +void MetricHttpServer::HandleUnknowUri(struct mg_connection *conn, struct http_message *request) { + VLOG(16) << "[MetricHttpServer] Handle unknow uri [" + << std::string(request->uri.p, request->uri.len) << "] ..."; + mg_send_head(conn, 404, 0, "Content-Type: text/plain"); +} + +void MetricHttpServer::HandleMetrics(struct mg_connection *conn, struct http_message *request) { + std::string body(GetResponseBody()); + mg_printf(conn, "HTTP/1.1 200 OK\r\nContent-Type: %s\r\n", "text/plain"); + mg_printf(conn, "Content-Length: %lu\r\n\r\n", static_cast(body.size())); + mg_send(conn, body.data(), body.size()); +} + +string MetricHttpServer::GetResponseBody() { + int64_t start_ts = get_millis(); + std::shared_ptr cur_report = + CollectorReportPublisher::GetInstance().GetSubscriberReport(); + + if (!cur_report) { + LOG(WARNING) << "[MetricHttpServer] Subscriber Report Is Empty"; + return ""; + } + + //pair + using MetricIdValuePair = SubscriberReport::value_type; + //Vector of pair + using MetricIdValueVec = std::vector; + // MetricNameMap: map< metric_name, vector< pair > > + using MetricNameMap = std::unordered_map; + + MetricNameMap metric_name_map; + + for (const auto& report_item : *cur_report) { + const std::string& metric_name = report_item.first.GetName(); + metric_name_map[metric_name].push_back(&report_item); + } + + std::string body; + // fill MetricFamilyVec + for (const auto& metric_item : metric_name_map) { + ResponseBodyBuilder::BuildHelp(&body, metric_item.first, metric_item.first); + ResponseBodyBuilder::BuildType(&body, metric_item.first, "gauge"); + + const MetricIdValueVec& metric_vec = metric_item.second; + + std::for_each(metric_vec.begin(), metric_vec.end(), [&body, this](const MetricIdValuePair* x) { + ResponseBodyBuilder::BuildMetricItem(&body, x->first, x->second); + }); + } + VLOG(12) << "[MetricHttpServer] Get Response Body cost: " << + get_millis() - start_ts << " ms"; + 
return std::move(body); +} +} // end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/metric/metric_http_server.h b/src/common/metric/metric_http_server.h new file mode 100644 index 000000000..a0b735450 --- /dev/null +++ b/src/common/metric/metric_http_server.h @@ -0,0 +1,84 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_COMMON_METRIC_METRIC_HTTP_SERVER_H_ +#define TERA_COMMON_METRIC_METRIC_HTTP_SERVER_H_ + +#include +#include +#include + +#include "mongoose.h" + +#include "common/metric/collector_report_publisher.h" +#include "common/mutex.h" +#include "common/thread.h" + +namespace tera { + +struct ResponseBodyBuilder { + static void BuildType(std::string* body, + const std::string& metric_name, + const std::string& type); + + static void BuildHelp(std::string* body, + const std::string& metric_name, + const std::string& help_info); + + static void BuildMetricItem(std::string* body, + const MetricId& metric_id, + const ReportItem& report_item); +}; + +// a simple http server based on mongoose +class MetricHttpServer { +public: + MetricHttpServer(); + ~MetricHttpServer(); + +private: + // disallow copy + MetricHttpServer(const MetricHttpServer&) = delete; + MetricHttpServer& operator = (const MetricHttpServer&) = delete; + +private: + static void EventHandler(struct mg_connection *conn, int event, void *p_data); + +public: + bool Start(int32_t listen_port); + void Stop(); + + bool IsRunning() const { + return is_running_.load(); + } + +private: + void BackgroundWorkWrapper(); + + // http request handlers + void HandleHttpRequest(struct mg_connection *conn, struct http_message *request); + void HandleMetrics(struct mg_connection *conn, struct http_message *request); + void HandleUnknowUri(struct mg_connection *conn, struct http_message *request); + + // prometheus handle functions + 
std::string GetResponseBody(); + +private: + mutable Mutex mutex_; + std::atomic is_running_; + std::atomic stop_; + int32_t listen_port_; + + // background thread + common::Thread bg_thread_; + + // mongoose info + struct mg_mgr mongoose_mgr_; +}; + +} // end namespace tera + +#endif // TERA_COMMON_METRIC_METRIC_HTTP_SERVER_H_ + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/common/metric/metric_id.cc b/src/common/metric/metric_id.cc new file mode 100644 index 000000000..b77ee095c --- /dev/null +++ b/src/common/metric/metric_id.cc @@ -0,0 +1,156 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "common/metric/metric_id.h" + +#include +#include + +namespace tera { + +static const std::string kInvalidLabel = ""; + +MetricId::MetricId(const std::string& name, const std::string& label_str) { + ParseFromStringWithThrow(name, label_str, this); +} + +static std::string MetricLabelsToString(const MetricLabels& label_map) { + if (label_map.empty()) { + return ""; + } + std::ostringstream label_oss; + auto iter = label_map.begin(); + // do not append kLabelPairDelimiter for the first pair + label_oss << iter->first << kLabelKVDelimiter << iter->second; + ++iter; + + for (; iter != label_map.end(); ++iter) { + label_oss << kLabelPairDelimiter << iter->first << kLabelKVDelimiter << iter->second; + } + return label_oss.str(); +} + +std::string MetricId::GenMetricIdStr(const std::string& name, const MetricLabels& label_map) { + if (label_map.empty()) { + return name; + } + + std::ostringstream id_oss; + id_oss << name << kNameLabelsDelimiter << MetricLabelsToString(label_map); + return id_oss.str(); +} + +void MetricId::ParseFromStringWithThrow(const std::string& name, + const std::string& label_str, + MetricId* metric_id) throw(std::invalid_argument) { + if (metric_id == NULL) { + throw std::invalid_argument("metric_id is 
invalid"); + } + if (name.empty()) { + throw std::invalid_argument("metric name is invalid"); + } + + metric_id->name_ = name; + metric_id->labels_.clear(); + + if (label_str.empty()) { + metric_id->id_str_ = metric_id->name_; + return; + } + + // label_str format: k1:v1,k2:v2,... + std::vector label_str_splits; + boost::algorithm::split(label_str_splits, label_str, + boost::algorithm::is_any_of(kLabelPairDelimiter)); + for (const std::string& label_kv_str : label_str_splits) { + std::vector label_kv_splits; + boost::algorithm::split(label_kv_splits, label_kv_str, + boost::algorithm::is_any_of(kLabelKVDelimiter)); + if (label_kv_splits.size() != 2) { + // invalid label str format + throw std::invalid_argument("label_str"); + } + + metric_id->labels_.insert(std::make_pair(label_kv_splits[0], label_kv_splits[1])); + } + + // gen identifier string + metric_id->id_str_ = metric_id->name_ + kNameLabelsDelimiter + label_str; + return; +} + +bool MetricId::ParseFromString(const std::string& name, + const std::string& label_str, + MetricId* metric_id) throw() { + try { + ParseFromStringWithThrow(name, label_str, metric_id); + return true; + } catch (std::invalid_argument&) { + return false; + } +} + +MetricId::MetricId() : name_(), labels_(), id_str_() {} + +MetricId::MetricId(const std::string& name) + : name_(name), + labels_(), + id_str_(GenMetricIdStr(name_, labels_)) {} + +MetricId::MetricId(const std::string& name, const MetricLabels& label_map) + : name_(name), + labels_(label_map), + id_str_(GenMetricIdStr(name_, labels_)) {} + +MetricId::MetricId(const MetricId& other) + : name_(other.name_), + labels_(other.labels_), + id_str_(other.id_str_) {} + +MetricId::~MetricId() {} + +MetricId& MetricId::operator = (const MetricId& other) { + name_ = other.name_; + labels_ = other.labels_; + id_str_ = other.id_str_; + return *this; +} + +const std::string& MetricId::GetLabel(const std::string& name) const { + auto iter = labels_.find(name); + if (iter == labels_.end()) { + 
return kInvalidLabel; + } else { + return iter->second; + } +} + +bool MetricId::ExistLabel(const std::string& name) const { + return labels_.find(name) != labels_.end(); +} + +bool MetricId::CheckLabel(const std::string& name, const std::string& expected_value) const { + auto iter = labels_.find(name); + if (iter == labels_.end()) { + return false; + } else { + return (iter->second == expected_value); + } +} + +LabelStringBuilder& LabelStringBuilder::Append(const std::string& name, const std::string& value) { + if (!name.empty() && !value.empty()) { + labels_[name] = value; + } + return *this; +} + +std::string LabelStringBuilder::ToString() const { + return MetricLabelsToString(labels_); +} + +} // end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/metric/metric_id.h b/src/common/metric/metric_id.h new file mode 100644 index 000000000..cff30448e --- /dev/null +++ b/src/common/metric/metric_id.h @@ -0,0 +1,143 @@ +// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_COMMON_METRIC_METRIC_ID_H_ +#define TERA_COMMON_METRIC_METRIC_ID_H_ + +#include +#include +#include +#include +#include + +namespace tera { + +// use ordered map to ensure the order of labels in id_str +typedef std::map MetricLabels; + +const char* const kNameLabelsDelimiter = "#"; +const char* const kLabelPairDelimiter = ","; +const char* const kLabelKVDelimiter = ":"; + +// A metric identifiered by name and all labels +// name: necessary, and should not be empty +// labels: optional +// +// Can get name and labels from MetricId +class MetricId { +public: + MetricId(); + explicit MetricId(const std::string& name); + MetricId(const std::string& name, const MetricLabels& label_map); + MetricId(const std::string& name, const std::string& label_str); + MetricId(const MetricId& other); + ~MetricId(); + + MetricId& operator = (const MetricId& other); + + bool IsValid() const { + return !name_.empty(); + } + + const std::string& GetName() const { + return name_; + } + + const MetricLabels& GetLabelMap() const { + return labels_; + } + + const std::string& ToString() const { + return id_str_; + } + + // access labels + const std::string& GetLabel(const std::string& name) const; + bool ExistLabel(const std::string& name) const; + bool CheckLabel(const std::string& name, const std::string& expected_value) const; + +public: + // Parse MetricId from name and formated label string + // nothrow std::invalid_argument if got illegal format arguments + static void ParseFromStringWithThrow(const std::string& name, + const std::string& label_str, + MetricId* metric_id) throw(std::invalid_argument); + // Parse MetricId from name and formated label string + // nothrow version + static bool ParseFromString(const std::string& name, + const std::string& label_str, + MetricId* metric_id) throw(); + +private: + static std::string GenMetricIdStr(const std::string& name, const MetricLabels& label_map); +private: + std::string name_; + MetricLabels labels_; + std::string 
id_str_; +}; + +// relational operators +// make MetricId can be the key of std::map and std::unordered_map +inline bool operator == (const MetricId& id1, const MetricId& id2) { + return id1.ToString() == id2.ToString(); +} + +inline bool operator != (const MetricId& id1, const MetricId& id2) { + return id1.ToString() != id2.ToString(); +} + +inline bool operator < (const MetricId& id1, const MetricId& id2) { + return id1.ToString() < id2.ToString(); +} + +inline bool operator <= (const MetricId& id1, const MetricId& id2) { + return id1.ToString() <= id2.ToString(); +} + +inline bool operator > (const MetricId& id1, const MetricId& id2) { + return id1.ToString() > id2.ToString(); +} + +inline bool operator >= (const MetricId& id1, const MetricId& id2) { + return id1.ToString() >= id2.ToString(); +} + +// A helper class to build formated label string +// Usage: label_str = LabelStringBuilder().Append("k1","v1").Append("k2","v2").ToString(); +class LabelStringBuilder { +public: + LabelStringBuilder() {} + ~LabelStringBuilder() {} + + // append a k-v pair + LabelStringBuilder& Append(const std::string& name, const std::string& value); + + // build formated string + std::string ToString() const; + +private: + MetricLabels labels_; +}; + +} // end namespace tera + +namespace std { +// specialization std::hash for tera::MetricId +// make MetricId can be the key of unordered_map +template<> +struct hash<::tera::MetricId> { +public: + size_t operator () (const ::tera::MetricId& id) const { + return str_hash_(id.ToString()); + } +private: + hash str_hash_; +}; + +} // end namespace std + +#endif // TERA_COMMON_METRIC_METRIC_ID_H_ + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/metric/prometheus_subscriber.cc b/src/common/metric/prometheus_subscriber.cc new file mode 100644 index 000000000..9aca684df --- /dev/null +++ b/src/common/metric/prometheus_subscriber.cc @@ -0,0 +1,142 @@ +// Copyright (c) 2017, Baidu.com, Inc. 
All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#include "gflags/gflags.h" +#include "glog/logging.h" + +#include "common/metric/prometheus_subscriber.h" +#include "common/metric/collector_report.h" + +DECLARE_int64(tera_metric_hold_max_time); + +namespace tera{ + +void PrometheusSubscriber::OnUpdate(std::shared_ptr report) { + int64_t value = report->FindMetricValue(metric_id_); + Append(report->timestamp_ms, value); +} + +ReportItem PrometheusSubscriber::Collect() { + ReportItem ret; + std::shared_ptr tera_queue_ptr; + int64_t last_collect_ts; + + { + std::lock_guard lock_mtx(mtx_); + if (tera_queue_ptr_->empty()) { + VLOG(12) << "[PROMETHEUS SUBSCRIBER] Empty Tera Queue"; + return ret; + } + + last_collect_ts = last_collect_ts_; + last_collect_ts_ = tera_queue_ptr_->back().first; + tera_queue_ptr = tera_queue_ptr_; + tera_queue_ptr_.reset(new TimeValueQueue); + } + + int64_t value = GetSpecificValue(tera_queue_ptr); + + if (type_ == SubscriberType::QPS || + type_ == SubscriberType::THROUGHPUT) { + int64_t time_interval = tera_queue_ptr->back().first - last_collect_ts; + value = (time_interval != 0 ? 
value * 1000 / time_interval : 0); + } + + ret.SetTimeValue({tera_queue_ptr->back().first, value}); + ret.SetType(GetTypeName()); + + return ret; +} + +void PrometheusSubscriber::Append(int64_t time_stamp, int64_t current_value) { + std::lock_guard mtx_lock(mtx_); + tera_queue_ptr_->emplace_back(time_stamp, current_value); + VLOG(12) << "[PROMETHEUS APPEND] " << metric_id_.GetName() + << "\tValue: " << current_value + << "\tQueue Size:" << tera_queue_ptr_->size(); + if (has_inited_) { + DropExpiredValue(); + } else { + last_collect_ts_ = time_stamp; + has_inited_ = true; + } +} + +std::string PrometheusSubscriber::GetTypeName() { + switch (type_) + { + + case SubscriberType::LATEST: + return "Latest"; + + case SubscriberType::MAX: + return "Max"; + + case SubscriberType::MIN: + return "Min"; + + case SubscriberType::SUM: + return "Sum"; + + case SubscriberType::QPS: + return "Qps"; + + case SubscriberType::THROUGHPUT: + return "ThroughPut"; + + default: + LOG(ERROR) << "Unknown collector type: "; + abort(); + + } + //Never reach here + return ""; +} + +void PrometheusSubscriber::DropExpiredValue() { + if (tera_queue_ptr_->empty()) { + return; + } + + auto last_enqueue_ts = tera_queue_ptr_->back().first; + int64_t drop_cnt = 0; + while (last_enqueue_ts - tera_queue_ptr_->front().first >= FLAGS_tera_metric_hold_max_time) { + VLOG(12) << "[PROMETHEUS SUBSCRIBER] drop last_enqueue_ts: " << last_enqueue_ts + << "first_ts: " << tera_queue_ptr_->front().first; + ++drop_cnt; + last_collect_ts_ = tera_queue_ptr_->front().first; + tera_queue_ptr_->pop_front(); + } + if (drop_cnt != 0) { + VLOG(12) << "[PROMETHEUS SUBSCRIBER] drop " << drop_cnt << "values"; + } +} + +int64_t PrometheusSubscriber::GetSpecificValue(std::shared_ptr tera_queue_ptr) { + switch (type_) + { + + case SubscriberType::LATEST: + return GetLatest(tera_queue_ptr); + + case SubscriberType::MAX: + return GetMax(tera_queue_ptr); + + case SubscriberType::MIN: + return GetMin(tera_queue_ptr); + + //Both of 
SUM, Qps, and THROUGHPUT use GetSum here + case SubscriberType::SUM: + case SubscriberType::QPS: + case SubscriberType::THROUGHPUT: + return GetSum(tera_queue_ptr); + + default: + LOG(ERROR) << "Unknown collector type"; + abort(); + + } + //Never reach here + return -1; +} +} diff --git a/src/common/metric/prometheus_subscriber.h b/src/common/metric/prometheus_subscriber.h new file mode 100644 index 000000000..67affa7bb --- /dev/null +++ b/src/common/metric/prometheus_subscriber.h @@ -0,0 +1,81 @@ +#pragma once +// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#include +#include +#include +#include +#include +#include +#include + +#include "common/metric/subscriber.h" + +namespace tera { + +using TimeValueQueue = std::deque; + +class PrometheusSubscriber : public Subscriber { +public: + PrometheusSubscriber(const MetricId& metric_id, SubscriberType type = SubscriberType::LATEST): + tera_queue_ptr_(new TimeValueQueue), + last_collect_ts_(0), + has_inited_(false), + type_(type), + metric_id_(metric_id) { } + + ~PrometheusSubscriber() override {} + ReportItem Collect() override; + void OnUpdate(const std::shared_ptr) override; + + std::string GetTypeName() override; + + const MetricId& GetMetricId() override { + return metric_id_; + } + +private: + void Append(int64_t time_stamp, int64_t current_value); + void DropExpiredValue(); + int64_t GetSpecificValue(std::shared_ptr); + + int64_t GetMax(std::shared_ptr tera_queue_ptr) { + return std::max_element(tera_queue_ptr->begin(), tera_queue_ptr->end(), + [](const TimeValuePair& x, const TimeValuePair& y) { + return x.second < y.second; + })->second; + } + + int64_t GetMin(std::shared_ptr tera_queue_ptr) { + return std::min_element(tera_queue_ptr->begin(), tera_queue_ptr->end(), + [](const TimeValuePair& x, const TimeValuePair& y) { + return x.second < y.second; + })->second; + } + + int64_t 
GetLatest(std::shared_ptr tera_queue_ptr) { + return tera_queue_ptr->back().second; + } + + int64_t GetSum(std::shared_ptr tera_queue_ptr) { + return std::accumulate(tera_queue_ptr->begin(), tera_queue_ptr->end(), (int64_t)0, + [](const int64_t val, const TimeValuePair& x) { + return val + x.second; + }); + } + + + std::mutex mtx_; + //queue of tera timestamp-value + std::shared_ptr tera_queue_ptr_; + //timestamp of prometheus_queue_ptr_'s last enqueue operation + int64_t last_collect_ts_; + //Is this class inited? + bool has_inited_; + //subscriber type + const SubscriberType type_; + MetricId metric_id_; +}; + +} \ No newline at end of file diff --git a/src/common/metric/ratio_collector.h b/src/common/metric/ratio_collector.h new file mode 100644 index 000000000..3a933adef --- /dev/null +++ b/src/common/metric/ratio_collector.h @@ -0,0 +1,45 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_COMMOM_METRIC_RATIO_COLLECTOR_H_ +#define TERA_COMMOM_METRIC_RATIO_COLLECTOR_H_ + +#include +#include "common/metric/collector_report_publisher.h" + +namespace tera { + +class RatioCollector : public Collector { +public: + explicit RatioCollector(Counter* first_counter, + Counter* second_counter, + bool is_periodic = true): + first_counter_(first_counter), + second_counter_(second_counter), + is_periodic_(is_periodic) {} + + int64_t Collect() override { + if (NULL == first_counter_ || NULL == second_counter_) { + return 0; + } else { + double ratio = (double)first_counter_->Get() / second_counter_->Get(); + if (is_periodic_) { + first_counter_->Clear(); + second_counter_->Clear(); + } + return isnan(ratio) ? 
-1 : static_cast(ratio * 100); + } + } +private: + Counter* const first_counter_; + Counter* const second_counter_; + const bool is_periodic_; +}; + +} // end namespace tera + +#endif // TERA_COMMOM_METRIC_RATIO_COLLECTOR_H_ + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/metric/ratio_subscriber.h b/src/common/metric/ratio_subscriber.h new file mode 100644 index 000000000..32656b46b --- /dev/null +++ b/src/common/metric/ratio_subscriber.h @@ -0,0 +1,58 @@ +#pragma once +// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#include "common/metric/subscriber.h" +#include +#include + +namespace tera { +class RatioSubscriber : public Subscriber { +public: + RatioSubscriber(const MetricId& metric_id, + std::unique_ptr&& subscriber1, + std::unique_ptr&& subscriber2): + metric_id_(metric_id), + subscriber1_(std::move(subscriber1)), + subscriber2_(std::move(subscriber2)) { + type_name_ = "Ratio: (" + + subscriber1_->GetMetricId().GetName() + ":" + subscriber1_->GetTypeName() + " / " + + subscriber2_->GetMetricId().GetName() + ":" + subscriber2_->GetTypeName() + ")"; + } + + virtual std::string GetTypeName() override { + return type_name_; + } + + virtual void OnUpdate(const std::shared_ptr report_ptr) override { + subscriber1_->OnUpdate(report_ptr); + subscriber2_->OnUpdate(report_ptr); + } + + virtual ReportItem Collect() override { + ReportItem ret; + auto subscriber1_ret = subscriber1_->Collect(); + auto subscriber2_ret = subscriber2_->Collect(); + //timestamp should be equal; + assert(subscriber1_ret.Time() == subscriber2_ret.Time()); + double ratio = (double)subscriber1_ret.Value() / subscriber2_ret.Value(); + ret.SetTimeValue({subscriber1_ret.Time(), + (isnan(ratio) ? 
-1 : static_cast(ratio))}); + ret.SetType(GetTypeName()); + return ret; + } + + const MetricId& GetMetricId() override { + return metric_id_; + } + + virtual ~RatioSubscriber() override {} + +private: + MetricId metric_id_; + std::unique_ptr subscriber1_; + std::unique_ptr subscriber2_; + std::string type_name_; +}; +} + diff --git a/src/common/metric/subscriber.h b/src/common/metric/subscriber.h new file mode 100644 index 000000000..6b0eb394b --- /dev/null +++ b/src/common/metric/subscriber.h @@ -0,0 +1,66 @@ +#pragma once +// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +#include +#include +#include +#include "common/metric/metric_id.h" + +namespace tera { + +using TimeValuePair = std::pair; + +class CollectorReport; + +struct ReportItem { + TimeValuePair time_value_pair; + std::string type; + ReportItem(TimeValuePair tvp = {-1, -1}, const std::string& t = ""): + time_value_pair(tvp), + type(t) { } + + int64_t Value() const { + return time_value_pair.second; + } + + int64_t Time() const { + return time_value_pair.first; + } + + void SetTimeValue(const TimeValuePair& tvp) { + time_value_pair = tvp; + } + + void SetType(const std::string& tp) { + type = tp; + } + + std::string Type() const { + return type; + } +}; + +class Subscriber { +public: + enum class SubscriberType { + LATEST, + MAX, + MIN, + QPS, + SUM, + THROUGHPUT + }; + virtual ~Subscriber() {} + // return a pair of to Prometheus + virtual ReportItem Collect() = 0; + // Update subscriber, depends to subscriber type + // Called in CollectorReportPublisher::Report() + virtual void OnUpdate(const std::shared_ptr) = 0; + virtual std::string GetTypeName() = 0; + virtual const MetricId& GetMetricId() = 0; +}; + +using SubscriberType = Subscriber::SubscriberType; +using SubscriberReport = std::unordered_multimap; +} \ No newline at end of file diff --git a/src/common/mutex.h b/src/common/mutex.h 
old mode 100644 new mode 100755 index 46e89044f..381a69218 --- a/src/common/mutex.h +++ b/src/common/mutex.h @@ -12,7 +12,8 @@ #include #include #include -#include "timer.h" +#include +#include "common/timer.h" namespace common { @@ -45,7 +46,7 @@ class Mutex { #ifdef MUTEX_DEBUG int64_t s = 0; if (msg) { - s = timer::get_micros(); + s = get_micros(); } #endif PthreadCall("mutex lock", pthread_mutex_lock(&mu_)); @@ -74,16 +75,16 @@ class Mutex { msg_ = msg; msg_threshold_ = msg_threshold; if (msg_) { - lock_time_ = timer::get_micros(); + lock_time_ = get_micros(); } #endif owner_ = pthread_self(); } void BeforeUnlock() { #ifdef MUTEX_DEBUG - if (msg_ && timer::get_micros() - lock_time_ > msg_threshold_) { + if (msg_ && get_micros() - lock_time_ > msg_threshold_) { printf("%s locked %.3f ms\n", - msg_, (timer::get_micros() - lock_time_) / 1000.0); + msg_, (get_micros() - lock_time_) / 1000.0); } msg_ = NULL; #endif @@ -137,11 +138,14 @@ class CondVar { } // Time wait in us // timeout < 0 would cause ETIMEOUT and return false immediately - bool TimeWaitInUs(int timeout, const char* msg = NULL) { + bool TimeWaitInUs(int64_t timeout, const char* msg = NULL) { // ref: http://www.qnx.com/developers/docs/6.5.0SP1.update/com.qnx.doc.neutrino_lib_ref/p/pthread_cond_timedwait.html struct timespec ts; clock_gettime(CLOCK_MONOTONIC, &ts); - int64_t nsec = ((int64_t)timeout) * 1000 + ts.tv_nsec; + int64_t nsec = timeout * 1000 + ts.tv_nsec; + + assert(nsec > 0); + ts.tv_sec += nsec / 1000000000; ts.tv_nsec = nsec % 1000000000; diff --git a/src/common/request_done_wrapper.h b/src/common/request_done_wrapper.h new file mode 100644 index 000000000..cd6b7b3b7 --- /dev/null +++ b/src/common/request_done_wrapper.h @@ -0,0 +1,29 @@ +#pragma once +#include + +namespace tera { +class RequestDoneWrapper : public google::protobuf::Closure { +public: + static google::protobuf::Closure* NewInstance(google::protobuf::Closure* done) { + return new RequestDoneWrapper(done); + } + + 
//Self-Deleted, never access it after Run(); + //Default do nothing; + virtual void Run() override { + delete this; + } + + virtual ~RequestDoneWrapper() { + done_->Run(); + } + +protected: + //Can Only Create on Heap; + RequestDoneWrapper(google::protobuf::Closure* done): + done_(done) { } + +private: + google::protobuf::Closure* done_; +}; +} \ No newline at end of file diff --git a/src/common/test/collector_report_test.cc b/src/common/test/collector_report_test.cc new file mode 100644 index 000000000..e01972cc9 --- /dev/null +++ b/src/common/test/collector_report_test.cc @@ -0,0 +1,179 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "common/metric/metric_counter.h" +#include "common/metric/collector_report.h" +#include "common/this_thread.h" + +namespace tera { + +class CollectorReportTest : public ::testing::Test { +public: + CollectorReportTest() + : nonperiod_counter1_label(LabelStringBuilder().Append("key1", "value1").ToString()), + nonperiod_counter1("counter1", nonperiod_counter1_label, {}, false), + nonperiod_counter2("counter2", {}, false), + period_counter1_label(LabelStringBuilder().Append("key2", "value2").ToString()), + period_counter1("counter1", period_counter1_label, {}, true), + period_counter3("counter3", {}, true) { + other_whatever_ids.push_back(MetricId()); + other_whatever_ids.push_back(MetricId("whatevername")); + + MetricLabels whatever_labels; + whatever_labels["haha"] = "hehe"; + whatever_labels["heihei"] = "hoho"; + other_whatever_ids.push_back(MetricId("", whatever_labels)); + other_whatever_ids.push_back(MetricId("whatevername", whatever_labels)); + } + + virtual void SetUp() { + nonperiod_counter1.Set(1); + nonperiod_counter2.Set(2); + period_counter1.Set(3); + period_counter3.Set(4); + } + 
+ virtual void TearDown() { + // reset cache to initial status + CollectorReportPublisher::GetInstance().last_collector_report_.reset(new CollectorReport()); + } +private: + std::string nonperiod_counter1_label; + MetricCounter nonperiod_counter1; + MetricCounter nonperiod_counter2; + std::string period_counter1_label; + MetricCounter period_counter1; + MetricCounter period_counter3; + + std::vector other_whatever_ids; +}; + +TEST_F(CollectorReportTest, FindTest) { + int64_t value = 0; + CollectorReportPublisher::GetInstance().Refresh(); + std::shared_ptr report = CollectorReportPublisher::GetInstance().GetCollectorReport(); + + // check report + EXPECT_EQ(report->report.size(), CollectorReportPublisher::GetInstance().collectors_.size()); + + // nonperiod_counter1 + value = report->FindMetricValue("counter1", nonperiod_counter1_label); + EXPECT_EQ(value, 1); + value = report->FindMetricValue(nonperiod_counter1.metric_id_); + EXPECT_EQ(value, 1); + value = report->FindMetricValue("counter1"); + EXPECT_EQ(value, 0); + value = report->FindMetricValue("counter1", "other not exist label"); + EXPECT_EQ(value, 0); + value = report->FindMetricValue("not exist name", nonperiod_counter1_label); + EXPECT_EQ(value, 0); + value = report->FindMetricValue(MetricId("counter1")); + EXPECT_EQ(value, 0); + + // nonperiod_counter2 + value = report->FindMetricValue("counter2"); + EXPECT_EQ(value, 2); + value = report->FindMetricValue("counter2", ""); + EXPECT_EQ(value, 2); + value = report->FindMetricValue(MetricId("counter2")); + EXPECT_EQ(value, 2); + value = report->FindMetricValue("counter2", "whatever_label"); + EXPECT_EQ(value, 0); + + // period_counter1 + value = report->FindMetricValue("counter1", period_counter1_label); + EXPECT_EQ(value, 3); + value = report->FindMetricValue(period_counter1.metric_id_); + EXPECT_EQ(value, 3); + + // period_counter3 + value = report->FindMetricValue("counter3"); + EXPECT_EQ(value, 4); + value = 
report->FindMetricValue(period_counter3.metric_id_); + EXPECT_EQ(value, 4); + + // invalid + for (const MetricId& not_exist_id : other_whatever_ids) { + value = report->FindMetricValue(not_exist_id.GetName()); + EXPECT_EQ(value, 0); + value = report->FindMetricValue(not_exist_id.ToString()); + EXPECT_EQ(value, 0); + value = report->FindMetricValue(not_exist_id); + EXPECT_EQ(value, 0); + } + + // report again + nonperiod_counter1.Inc(); + nonperiod_counter2.Inc(); + period_counter1.Inc(); + period_counter3.Inc(); + MetricCounter another_counter1("another1"); + MetricCounter another_counter2("another2"); + another_counter1.Inc(); + CollectorReportPublisher::GetInstance().Refresh(); + report = CollectorReportPublisher::GetInstance().GetCollectorReport(); + EXPECT_EQ(report->report.size(), CollectorReportPublisher::GetInstance().collectors_.size()); + + value = report->FindMetricValue(nonperiod_counter1.metric_id_); + EXPECT_EQ(value, 2); + value = report->FindMetricValue(nonperiod_counter2.metric_id_); + EXPECT_EQ(value, 3); + value = report->FindMetricValue(period_counter1.metric_id_); + EXPECT_EQ(value, 1); + value = report->FindMetricValue(period_counter3.metric_id_); + EXPECT_EQ(value, 1); + value = report->FindMetricValue(another_counter1.metric_id_); + EXPECT_EQ(value, 1); + value = report->FindMetricValue(another_counter2.metric_id_); + EXPECT_EQ(value, 0); +} + +TEST_F(CollectorReportTest, CacheTest) { + // do not update yet + std::shared_ptr initial_report = CollectorReportPublisher::GetInstance().GetCollectorReport(); + EXPECT_TRUE(initial_report.get() != NULL); + EXPECT_TRUE(initial_report->report.empty()); + + // update + CollectorReportPublisher::GetInstance().Refresh(); + std::shared_ptr report1 = CollectorReportPublisher::GetInstance().GetCollectorReport(); + EXPECT_EQ(report1->report.size(), CollectorReportPublisher::GetInstance().collectors_.size()); + EXPECT_TRUE(report1.get() == CollectorReportPublisher::GetInstance().last_collector_report_.get()); 
+ + // modify counters and report again + nonperiod_counter1.Inc(); + nonperiod_counter2.Inc(); + period_counter1.Inc(); + period_counter3.Inc(); + MetricCounter another_counter1("another1"); + MetricCounter another_counter2("another2"); + another_counter1.Inc(); + + // get report before update, return same ptr + std::shared_ptr report2 = CollectorReportPublisher::GetInstance().GetCollectorReport(); + EXPECT_TRUE(report2.get() == CollectorReportPublisher::GetInstance().last_collector_report_.get()); + EXPECT_TRUE(report2.get() == report1.get()); + EXPECT_EQ(report2->FindMetricValue(period_counter3.metric_id_), 4); + + // update and get + CollectorReportPublisher::GetInstance().Refresh(); + std::shared_ptr report3 = CollectorReportPublisher::GetInstance().GetCollectorReport(); + EXPECT_TRUE(report3.get() == CollectorReportPublisher::GetInstance().last_collector_report_.get()); + EXPECT_FALSE(report3.get() == report1.get()); + EXPECT_EQ(report3->report.size(), report2->report.size() + 2); + EXPECT_EQ(report3->FindMetricValue(period_counter3.metric_id_), 1); +} + +} // end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/test/common_test_main.cc b/src/common/test/common_test_main.cc new file mode 100644 index 000000000..90c3b06dd --- /dev/null +++ b/src/common/test/common_test_main.cc @@ -0,0 +1,30 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "utils/utils_cmd.h" + +int main(int argc, char** argv) { + ::google::InitGoogleLogging(argv[0]); + FLAGS_v = 16; + FLAGS_minloglevel=0; + FLAGS_log_dir = "./log"; + if (access(FLAGS_log_dir.c_str(), F_OK)) { + mkdir(FLAGS_log_dir.c_str(), 0777); + } + std::string pragram_name("tera"); + tera::utils::SetupLog(pragram_name); + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::testing::InitGoogleTest(&argc, argv); + + return RUN_ALL_TESTS(); +} + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/utils/test/counter_test.cc b/src/common/test/counter_test.cc similarity index 95% rename from src/utils/test/counter_test.cc rename to src/common/test/counter_test.cc index 526f9cae6..598c98f04 100644 --- a/src/utils/test/counter_test.cc +++ b/src/common/test/counter_test.cc @@ -11,7 +11,7 @@ #include "common/mutex.h" #include "common/thread_pool.h" -#include "counter.h" +#include "common/counter.h" namespace tera { @@ -69,7 +69,7 @@ TEST(CounterTest, Basic) { Counter counter; ThreadPool* pool = new ThreadPool(thread_num); for (int i = 0; i < thread_num / 4; ++i) { - std::function callback = + std::function callback = std::bind(&callback_add, &counter); pool->AddTask(callback); @@ -99,7 +99,7 @@ TEST(CounterTest, Clear) { Counter counter; ThreadPool* pool = new ThreadPool(thread_num); for (int i = 0; i < thread_num / 3; ++i) { - std::function callback = + std::function callback = std::bind(&callback_add, &counter); pool->AddTask(callback); diff --git a/src/common/test/log_cleaner_test.cc b/src/common/test/log_cleaner_test.cc new file mode 100644 index 000000000..8fbf3ef9f --- /dev/null +++ b/src/common/test/log_cleaner_test.cc @@ -0,0 +1,246 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ + +#include +#include +#include +#include +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "common/file/file_path.h" +#include "common/log/log_cleaner.h" +#include "common/this_thread.h" +#include "utils/utils_cmd.h" + +DECLARE_string(log_dir); +DECLARE_string(tera_log_prefix); +DECLARE_string(tera_leveldb_log_path); +DECLARE_int64(tera_info_log_clean_period_second); +DECLARE_int64(tera_info_log_expire_second); + +using namespace std::placeholders; + +namespace common { + +static size_t g_touch_file_count = 0; +static size_t g_expect_clean_count = 0; +const static int64_t kTestLogExpireSecond = 5; + +std::string TouchFile(const std::string& dir_path, const std::string& filename, bool need_close = true) { + std::string full_path = dir_path + "/" + filename; + int fd = open(full_path.c_str(), O_RDWR | O_CREAT | O_TRUNC, 0777); + if (need_close && fd > 0) { + close(fd); + } + ++g_touch_file_count; + return full_path; +} + +void SetupTestEnv() { + std::string leveldb_log_prefix = "leveldb.log"; + FLAGS_tera_leveldb_log_path = "./log/" + leveldb_log_prefix; + // fake options, change log dir for cleaner + FLAGS_log_dir = "./test_log"; + FLAGS_tera_log_prefix = "tera_test"; + FLAGS_tera_info_log_clean_period_second = 1; + FLAGS_tera_info_log_expire_second = kTestLogExpireSecond; + std::string other_prefix = "tera_other_prefix"; + + // make test log dir, ignore failture + mkdir(FLAGS_log_dir.c_str(), 0777); + g_touch_file_count = 0; + g_expect_clean_count = 0; + + // touch file unlinked + std::string unlinked_info = FLAGS_tera_log_prefix + ".INFO.unlink"; + TouchFile(FLAGS_log_dir, unlinked_info); + std::string unlinked_warn = FLAGS_tera_log_prefix + ".WARNING.unlink"; + TouchFile(FLAGS_log_dir, unlinked_warn); + std::string unlinked_err = FLAGS_tera_log_prefix + ".stderr.unlink"; + TouchFile(FLAGS_log_dir, unlinked_err); + g_expect_clean_count += 3; // expect clean unlinked file + + // touch file linked + 
std::string linked_info = FLAGS_tera_log_prefix + ".INFO.linked"; + std::string info_link_path = FLAGS_log_dir + "/" + FLAGS_tera_log_prefix + ".INFO"; + std::string linked_info_path = TouchFile(FLAGS_log_dir, linked_info); + // link full path + remove(info_link_path.c_str()); + symlink(linked_info_path.c_str(), info_link_path.c_str()); + ++g_touch_file_count; + + std::string linked_warn = FLAGS_tera_log_prefix + ".WARNING.linked"; + std::string warn_link_path = FLAGS_log_dir + "/" + FLAGS_tera_log_prefix + ".WARNING"; + TouchFile(FLAGS_log_dir, linked_warn); + // link filename only + remove(warn_link_path.c_str()); + symlink(linked_warn.c_str(), warn_link_path.c_str()); + ++g_touch_file_count; + + // touch file opened + std::string opened_info = FLAGS_tera_log_prefix + ".INFO.opened"; + TouchFile(FLAGS_log_dir, opened_info, false); + std::string opened_warn = FLAGS_tera_log_prefix + ".WARNING.opened"; + TouchFile(FLAGS_log_dir, opened_warn, false); + std::string opened_err = FLAGS_tera_log_prefix + ".stderr.opened"; + TouchFile(FLAGS_log_dir, opened_err, false); + + // touch file not start with prefix + std::string other_pre_info = other_prefix + ".INFO.otherpre"; + TouchFile(FLAGS_log_dir, other_pre_info); + std::string other_pre_warn = other_prefix + ".WARNING.otherpre"; + TouchFile(FLAGS_log_dir, other_pre_warn); + std::string other_pre_err = other_prefix + ".stderr.otherpre"; + TouchFile(FLAGS_log_dir, other_pre_err); + + // touch file start with leveldb_log_prefix and open one of them + std::string ldb_pre_info = leveldb_log_prefix; + TouchFile(FLAGS_log_dir, ldb_pre_info, false); + std::string ldb_pre_info_lod = leveldb_log_prefix + ".old"; + TouchFile(FLAGS_log_dir, ldb_pre_info_lod); + g_expect_clean_count++; // expect clean leveldb_log_prefix.old +} + +TEST(LogCleanerTest, InitialStatus) { + // ensure stop firstly + LogCleaner::StopCleaner(); + ASSERT_TRUE(LogCleaner::singleton_instance_ == NULL); + SetupTestEnv(); + LogCleaner *cleaner = 
LogCleaner::GetInstance(); + + ASSERT_FALSE(cleaner == NULL); + ASSERT_FALSE(cleaner->IsRunning()); + ASSERT_TRUE(cleaner->CheckOptions()); + ASSERT_FALSE(cleaner->stop_); +} + +TEST(LogCleanerTest, Basic) { + SetupTestEnv(); + // get instance + LogCleaner *cleaner = LogCleaner::GetInstance(); + ASSERT_FALSE(cleaner == NULL); + + // check log dir before clean + std::vector reserved_file_list; + bool list_ret = ListCurrentDir(cleaner->info_log_dir_, &reserved_file_list); + ASSERT_TRUE(list_ret); + + // print filelist before clean + std::cout << "before clean. file count: " << reserved_file_list.size() << std::endl; + for (size_t i = 0; i < reserved_file_list.size(); ++i) { + std::cout << reserved_file_list[i] << std::endl; + } + ASSERT_EQ(reserved_file_list.size(), g_touch_file_count); + + // start and stop + cleaner->Start(); + ASSERT_TRUE(cleaner->IsRunning()); + ASSERT_FALSE(cleaner->stop_); + + { + // wait schedule clean first times + MutexLock l(&(cleaner->mutex_), "log cleaner unittest"); + cleaner->bg_cond_.Wait(); + } + + // check clean result + reserved_file_list.clear(); + list_ret = ListCurrentDir(cleaner->info_log_dir_, &reserved_file_list); + ASSERT_TRUE(list_ret); + // print filelist after clean + std::cout << "first clean. expect clean nothing since not expire yet" << std::endl; + EXPECT_EQ(reserved_file_list.size(), g_touch_file_count); + + { + // wait schedule clean second times + MutexLock l(&(cleaner->mutex_), "log cleaner unittest"); + cleaner->bg_cond_.Wait(); + } + // check clean result + reserved_file_list.clear(); + list_ret = ListCurrentDir(cleaner->info_log_dir_, &reserved_file_list); + ASSERT_TRUE(list_ret); + std::cout << "second clean. expect clean nothing since not expire yet" << std::endl; + EXPECT_EQ(reserved_file_list.size(), g_touch_file_count); + + for (size_t i = 3; i < kTestLogExpireSecond + 5; ++i) { + // wait schedule clean several times + std::cout << "wait " << i << " times clean." 
<< std::endl; + MutexLock l(&(cleaner->mutex_), "log cleaner unittest"); + cleaner->bg_cond_.Wait(); + } + // check clean result + reserved_file_list.clear(); + list_ret = ListCurrentDir(cleaner->info_log_dir_, &reserved_file_list); + ASSERT_TRUE(list_ret); + std::cout << "after " << kTestLogExpireSecond + << " times clean. expect clean " << g_expect_clean_count + << " logs: " << std::endl; + // print filelist after clean + for (size_t i = 0; i < reserved_file_list.size(); ++i) { + std::cout << reserved_file_list[i] << std::endl; + } + EXPECT_EQ(reserved_file_list.size(), g_touch_file_count - g_expect_clean_count); + + // stop cleaner + cleaner->Stop(); + ASSERT_FALSE(cleaner->IsRunning()); + ASSERT_TRUE(cleaner->stop_); + ASSERT_FALSE(cleaner == NULL); + + // destroy + LogCleaner::StopCleaner(); + ASSERT_TRUE(LogCleaner::singleton_instance_ == NULL); +} + +TEST(LogCleanerTest, MultiStartAndStop) { + // ensure stop firstly + LogCleaner::StopCleaner(); + ASSERT_TRUE(LogCleaner::singleton_instance_ == NULL); + + SetupTestEnv(); + // get instance + LogCleaner *cleaner = LogCleaner::GetInstance(); + + // stop while not start + cleaner->Stop(); + ASSERT_FALSE(cleaner->IsRunning()); + ASSERT_TRUE(cleaner->stop_); + + // start three times + cleaner->Start(); + ASSERT_TRUE(cleaner->IsRunning()); + cleaner->Start(); + ASSERT_TRUE(cleaner->IsRunning()); + cleaner->Start(); + ASSERT_TRUE(cleaner->IsRunning()); + + { + // wait schedule clean + MutexLock l(&(cleaner->mutex_), "log cleaner unittest"); + cleaner->bg_cond_.Wait(); + } + + // stop twice + cleaner->Stop(); + ASSERT_FALSE(cleaner->IsRunning()); + cleaner->Stop(); + ASSERT_FALSE(cleaner->IsRunning()); + + // start again + cleaner->Start(); + ASSERT_TRUE(cleaner->IsRunning()); + + // stop and destroy + LogCleaner::StopCleaner(); + ASSERT_TRUE(LogCleaner::singleton_instance_ == NULL); +} + +} // end namespace common + diff --git a/src/common/test/metric_counter_test.cc b/src/common/test/metric_counter_test.cc new file 
mode 100644 index 000000000..00062b8ff --- /dev/null +++ b/src/common/test/metric_counter_test.cc @@ -0,0 +1,97 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "common/metric/metric_counter.h" + +namespace tera { + +class MetricCounterTest : public ::testing::Test { +public: + virtual void SetUp() { + label_str_ = LabelStringBuilder() + .Append("test_label1", "test_value1") + .Append("test_label2", "test_value2") + .ToString(); + } + + virtual void TearDown() {} + +private: + std::string label_str_; +}; + +TEST_F(MetricCounterTest, RegisterTest) { + MetricId test_id; + { + // with name and labels + MetricCounter counter1("counter1", label_str_); + test_id = counter1.metric_id_; + + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(counter1.metric_id_)) + << "metric_id " << counter1.metric_id_.ToString() << std::endl; + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) + << "metric_id " << test_id.ToString() << std::endl; + EXPECT_TRUE(counter1.IsRegistered()); + } + EXPECT_FALSE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) + << "metric_id " << test_id.ToString() << std::endl; + + { + // with name only + MetricCounter counter2("counter2", {}, true); + test_id = counter2.metric_id_; + + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(counter2.metric_id_)) + << "metric_id " << counter2.metric_id_.ToString() << std::endl; + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) + << "metric_id " << test_id.ToString() << std::endl; + EXPECT_TRUE(counter2.IsRegistered()); + } + EXPECT_FALSE(CollectorReportPublisher::GetInstance().HasCollector(test_id)) + << "metric_id " << test_id.ToString() << std::endl; + + // with illegal 
label string + ASSERT_THROW(MetricCounter("counter3", "illegal_label_string", {}, true), std::invalid_argument); + + // with empty name + ASSERT_THROW(MetricCounter("", label_str_, {}, true), std::invalid_argument); + ASSERT_THROW(MetricCounter("", {}, true), std::invalid_argument); +} + +TEST_F(MetricCounterTest, CollectTest) { + MetricCounter periodic_counter("periodic", label_str_, {}, true); + MetricCounter nonperiodic_counter("nonperiodic", label_str_, {}, false); + + for (size_t i = 0; i < 3; ++i) { + periodic_counter.Inc(); + nonperiodic_counter.Inc(); + } + EXPECT_EQ(periodic_counter.Get(), 3); + EXPECT_EQ(nonperiodic_counter.Get(), 3); + + // do collect + CollectorReportPublisher::GetInstance().Refresh(); + + EXPECT_EQ(periodic_counter.Get(), 0); + EXPECT_EQ(nonperiodic_counter.Get(), 3); + + periodic_counter.Inc(); + nonperiodic_counter.Inc(); + EXPECT_EQ(periodic_counter.Get(), 1); + EXPECT_EQ(nonperiodic_counter.Get(), 4); +} + +} // end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/test/metric_http_server_test.cc b/src/common/test/metric_http_server_test.cc new file mode 100644 index 000000000..c911b438e --- /dev/null +++ b/src/common/test/metric_http_server_test.cc @@ -0,0 +1,138 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include +#include +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "common/metric/metric_counter.h" +#include "common/metric/metric_http_server.h" +#include "common/metric/collector_report.h" +#include "common/base/string_ext.h" + +namespace tera { + +class MetricHttpServerTest : public ::testing::Test { +public: + virtual void SetUp() { + // register metrics + test_counter = new MetricCounter("counter", {SubscriberType::LATEST}); + server = new MetricHttpServer; + test_counter->Set(1); + } + virtual void TearDown() { + delete test_counter; + delete server; + } + +private: + MetricCounter* test_counter; + MetricHttpServer* server; +}; + +TEST_F(MetricHttpServerTest, BuildType) { + std::string body; + ResponseBodyBuilder::BuildType(&body, "good", "gauge"); + EXPECT_STREQ(body.c_str(), "# TYPE good gauge\n"); + ResponseBodyBuilder::BuildType(&body, "bad", "summary"); + EXPECT_STREQ(body.c_str(), "# TYPE good gauge\n" + "# TYPE bad summary\n"); +} + +TEST_F(MetricHttpServerTest, BuildHelp) { + std::string body; + ResponseBodyBuilder::BuildHelp(&body, "good", "good"); + EXPECT_STREQ(body.c_str(), "# HELP good good\n"); + ResponseBodyBuilder::BuildHelp(&body, "bad", "bad"); + EXPECT_STREQ(body.c_str(), "# HELP good good\n" + "# HELP bad bad\n"); +} + +TEST_F(MetricHttpServerTest, BuildMetricItem) { + CollectorReportPublisher::GetInstance().Refresh(); + auto report = CollectorReportPublisher::GetInstance().GetSubscriberReport(); + + std::string body; + int64_t time_stamp; + + for (const auto& item : *report) { + if (item.first.GetName() == "counter") { + ResponseBodyBuilder::BuildMetricItem(&body, item.first, item.second); + time_stamp = item.second.Time(); + } + } + std::string expect_body = "counter{value_type=\"Latest\"} 1 " + + std::to_string(time_stamp) + "\n"; + + EXPECT_EQ(body, expect_body); + EXPECT_EQ(test_counter->Get(), 0); + test_counter->Set(2); + + 
CollectorReportPublisher::GetInstance().Refresh(); + report = CollectorReportPublisher::GetInstance().GetSubscriberReport(); + + for (const auto& item : *report) { + if (item.first.GetName() == "counter") { + ResponseBodyBuilder::BuildMetricItem(&body, item.first, item.second); + time_stamp = item.second.Time(); + } + } + + expect_body += "counter{value_type=\"Latest\"} 2 " + + std::to_string(time_stamp) + "\n"; + + EXPECT_EQ(body, expect_body); +} + +TEST_F(MetricHttpServerTest, GetResponseBody) { + CollectorReportPublisher::GetInstance().Refresh(); + int64_t timestamp = CollectorReportPublisher::GetInstance().GetCollectorReport()->timestamp_ms; + std::string body = server->GetResponseBody(); + std::vector splited_string; + SplitString(body, "\n", &splited_string); + bool find_counter = false; + for (int idx = 0; idx != splited_string.size(); ++ idx) { + if (splited_string[idx].substr(0, 8) == "counter{") { + find_counter = true; + EXPECT_STREQ(splited_string[idx - 2].c_str(), + "# HELP counter counter"); + EXPECT_STREQ(splited_string[idx - 1].c_str(), + "# TYPE counter gauge"); + std::string expected_line = "counter{value_type=\"Latest\"} 1 " + std::to_string(timestamp); + EXPECT_EQ(expected_line, splited_string[idx]); + } + } + EXPECT_TRUE(find_counter); + EXPECT_EQ(test_counter->Get(), 0); + test_counter->Set(19); + find_counter = false; + + CollectorReportPublisher::GetInstance().Refresh(); + timestamp = CollectorReportPublisher::GetInstance().GetCollectorReport()->timestamp_ms; + body = server->GetResponseBody(); + splited_string.clear(); + SplitString(body, "\n", &splited_string); + for (int idx = 0; idx != splited_string.size(); ++ idx) { + if (splited_string[idx].substr(0, 8) == "counter{") { + find_counter = true; + EXPECT_STREQ(splited_string[idx - 2].c_str(), + "# HELP counter counter"); + EXPECT_STREQ(splited_string[idx - 1].c_str(), + "# TYPE counter gauge"); + std::string expected_line = "counter{value_type=\"Latest\"} 19 " + 
std::to_string(timestamp); + EXPECT_EQ(expected_line, splited_string[idx]); + } + } + + EXPECT_TRUE(find_counter); + EXPECT_EQ(test_counter->Get(), 0); +} +} // end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/test/metric_id_test.cc b/src/common/test/metric_id_test.cc new file mode 100644 index 000000000..ad2795073 --- /dev/null +++ b/src/common/test/metric_id_test.cc @@ -0,0 +1,178 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "common/metric/metric_id.h" + +namespace tera { + +static const std::string kTestMetricName = "test_name"; + +class MetricIdTest : public ::testing::Test { +public: + virtual void SetUp() { + empty_id_ = new MetricId(); + id_with_name_ = new MetricId(kTestMetricName); + + MetricLabels label_map; + label_map.insert(std::make_pair("test_label1", "test_value1")); + label_map.insert(std::make_pair("test_label2", "test_value2")); + label_str_ = "test_label1:test_value1,test_label2:test_value2"; + + id_with_label_ = new MetricId("", label_map); + id_with_name_and_label_ = new MetricId(kTestMetricName, label_map); + } + + virtual void TearDown() { + delete empty_id_; + delete id_with_name_; + delete id_with_label_; + delete id_with_name_and_label_; + } + +private: + MetricId *empty_id_; + MetricId *id_with_name_; + MetricId *id_with_label_; + MetricId *id_with_name_and_label_; + std::string label_str_; +}; + +TEST_F(MetricIdTest, BasicTest) { + // empty id + ASSERT_FALSE(empty_id_->IsValid()); + ASSERT_TRUE(empty_id_->GetName().empty()); + ASSERT_TRUE(empty_id_->GetLabelMap().empty()); + ASSERT_TRUE(empty_id_->ToString().empty()); + ASSERT_TRUE(empty_id_->GetLabel("whatever_label").empty()); + ASSERT_FALSE(empty_id_->ExistLabel("whatever_label")); + 
ASSERT_FALSE(empty_id_->CheckLabel("whatever_label", "whatever_value")); + + // id with name, empty label + ASSERT_TRUE(id_with_name_->IsValid()); + ASSERT_STREQ(id_with_name_->GetName().c_str(), kTestMetricName.c_str()); + ASSERT_TRUE(id_with_name_->GetLabelMap().empty()); + ASSERT_STREQ(id_with_name_->ToString().c_str(), kTestMetricName.c_str()); + ASSERT_TRUE(id_with_name_->GetLabel("whatever_label").empty()); + ASSERT_FALSE(id_with_name_->ExistLabel("whatever_label")); + ASSERT_FALSE(id_with_name_->CheckLabel("whatever_label", "whatever_value")); + + // id with name and label + ASSERT_TRUE(id_with_name_and_label_->IsValid()); + ASSERT_STREQ(id_with_name_and_label_->GetName().c_str(), kTestMetricName.c_str()); + ASSERT_EQ(id_with_name_and_label_->GetLabelMap().size(), 2); + + std::string expected_id_str = kTestMetricName + kNameLabelsDelimiter + label_str_; + ASSERT_STREQ(id_with_name_and_label_->ToString().c_str(), expected_id_str.c_str()); + ASSERT_STREQ(id_with_name_and_label_->GetLabel("test_label1").c_str(), "test_value1"); + ASSERT_TRUE(id_with_name_and_label_->ExistLabel("test_label1")); + ASSERT_TRUE(id_with_name_and_label_->CheckLabel("test_label1", "test_value1")); + + ASSERT_TRUE(id_with_name_and_label_->GetLabel("not_exist_label").empty()); + ASSERT_FALSE(id_with_name_and_label_->ExistLabel("not_exist_label")); + ASSERT_FALSE(id_with_name_and_label_->CheckLabel("not_exist_label", "test_value1")); + ASSERT_FALSE(id_with_name_and_label_->CheckLabel("test_label1", "test_value2")); + + // id with label, empty name + ASSERT_FALSE(id_with_label_->IsValid()); +} + +TEST_F(MetricIdTest, CopyTest) { + // copy id + MetricId copy_id(*id_with_name_and_label_); + ASSERT_TRUE(copy_id.IsValid()); + ASSERT_STREQ(copy_id.GetName().c_str(), id_with_name_and_label_->GetName().c_str()); + ASSERT_EQ(copy_id.GetLabelMap().size(), id_with_name_and_label_->GetLabelMap().size()); + ASSERT_STREQ(copy_id.ToString().c_str(), id_with_name_and_label_->ToString().c_str()); + 
ASSERT_STREQ(copy_id.GetLabel("test_label1").c_str(), "test_value1"); + ASSERT_TRUE(copy_id.ExistLabel("test_label1")); + ASSERT_TRUE(copy_id.CheckLabel("test_label1", "test_value1")); + + ASSERT_TRUE(copy_id.GetLabel("not_exist_label").empty()); + ASSERT_FALSE(copy_id.ExistLabel("not_exist_label")); + ASSERT_FALSE(copy_id.CheckLabel("not_exist_label", "test_value1")); + ASSERT_FALSE(copy_id.CheckLabel("test_label1", "test_value2")); + ASSERT_TRUE(copy_id == *id_with_name_and_label_); + + // assign id + MetricId assign_id; + assign_id = *id_with_name_and_label_; + ASSERT_TRUE(assign_id.IsValid()); + ASSERT_STREQ(assign_id.GetName().c_str(), id_with_name_and_label_->GetName().c_str()); + ASSERT_EQ(assign_id.GetLabelMap().size(), id_with_name_and_label_->GetLabelMap().size()); + ASSERT_STREQ(assign_id.ToString().c_str(), id_with_name_and_label_->ToString().c_str()); + ASSERT_STREQ(assign_id.GetLabel("test_label1").c_str(), "test_value1"); + ASSERT_TRUE(assign_id.ExistLabel("test_label1")); + ASSERT_TRUE(assign_id.CheckLabel("test_label1", "test_value1")); + + ASSERT_TRUE(assign_id.GetLabel("not_exist_label").empty()); + ASSERT_FALSE(assign_id.ExistLabel("not_exist_label")); + ASSERT_FALSE(assign_id.CheckLabel("not_exist_label", "test_value1")); + ASSERT_FALSE(assign_id.CheckLabel("test_label1", "test_value2")); + ASSERT_TRUE(assign_id == *id_with_name_and_label_); +} + +TEST_F(MetricIdTest, BuildTest) { + MetricId test_id; + bool ret = false; + + std::string legal_label_str = LabelStringBuilder() + .Append("test_label1", "test_value1") + .Append("test_label2", "test_value2") + .ToString(); + ASSERT_STREQ(legal_label_str.c_str(), label_str_.c_str()); + + ret = MetricId::ParseFromString(kTestMetricName, legal_label_str, &test_id); + ASSERT_TRUE(ret) << "Parse label string: " << legal_label_str << ", failed" << std::endl; + ASSERT_TRUE(test_id.IsValid()); + ASSERT_STREQ(test_id.GetName().c_str(), kTestMetricName.c_str()); + ASSERT_EQ(test_id.GetLabelMap().size(), 
id_with_name_and_label_->GetLabelMap().size()); + std::string expected_id_str = kTestMetricName + kNameLabelsDelimiter + legal_label_str; + ASSERT_STREQ(test_id.ToString().c_str(), expected_id_str.c_str()); + + std::string single_label_str = LabelStringBuilder() + .Append("test_label1", "test_value1") + .ToString(); + ASSERT_STREQ(single_label_str.c_str(), "test_label1:test_value1"); + ret = MetricId::ParseFromString(kTestMetricName, single_label_str, &test_id); + ASSERT_TRUE(ret) << "Parse label string: " << single_label_str << ", failed" << std::endl; + ASSERT_TRUE(test_id.IsValid()); + ASSERT_STREQ(test_id.GetName().c_str(), kTestMetricName.c_str()); + ASSERT_EQ(test_id.GetLabelMap().size(), 1); + expected_id_str = kTestMetricName + kNameLabelsDelimiter + single_label_str; + ASSERT_STREQ(test_id.ToString().c_str(), expected_id_str.c_str()); + + std::string empty_label_str = LabelStringBuilder().ToString(); + ASSERT_STREQ(empty_label_str.c_str(), ""); + ret = MetricId::ParseFromString(kTestMetricName, empty_label_str, &test_id); + ASSERT_TRUE(ret); + ASSERT_TRUE(test_id.IsValid()); + ASSERT_STREQ(test_id.GetName().c_str(), kTestMetricName.c_str()); + ASSERT_TRUE(test_id.GetLabelMap().empty()); + ASSERT_STREQ(test_id.ToString().c_str(), kTestMetricName.c_str()); + + std::vector illegal_label_str_vec; + illegal_label_str_vec.push_back("haha:hehe,,,,"); + illegal_label_str_vec.push_back("haha:hehe,hoho"); + illegal_label_str_vec.push_back("haha:hehe,hoho:heihei,"); + illegal_label_str_vec.push_back("haha"); + illegal_label_str_vec.push_back(",lalala"); + + for (const std::string& illegal_label : illegal_label_str_vec) { + ret = MetricId::ParseFromString(kTestMetricName, illegal_label, &test_id); + ASSERT_FALSE(ret); + } +} + +} // end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/test/metrics_test.cc b/src/common/test/metrics_test.cc new file mode 100644 index 000000000..7bc5e9abb --- /dev/null +++ 
b/src/common/test/metrics_test.cc @@ -0,0 +1,187 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "common/metric/metric_counter.h" +#include "common/metric/hardware_collectors.h" +#include "common/metric/collector_report_publisher.h" +#include "common/this_thread.h" + +DECLARE_int64(tera_hardware_collect_period_second); + +namespace tera { + +class MetricsTest : public ::testing::Test { +public: + virtual void SetUp() { + // shorter period for test + FLAGS_tera_hardware_collect_period_second = 1; + CollectorReportPublisher::GetInstance().AddHardwareCollectors(); + + label_map_["test_label1"] = "test_value1"; + label_map_["test_label2"] = "test_value2"; + } + + virtual void TearDown() { + CollectorReportPublisher::GetInstance().collectors_.clear(); + label_map_.clear(); + } + +private: + MetricLabels label_map_; +}; + +static void PrintCollectorReportPublisher() { + std::cout << "Print Metric Registry: " << std::endl; + auto& metric_map = CollectorReportPublisher::GetInstance().collectors_; + auto metric_iter = metric_map.begin(); + for (; metric_iter != metric_map.end(); ++metric_iter) { + std::cout << metric_iter->first.ToString() << std::endl; + } +} + +TEST_F(MetricsTest, RegisterTest) { + // hardware metrics + ASSERT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId(kInstCpuMetricName))); + ASSERT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId(kInstMemMetricName))); + ASSERT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId(kInstNetRXMetricName))); + ASSERT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId(kInstNetTXMetricName))); + + bool ret = false; + Counter* test_counters = new Counter[5]; + // register a counter + MetricId 
test_id_1("test_counter", label_map_); + ret = CollectorReportPublisher::GetInstance().AddCollector( + test_id_1, std::unique_ptr(new CounterCollector(&test_counters[0]))); + EXPECT_TRUE(ret); + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_1)); + PrintCollectorReportPublisher(); + + // register a counter with different name + MetricId test_id_2("test_counter_2", label_map_); + ret = CollectorReportPublisher::GetInstance().AddCollector( + test_id_2, std::unique_ptr(new CounterCollector(&test_counters[0]))); + EXPECT_TRUE(ret); + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_2)); + PrintCollectorReportPublisher(); + + // register a counter with name only + ret = CollectorReportPublisher::GetInstance().AddCollector( + MetricId("test_counter3"), std::unique_ptr(new CounterCollector(&test_counters[2]))); + EXPECT_TRUE(ret); + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId("test_counter3"))); + PrintCollectorReportPublisher(); + + // register a counter with same name and different labels + label_map_["test_label2"] = "other_label_value"; + MetricId test_id_4("test_counter", label_map_); + ret = CollectorReportPublisher::GetInstance().AddCollector( + test_id_4, std::unique_ptr(new CounterCollector(&test_counters[3]))); + EXPECT_TRUE(ret); + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_4)); + PrintCollectorReportPublisher(); + + // register a counter with same id + ret = CollectorReportPublisher::GetInstance().AddCollector( + test_id_1, std::unique_ptr(new CounterCollector(&test_counters[4]))); + EXPECT_FALSE(ret); + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_1)); + PrintCollectorReportPublisher(); + + ret = CollectorReportPublisher::GetInstance().AddCollector( + MetricId("test_counter3"), std::unique_ptr(new CounterCollector(&test_counters[4]))); + EXPECT_FALSE(ret); + 
EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(MetricId("test_counter3"))); + PrintCollectorReportPublisher(); + + // unregister + ret = CollectorReportPublisher::GetInstance().DeleteCollector(test_id_1); + EXPECT_TRUE(ret); + EXPECT_FALSE(CollectorReportPublisher::GetInstance().HasCollector(test_id_1)); + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_2)); + + ret = CollectorReportPublisher::GetInstance().DeleteCollector(MetricId("test_counter3")); + EXPECT_TRUE(ret); + EXPECT_FALSE(CollectorReportPublisher::GetInstance().HasCollector(MetricId("test_counter3"))); + EXPECT_TRUE(CollectorReportPublisher::GetInstance().HasCollector(test_id_2)); + + MetricId not_registered_id("not_registered_name", label_map_); + ret = CollectorReportPublisher::GetInstance().DeleteCollector(not_registered_id); + EXPECT_FALSE(ret); + + label_map_["test_label2"] = "not_registered_value"; + MetricId not_registered_id_2("test_counter", label_map_); + ret = CollectorReportPublisher::GetInstance().DeleteCollector(not_registered_id_2); + EXPECT_FALSE(ret); + + ret = CollectorReportPublisher::GetInstance().DeleteCollector(MetricId("not_registered_name")); + EXPECT_FALSE(ret); + + delete[] test_counters; +} + +TEST_F(MetricsTest, ReportTest) { + // check report cache + int64_t value = 0; + + // register 2 counter + std::string label_str = LabelStringBuilder() + .Append("test_label1", "test_value1") + .Append("test_label2", "test_value2") + .ToString(); + MetricCounter periodic_counter("periodic", label_str, {}, true); + MetricCounter nonperiodic_counter("nonperiodic", label_str, {}, false); + + for (size_t i = 0; i < 3; ++i) { + periodic_counter.Inc(); + nonperiodic_counter.Inc(); + } + EXPECT_EQ(periodic_counter.Get(), 3); + EXPECT_EQ(nonperiodic_counter.Get(), 3); + + // do collect + ThisThread::Sleep(10); + + CollectorReportPublisher::GetInstance().Refresh(); + std::shared_ptr report = CollectorReportPublisher::GetInstance().GetCollectorReport(); 
+ + EXPECT_EQ(periodic_counter.Get(), 0); + EXPECT_EQ(nonperiodic_counter.Get(), 3); + + // check report + EXPECT_EQ(report->report.size(), CollectorReportPublisher::GetInstance().collectors_.size()); + value = report->FindMetricValue("periodic", label_str); + EXPECT_EQ(value, 3); + value = report->FindMetricValue("nonperiodic", label_str); + EXPECT_EQ(value, 3); + + // change counter value + periodic_counter.Inc(); + nonperiodic_counter.Dec(); + EXPECT_EQ(periodic_counter.Get(), 1); + EXPECT_EQ(nonperiodic_counter.Get(), 2); + + // report again + CollectorReportPublisher::GetInstance().Refresh(); + report = CollectorReportPublisher::GetInstance().GetCollectorReport(); + EXPECT_EQ(periodic_counter.Get(), 0); + EXPECT_EQ(nonperiodic_counter.Get(), 2); + + value = report->FindMetricValue("periodic", label_str); + EXPECT_EQ(value, 1); + value = report->FindMetricValue("nonperiodic", label_str); + EXPECT_EQ(value, 2); +} + +} // end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/test/profiler_test.cc b/src/common/test/profiler_test.cc new file mode 100644 index 000000000..623d1c0f4 --- /dev/null +++ b/src/common/test/profiler_test.cc @@ -0,0 +1,83 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include + +#include +#include + +#include "gtest/gtest.h" + +#include "common/cpu_profiler.h" +#include "common/heap_profiler.h" +#include "common/this_thread.h" + +namespace tera { + +class ProfilerTest : public ::testing::Test { +public: + virtual void SetUp() {} + + virtual void TearDown() {} + +private: + CpuProfiler cpu_profiler_; + HeapProfiler heap_profiler_; +}; + +TEST_F(ProfilerTest, SetEnableTest) { + ProfilerState ps; + EXPECT_FALSE(cpu_profiler_.enable_); + EXPECT_FALSE(heap_profiler_.enable_); + ProfilerGetCurrentState(&ps); + EXPECT_FALSE(ps.enabled); + EXPECT_FALSE(IsHeapProfilerRunning()); + + cpu_profiler_.SetProfilerFile("Cpu") + .SetEnable(true); + + heap_profiler_.SetProfilerFile("Heap") + .SetEnable(true); + + EXPECT_TRUE(cpu_profiler_.enable_); + EXPECT_TRUE(heap_profiler_.enable_); + + ThisThread::Sleep(2000); + ProfilerGetCurrentState(&ps); + EXPECT_TRUE(ps.enabled); + EXPECT_TRUE(IsHeapProfilerRunning()); + + cpu_profiler_.SetEnable(false); + heap_profiler_.SetEnable(false); + + EXPECT_FALSE(cpu_profiler_.enable_); + EXPECT_FALSE(heap_profiler_.enable_); + + ThisThread::Sleep(2000); + ProfilerGetCurrentState(&ps); + EXPECT_FALSE(ps.enabled); + EXPECT_FALSE(IsHeapProfilerRunning()); +} + +TEST_F(ProfilerTest, SetInvervalTest) { + EXPECT_EQ(cpu_profiler_.interval_, std::chrono::seconds(10)); + EXPECT_EQ(heap_profiler_.interval_, std::chrono::seconds(10)); + cpu_profiler_.SetInterval(1000); + heap_profiler_.SetInterval(2000); + EXPECT_EQ(cpu_profiler_.interval_, std::chrono::seconds(1000)); + EXPECT_EQ(heap_profiler_.interval_, std::chrono::seconds(2000)); +} + +TEST_F(ProfilerTest, SetProfilerFileTest) { + EXPECT_EQ(cpu_profiler_.profiler_file_, std::string("")); + EXPECT_EQ(heap_profiler_.profiler_file_, std::string("")); + cpu_profiler_.SetProfilerFile("Good"); + heap_profiler_.SetProfilerFile("Bad"); + EXPECT_EQ(cpu_profiler_.profiler_file_, std::string("Good")); + EXPECT_EQ(heap_profiler_.profiler_file_, std::string("Bad")); +} +} // 
end namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git a/src/common/test/thread_pool_test.cc b/src/common/test/thread_pool_test.cc index 6c1e421cb..8462b78a5 100644 --- a/src/common/test/thread_pool_test.cc +++ b/src/common/test/thread_pool_test.cc @@ -56,7 +56,7 @@ TEST(TimerTest, test1) { clock_gettime(CLOCK_REALTIME, &ts1); gettimeofday(&tv, NULL); - int64_t ts = common::timer::get_micros(); + int64_t ts = get_micros(); int delta = 0; delta = ts1.tv_sec - tv.tv_sec; diff --git a/src/common/timer.h b/src/common/timer.h index 1b335bb6b..b035e18c9 100644 --- a/src/common/timer.h +++ b/src/common/timer.h @@ -1,18 +1,31 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +#pragma once +// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // // Author: yanshiguang02@baidu.com -#ifndef TERA_COMMON_TIMER_H_ -#define TERA_COMMON_TIMER_H_ #include #include #include +#include -namespace common { -namespace timer { +namespace tera{ + +static inline int64_t get_timestamp_from_str(const std::string& time) { + struct tm tm; + memset(&tm, 0, sizeof(tm)); + + sscanf(time.c_str(), "%4d%2d%2d-%d:%d:%d", + &tm.tm_year, &tm.tm_mon, &tm.tm_mday, + &tm.tm_hour, &tm.tm_min, &tm.tm_sec); + + tm.tm_year -= 1900; + tm.tm_mon--; + + return mktime(&tm); +} static inline std::string get_time_str(int64_t timestamp) { struct tm tt; @@ -26,12 +39,24 @@ static inline std::string get_curtime_str() { return get_time_str(time(NULL)); } +static inline std::string get_curtime_str_plain() { + struct tm tt; + char buf[20]; + time_t t = time(NULL); + strftime(buf, 20, "%Y%m%d%H%M%S", localtime_r(&t, &tt)); + return std::string(buf); +} + static inline int64_t get_micros() { struct timespec ts; clock_gettime(CLOCK_REALTIME, &ts); return static_cast(ts.tv_sec) * 1000000 + static_cast(ts.tv_nsec) / 1000; } +static inline int64_t get_millis() { + return 
get_micros() / 1000; +} + static inline int64_t get_unique_micros(int64_t ref) { int64_t now; do { @@ -40,7 +65,12 @@ static inline int64_t get_unique_micros(int64_t ref) { return now; } -} // namespace timer -} // namespace common +static inline int64_t GetTimeStampInUs() { + return get_micros(); +} + +static inline int64_t GetTimeStampInMs() { + return get_millis(); +} +} -#endif // TERA_COMMON_TIMER_H_ diff --git a/src/io/default_compact_strategy.cc b/src/io/default_compact_strategy.cc index b667b8e6e..4e34a6060 100644 --- a/src/io/default_compact_strategy.cc +++ b/src/io/default_compact_strategy.cc @@ -256,6 +256,7 @@ bool DefaultCompactStrategy::InternalMergeProcess(leveldb::Iterator* it, } bool DefaultCompactStrategy::ScanDrop(const Slice& tera_key, uint64_t n) { + bool key_col_qual_same = false; Slice key, col, qual; int64_t ts = -1; leveldb::TeraKeyType type; @@ -345,6 +346,7 @@ bool DefaultCompactStrategy::ScanDrop(const Slice& tera_key, uint64_t n) { } return true; } else { + key_col_qual_same = true; last_type_ = type; } @@ -362,8 +364,7 @@ bool DefaultCompactStrategy::ScanDrop(const Slice& tera_key, uint64_t n) { CHECK(cf_id >= 0) << "illegel column family"; if (type == leveldb::TKT_VALUE) { - if (cur_ts_ == last_ts_ && last_qual_ == qual.ToString() && - last_col_ == col.ToString() && last_key_ == key.ToString()) { + if (cur_ts_ == last_ts_ && key_col_qual_same) { // this is the same key, do not chang version num } else { version_num_++; diff --git a/src/io/tablet_io.cc b/src/io/tablet_io.cc index 81222e447..de97994c7 100644 --- a/src/io/tablet_io.cc +++ b/src/io/tablet_io.cc @@ -27,11 +27,14 @@ #include "leveldb/filter_policy.h" #include "leveldb/raw_key_operator.h" #include "types.h" -#include "utils/counter.h" +#include "common/counter.h" #include "utils/scan_filter.h" #include "utils/string_util.h" -#include "utils/timer.h" +#include "common/timer.h" #include "utils/utils_cmd.h" +#include "common/metric/prometheus_subscriber.h" +#include 
"common/metric/ratio_subscriber.h" +#include "tabletnode/tabletnode_metric_name.h" DECLARE_string(tera_leveldb_env_type); DECLARE_int64(tera_tablet_log_file_size); @@ -69,11 +72,47 @@ DECLARE_bool(tera_tablet_use_memtable_on_leveldb); DECLARE_int64(tera_tablet_memtable_ldb_write_buffer_size); DECLARE_int64(tera_tablet_memtable_ldb_block_size); -tera::Counter row_read_delay; +DECLARE_bool(tera_leveldb_ignore_corruption_in_open); +DECLARE_int32(tera_leveldb_slow_down_level0_score_limit); +DECLARE_int32(tera_leveldb_max_background_compactions); +DECLARE_int32(tera_tablet_max_sub_parallel_compaction); namespace tera { namespace io { +using tera::tabletnode::kRowDelayMetric; +using tera::tabletnode::kRowCountMetric; +using tera::tabletnode::kRowThroughPutMetric; + +using tera::tabletnode::kApiLabelRead; +using tera::tabletnode::kApiLabelScan; +using tera::tabletnode::kApiLabelWrite; + +using tera::tabletnode::kLowLevelReadMetric; + +tera::MetricCounter low_level_read_count(kLowLevelReadMetric, {SubscriberType::QPS}); + +tera::MetricCounter row_read_delay(kRowDelayMetric, kApiLabelRead, {}); +tera::MetricCounter row_read_count(kRowCountMetric, kApiLabelRead, {SubscriberType::QPS}); +tera::MetricCounter row_read_bytes(kRowThroughPutMetric, kApiLabelRead, {SubscriberType::THROUGHPUT}); + +tera::MetricCounter row_scan_delay(kRowDelayMetric, kApiLabelScan, {}); +tera::MetricCounter row_scan_count(kRowCountMetric, kApiLabelScan, {SubscriberType::QPS}); +tera::MetricCounter row_scan_bytes(kRowThroughPutMetric, kApiLabelScan, {SubscriberType::THROUGHPUT}); + +tera::MetricCounter row_write_bytes(kRowThroughPutMetric, kApiLabelWrite, {SubscriberType::THROUGHPUT}); + +tera::AutoSubscriberRegister row_read_delay_per_row(std::unique_ptr(new tera::RatioSubscriber( + MetricId("tera_ts_row_read_delay_us_per_row"), + std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRowDelayMetric, kApiLabelRead), SubscriberType::SUM)), + std::unique_ptr(new 
tera::PrometheusSubscriber(MetricId(kRowCountMetric, kApiLabelRead), SubscriberType::SUM))))); + +tera::AutoSubscriberRegister row_scan_delay_per_row(std::unique_ptr(new tera::RatioSubscriber( + MetricId("tera_ts_row_scan_delay_us_per_row"), + std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRowDelayMetric, kApiLabelScan), SubscriberType::SUM)), + std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRowCountMetric, kApiLabelScan), SubscriberType::SUM))))); + + std::ostream& operator << (std::ostream& o, const TabletIO& tablet_io) { o << tablet_io.short_path_ << " [" << DebugString(tablet_io.start_key_) @@ -81,6 +120,17 @@ std::ostream& operator << (std::ostream& o, const TabletIO& tablet_io) { return o; } +std::string MetricLabelToString(const std::string& tablet_path) { + size_t sep_pos = tablet_path.find_last_of("/"); + if (sep_pos == std::string::npos) { + // meta tablet + return LabelStringBuilder().Append("table", tablet_path).Append("tablet", tablet_path).ToString(); + } else { + std::string table_name = tablet_path.substr(0, sep_pos); + return LabelStringBuilder().Append("table", table_name).Append("tablet", tablet_path).ToString(); + } +} + TabletIO::TabletIO(const std::string& key_start, const std::string& key_end, const std::string& path) : async_writer_(NULL), @@ -90,10 +140,12 @@ TabletIO::TabletIO(const std::string& key_start, const std::string& key_end, short_path_(path), compact_status_(kTableNotCompact), status_(kNotInit), + tablet_status_(static_cast(kTabletReady)), ref_count_(1), db_ref_count_(0), db_(NULL), m_memory_cache(NULL), kv_only_(false), key_operator_(NULL), + counter_(short_path_), mock_env_(NULL) { } @@ -138,6 +190,10 @@ std::string TabletIO::GetEndKey() const { return end_key_; } +const std::string& TabletIO::GetMetricLabel() const { + return counter_.label; +} + CompactStatus TabletIO::GetCompactStatus() const { return compact_status_; } @@ -167,6 +223,7 @@ void TabletIO::SetMemoryCache(leveldb::Cache* cache) { bool 
TabletIO::Load(const TableSchema& schema, const std::string& path, const std::vector& parent_tablets, + const std::set& ignore_err_lgs, std::map snapshots, std::map rollbacks, leveldb::Logger* logger, @@ -226,6 +283,7 @@ bool TabletIO::Load(const TableSchema& schema, ldb_options_.key_start = raw_start_key_; ldb_options_.key_end = raw_end_key_; ldb_options_.l0_slowdown_writes_trigger = FLAGS_tera_tablet_level0_file_limit; + ldb_options_.max_sub_parallel_compaction = FLAGS_tera_tablet_max_sub_parallel_compaction; ldb_options_.ttl_percentage = FLAGS_tera_tablet_ttl_percentage; ldb_options_.del_percentage = FLAGS_tera_tablet_del_percentage; ldb_options_.block_size = FLAGS_tera_tablet_write_block_size * 1024; @@ -234,6 +292,9 @@ bool TabletIO::Load(const TableSchema& schema, ldb_options_.log_async_mode = FLAGS_tera_log_async_mode; ldb_options_.info_log = logger; ldb_options_.max_open_files = FLAGS_tera_memenv_table_cache_size; + ldb_options_.max_background_compactions = FLAGS_tera_leveldb_max_background_compactions; + ldb_options_.slow_down_level0_score_limit = FLAGS_tera_leveldb_slow_down_level0_score_limit; + ldb_options_.ignore_corruption_in_open = FLAGS_tera_leveldb_ignore_corruption_in_open; ldb_options_.use_memtable_on_leveldb = FLAGS_tera_tablet_use_memtable_on_leveldb; ldb_options_.memtable_ldb_write_buffer_size = @@ -277,7 +338,7 @@ bool TabletIO::Load(const TableSchema& schema, ldb_options_.ignore_corruption_in_compaction = FLAGS_tera_leveldb_ignore_corruption_in_compaction; ldb_options_.use_file_lock = FLAGS_tera_leveldb_use_file_lock; ldb_options_.disable_wal = table_schema_.disable_wal(); - SetupOptionsForLG(); + SetupOptionsForLG(ignore_err_lgs); std::string path_prefix = FLAGS_tera_tabletnode_path_prefix; if (*path_prefix.rbegin() != '/') { @@ -328,6 +389,23 @@ bool TabletIO::Load(const TableSchema& schema, return true; } +bool TabletIO::ShouldForceUnloadOnError() { + { + MutexLock lock(&mutex_); + if (status_ != kReady) { + return false; + } + 
db_ref_count_++; + } + // If TabletIO is Ready but has encountered some fatal errors + bool ret = db_->ShouldForceUnloadOnError(); + { + MutexLock lock(&mutex_); + db_ref_count_--; + } + return ret; +} + bool TabletIO::Unload(StatusCode* status) { { MutexLock lock(&mutex_); @@ -341,7 +419,6 @@ bool TabletIO::Unload(StatusCode* status) { LOG(INFO) << "[Unload] start shutdown1 " << tablet_path_; leveldb::Status s = db_->Shutdown1(); - { MutexLock lock(&mutex_); status_ = kUnLoading2; @@ -566,13 +643,13 @@ bool TabletIO::IsBusy() { db_ref_count_++; } bool is_busy = db_->BusyWrite(); + is_busy = is_busy ? true : async_writer_->IsBusy(); { MutexLock lock(&mutex_); db_ref_count_--; } return is_busy; } - bool TabletIO::Workload(double* write_workload) { { MutexLock lock(&mutex_); @@ -581,7 +658,14 @@ bool TabletIO::Workload(double* write_workload) { } db_ref_count_++; } + + // if busy cause by write log, set workload score more than 10, because level 0 + // limits to 20 sst files by default, which score is 10. 
db_->Workload(write_workload); + if (*write_workload < 10.618 && async_writer_->IsBusy()) { + *write_workload = 10.618; + } + { MutexLock lock(&mutex_); db_ref_count_--; @@ -700,6 +784,7 @@ bool TabletIO::LowLevelScan(const std::string& start_tera_key, ScanContext* context = new ScanContext; context->compact_strategy = ldb_options_.compact_strategy_factory->NewInstance(); context->version_num = 1; + context->qu_num = 1; bool ret = LowLevelScan(start_tera_key, end_row_key, scan_options, it, context, value_list, next_start_point, read_row_count, read_bytes, is_complete, status); @@ -849,6 +934,7 @@ inline bool TabletIO::LowLevelScan(const std::string& start_tera_key, std::string& last_col = scan_context->last_col; std::string& last_qual = scan_context->last_qual; uint32_t& version_num = scan_context->version_num; + uint64_t& qu_num = scan_context->qu_num; std::list row_buf; uint32_t buffer_size = 0; @@ -861,13 +947,18 @@ inline bool TabletIO::LowLevelScan(const std::string& start_tera_key, KeyValuePair next_start_kv_pair; VLOG(9) << "ll-scan timeout set to be " << scan_options.timeout << ", start_tera_key " << DebugString(start_tera_key) - << ", end_row_key " << DebugString(end_row_key); + << ", end_row_key " << DebugString(end_row_key) + << ", max_size " << scan_options.max_size + << ", number_limit " << scan_options.number_limit + << ", max_versions " << scan_options.max_versions + << ", max_qualifiers " << scan_options.max_qualifiers; *is_complete = false; for (; it->Valid();) { bool has_merged = false; std::string merged_value; counter_.low_read_cell.Inc(); + low_level_read_count.Inc(); *read_bytes += it->key().size() + it->value().size(); now_time = GetTimeStampInMs(); @@ -886,7 +977,21 @@ inline bool TabletIO::LowLevelScan(const std::string& start_tera_key, << "] key=[" << DebugString(key.ToString()) << "] column=[" << DebugString(col.ToString()) << ":" << DebugString(qual.ToString()) - << "] ts=[" << ts << "] type=[" << type << "]"; + << "] ts=[" << ts << "] 
type=[" << type << "]" + << " buffer_size=[" << buffer_size << "]" + << " number_limit=[" << number_limit << "]" + << " read_bytes=[" << *read_bytes << "]" + << " qu_num=[" << qu_num << "]"; + + if (now_time > time_out) { + VLOG(9) << "ll-scan timeout, now_time: " << now_time << ", time_out: " << time_out; + if (next_start_point != NULL) { + VLOG(9) << "Mark next start key: " << DebugString(tera_key.ToString()); + MakeKvPair(key, col, qual, ts, "", next_start_point); + } + SetStatusCode(kRPCTimeout, status); + break; + } if (end_row_key.size() && key.compare(end_row_key) >= 0) { // scan finished @@ -932,15 +1037,8 @@ inline bool TabletIO::LowLevelScan(const std::string& start_tera_key, *read_row_count += 1; ProcessRowBuffer(row_buf, scan_options, value_list, &buffer_size, &number_limit); row_buf.clear(); - - if (now_time > time_out && (next_start_point != NULL)) { - VLOG(9) << "ll-scan timeout. Mark next start key: " << DebugString(tera_key.ToString()); - MakeKvPair(key, col, qual, ts, "", next_start_point); - break; - } } - // max version filter if (key.compare(last_key) == 0 && col.compare(last_col) == 0 && qual.compare(last_qual) == 0) { @@ -949,6 +1047,16 @@ inline bool TabletIO::LowLevelScan(const std::string& start_tera_key, continue; } } else { + if (key.compare(last_key) == 0 && col.compare(last_col) == 0 ) { + if (++qu_num > scan_options.max_qualifiers) { + VLOG(10) << "max_qualifiers triggered, max_qualifiers: " << scan_options.max_qualifiers; + it->Next(); + continue; + } + } else { + qu_num = 1; + } + last_key.assign(key.data(), key.size()); last_col.assign(col.data(), col.size()); last_qual.assign(qual.data(), qual.size()); @@ -957,6 +1065,7 @@ inline bool TabletIO::LowLevelScan(const std::string& start_tera_key, has_merged = compact_strategy->ScanMergedValue(it, &merged_value, &merged_num); if (has_merged) { counter_.low_read_cell.Add(merged_num - 1); + low_level_read_count.Add(merged_num - 1); value = merged_value; key = last_key; col = last_col; @@ 
-977,7 +1086,9 @@ inline bool TabletIO::LowLevelScan(const std::string& start_tera_key, // check scan buffer if (buffer_size >= scan_options.max_size || number_limit >= scan_options.number_limit) { - VLOG(10) << "stream scan, break scan context, version_num " << version_num + VLOG(10) << "stream scan, break scan context" + <<", buffer_size " << buffer_size + <<", number_limit " << number_limit << ", key " << DebugString(key.ToString()) << ", col " << DebugString(col.ToString()) << ", qual " << DebugString(qual.ToString()); it->Next(); @@ -1000,6 +1111,9 @@ inline bool TabletIO::LowLevelScan(const std::string& start_tera_key, ProcessRowBuffer(row_buf, scan_options, value_list, &buffer_size, &number_limit); } + if (*status == kRPCTimeout) { + return false; + } if (!it->Valid() && !(it->status().ok())) { SetStatusCode(it->status(), status); VLOG(10) << "ll-scan fail: " << "tablet=[" << tablet_path_ << "], " @@ -1054,6 +1168,7 @@ bool TabletIO::LowLevelSeek(const std::string& row_key, leveldb::TKT_FORSEEK, &row_seek_key); it_data->Seek(row_seek_key); counter_.low_read_cell.Inc(); + low_level_read_count.Inc(); if (it_data->Valid()) { VLOG(10) << "ll-seek: " << "tablet=[" << tablet_path_ << "] row_key=[" << row_key << "]"; @@ -1087,6 +1202,7 @@ bool TabletIO::LowLevelSeek(const std::string& row_key, leveldb::TKT_FORSEEK, &cf_seek_key); it_data->Seek(cf_seek_key); counter_.low_read_cell.Inc(); + low_level_read_count.Inc(); if (it_data->Valid()) { VLOG(10) << "ll-seek: " << "tablet=[" << tablet_path_ << "] row_key=[" << row_key @@ -1122,6 +1238,7 @@ bool TabletIO::LowLevelSeek(const std::string& row_key, uint32_t version_num = 0; for (; it_data->Valid();) { counter_.low_read_cell.Inc(); + low_level_read_count.Inc(); VLOG(10) << "ll-seek: " << "tablet=[" << tablet_path_ << "] row_key=[" << row_key << "] cf=[" << cf_name << "] qu=[" << qu_name << "]"; @@ -1134,7 +1251,7 @@ bool TabletIO::LowLevelSeek(const std::string& row_key, break; } - // skip qu delete mark + // skip qu 
delete mark and out-of-range version if (compact_strategy->ScanDrop(it_data->key(), 0)) { VLOG(10) << "ll-seek: scan drop " << "tablet=[" << tablet_path_ << "] row_key=[" << row_key << "] cf=[" << cf_name @@ -1143,6 +1260,14 @@ bool TabletIO::LowLevelSeek(const std::string& row_key, continue; } + if (scan_options.ts_start > timestamp) { + break; + } + if (scan_options.ts_end < timestamp) { + it_data->Next(); + continue; + } + // version filter if (++version_num > scan_options.max_versions) { break; @@ -1160,6 +1285,7 @@ bool TabletIO::LowLevelSeek(const std::string& row_key, compact_strategy->ScanMergedValue(it_data, &merged_value, &merged_num); if (has_merged) { counter_.low_read_cell.Add(merged_num - 1); + low_level_read_count.Add(merged_num - 1); kv->set_value(merged_value); VLOG(10) << "ll-seek merge: " << "key=[" << DebugString(row_key) << "] column=[" << DebugString(cf_name) @@ -1188,7 +1314,7 @@ bool TabletIO::LowLevelSeek(const std::string& row_key, } bool TabletIO::ReadCells(const RowReaderInfo& row_reader, RowResult* value_list, - uint64_t snapshot_id, StatusCode* status) { + uint64_t snapshot_id, StatusCode* status, int64_t timeout_ms) { { MutexLock lock(&mutex_); if (status_ != kReady && status_ != kOnSplit @@ -1205,7 +1331,7 @@ bool TabletIO::ReadCells(const RowReaderInfo& row_reader, RowResult* value_list, db_ref_count_++; } - int64_t read_ms = get_micros(); + int64_t start_read_us = get_micros(); if (kv_only_) { std::string key(row_reader.key()); @@ -1215,7 +1341,8 @@ bool TabletIO::ReadCells(const RowReaderInfo& row_reader, RowResult* value_list, } if (!Read(key, &value, snapshot_id, status)) { counter_.read_rows.Inc(); - row_read_delay.Add(get_micros() - read_ms); + row_read_count.Inc(); + row_read_delay.Add(get_micros() - start_read_us); { MutexLock lock(&mutex_); db_ref_count_--; @@ -1226,8 +1353,10 @@ bool TabletIO::ReadCells(const RowReaderInfo& row_reader, RowResult* value_list, result->set_key(row_reader.key()); result->set_value(value); 
counter_.read_rows.Inc(); + row_read_count.Inc(); counter_.read_size.Add(result->ByteSize()); - row_read_delay.Add(get_micros() - read_ms); + row_read_bytes.Add(result->ByteSize()); + row_read_delay.Add(get_micros() - start_read_us); { MutexLock lock(&mutex_); db_ref_count_--; @@ -1258,12 +1387,23 @@ bool TabletIO::ReadCells(const RowReaderInfo& row_reader, RowResult* value_list, if (row_reader.has_max_version()) { scan_options.max_versions = row_reader.max_version(); } + + if (row_reader.has_max_qualifiers()) { + scan_options.max_qualifiers = row_reader.max_qualifiers(); + } else { + scan_options.max_qualifiers = std::numeric_limits::max(); + } + if (row_reader.has_time_range()) { scan_options.ts_start = row_reader.time_range().ts_start(); scan_options.ts_end = row_reader.time_range().ts_end(); + VLOG(10) << "ReadCells: " << "timerange=[" << scan_options.ts_start + << "," << scan_options.ts_end << "]"; } scan_options.snapshot_id = snapshot_id; + scan_options.timeout = timeout_ms; + VLOG(10) << "ReadCells: " << "key=[" << DebugString(row_reader.key()) << "]"; @@ -1284,7 +1424,8 @@ bool TabletIO::ReadCells(const RowReaderInfo& row_reader, RowResult* value_list, &is_complete, status); } counter_.read_rows.Inc(); - row_read_delay.Add(get_micros() - read_ms); + row_read_count.Inc(); + row_read_delay.Add(get_micros() - start_read_us); { MutexLock lock(&mutex_); db_ref_count_--; @@ -1293,6 +1434,7 @@ bool TabletIO::ReadCells(const RowReaderInfo& row_reader, RowResult* value_list, return false; } else { counter_.read_size.Add(value_list->ByteSize()); + row_read_bytes.Add(value_list->ByteSize()); } if (value_list->key_values_size() == 0) { @@ -1310,7 +1452,6 @@ bool TabletIO::WriteBatch(leveldb::WriteBatch* batch, bool disable_wal, bool syn CHECK_NOTNULL(db_); - counter_.write_size.Add(batch->DataSize()); leveldb::Status db_status = db_->Write(options, batch); if (!db_status.ok()) { LOG(ERROR) << "fail to batch write to tablet: " << tablet_path_ @@ -1318,6 +1459,8 @@ bool 
TabletIO::WriteBatch(leveldb::WriteBatch* batch, bool disable_wal, bool syn SetStatusCode(kIOError, status); return false; } + counter_.write_size.Add(batch->DataSize()); + row_write_bytes.Add(batch->DataSize()); SetStatusCode(kTabletNodeOk, status); return true; } @@ -1349,6 +1492,10 @@ bool TabletIO::Write(std::vector* row_mutation_vec, } bool ret = async_writer_->Write(row_mutation_vec, status_vec, is_instant, callback, status); + if (!ret) { + counter_.write_reject_rows.Add(row_mutation_vec->size()); + } + { MutexLock lock(&mutex_); db_ref_count_--; @@ -1426,12 +1573,18 @@ bool TabletIO::ScanRowsRestricted(const ScanTabletRequest* request, StatusCode status = kTabletNodeOk; bool ret = false; + + int64_t start_scan_us = get_micros(); + if (LowLevelScan(start_tera_key, end_row_key, scan_options, response->mutable_results(), response->mutable_next_start_point(), &read_row_count, &read_bytes, &is_complete, &status)) { response->set_complete(is_complete); counter_.scan_rows.Add(read_row_count); counter_.scan_size.Add(read_bytes); + row_scan_count.Add(read_row_count); + row_scan_bytes.Add(read_bytes); + row_scan_delay.Add(get_micros() - start_scan_us); ret = true; } @@ -1464,17 +1617,26 @@ bool TabletIO::HandleScan(const ScanTabletRequest* request, void TabletIO::ProcessScan(ScanContext* context) { uint32_t rows_scan_num = 0; uint32_t size_scan_bytes = 0; + + int64_t start_scan_us = get_micros(); + if (LowLevelScan(context->start_tera_key, context->end_row_key, context->scan_options, context->it, context, context->result, NULL, &rows_scan_num, &size_scan_bytes, &context->complete, &context->ret_code)) { counter_.scan_rows.Add(rows_scan_num); counter_.scan_size.Add(size_scan_bytes); + row_scan_count.Add(rows_scan_num); + row_scan_bytes.Add(size_scan_bytes); + row_scan_delay.Add(get_micros() - start_scan_us); } } bool TabletIO::Scan(const ScanOption& option, KeyValueList* kv_list, bool* complete, StatusCode* status) { + + int64_t start_scan_us = get_micros(); + 
std::string start = option.key_range().key_start(); std::string end = option.key_range().key_end(); if (start < start_key_) { @@ -1558,8 +1720,13 @@ bool TabletIO::Scan(const ScanOption& option, KeyValueList* kv_list, if (!it->Valid()) { *complete = true; } + counter_.scan_rows.Add(kv_list->size()); counter_.scan_size.Add(pack_size); + row_scan_count.Add(kv_list->size()); + row_scan_bytes.Add(pack_size); + row_scan_delay.Add(get_micros() - start_scan_us); + delete it; delete strategy; @@ -1618,6 +1785,11 @@ void TabletIO::SetupScanRowOptions(const ScanTabletRequest* request, if (request->has_max_version()) { scan_options->max_versions = request->max_version(); } + if (request->has_max_qualifiers()) { + scan_options->max_qualifiers = request->max_qualifiers(); + } else { + scan_options->max_qualifiers = std::numeric_limits::max(); + } if (request->has_timerange()) { scan_options->ts_start = request->timerange().ts_start(); scan_options->ts_end = request->timerange().ts_end(); @@ -1635,7 +1807,7 @@ void TabletIO::SetupScanRowOptions(const ScanTabletRequest* request, } // no concurrent, so no lock on schema_mutex_ -void TabletIO::SetupOptionsForLG() { +void TabletIO::SetupOptionsForLG(const std::set& ignore_err_lgs) { if (kv_only_) { if (RawKeyType() == TTLKv) { ldb_options_.compact_strategy_factory = @@ -1656,6 +1828,7 @@ void TabletIO::SetupOptionsForLG() { std::set* exist_lg_list = new std::set; std::map* lg_info_list = new std::map; + std::set ignore_corruption_in_open_lg_list; int64_t triggered_log_size = 0; for (int32_t lg_i = 0; lg_i < table_schema_.locality_groups_size(); @@ -1721,6 +1894,9 @@ void TabletIO::SetupOptionsForLG() { triggered_log_size += lg_info->write_buffer_size; exist_lg_list->insert(lg_i); (*lg_info_list)[lg_i] = lg_info; + if (ignore_err_lgs.find(lg_schema.name()) != ignore_err_lgs.end()) { + ignore_corruption_in_open_lg_list.insert(lg_i); + } } if (mock_env_ != NULL) { ldb_options_.env = LeveldbMockEnv(); @@ -1738,6 +1914,8 @@ void 
TabletIO::SetupOptionsForLG() { delete lg_info_list; } else { ldb_options_.lg_info_list = lg_info_list; + ldb_options_.ignore_corruption_in_open_lg_list + = ignore_corruption_in_open_lg_list; } IndexingCfToLG(); @@ -1994,23 +2172,6 @@ const leveldb::RawKeyOperator* TabletIO::GetRawKeyOperator() { return key_operator_; } -void TabletIO::GetAndClearCounter(TabletCounter* counter) { - counter->set_low_read_cell(counter_.low_read_cell.Clear()); - counter->set_scan_rows(counter_.scan_rows.Clear()); - counter->set_scan_kvs(counter_.scan_kvs.Clear()); - counter->set_scan_size(counter_.scan_size.Clear()); - counter->set_read_rows(counter_.read_rows.Clear()); - counter->set_read_kvs(counter_.read_kvs.Clear()); - counter->set_read_size(counter_.read_size.Clear()); - counter->set_write_rows(counter_.write_rows.Clear()); - counter->set_write_kvs(counter_.write_kvs.Clear()); - counter->set_write_size(counter_.write_size.Clear()); - counter->set_is_on_busy(IsBusy()); - double write_workload = 0; - Workload(&write_workload); - counter->set_write_workload(write_workload); -} - int32_t TabletIO::AddRef() { MutexLock lock(&mutex_); ++ref_count_; @@ -2040,6 +2201,36 @@ void TabletIO::ApplySchema(const TableSchema& schema) { ldb_options_.compact_strategy_factory->SetArg(&schema); } +bool TabletIO::PutIfAbsentCheck(const std::string& row_key, + const Mutation& mutation) { + RowResult value_list; + ScanOptions scan_options; + std::set& qualifier_list = scan_options.column_family_list[mutation.family()]; + qualifier_list.insert(mutation.qualifier()); + scan_options.iter_cf_set.insert(mutation.family()); + scan_options.max_versions = 1; + StatusCode status; + if (!LowLevelSeek(row_key, scan_options, &value_list, &status)) { + if (status == kKeyNotExist) { + return true; + } + VLOG(9) << "txn of row (PutIfAbsent) " << DebugString(row_key) + << ":" << DebugString(mutation.family()) + << ":" << DebugString(mutation.qualifier()) + << " is interrupted: lowlevelseek fail"; + return false; + } + 
+ if (value_list.key_values_size() > 0) { + VLOG(9) << "txn of row (PutIfAbsent) " << DebugString(row_key) + << ":" << DebugString(mutation.family()) + << ":" << DebugString(mutation.qualifier()) + << " is interrupted: already exist"; + return false; + } + return true; +} + bool TabletIO::SingleRowTxnCheck(const std::string& row_key, const SingleRowTxnReadInfo& txn_read_info, StatusCode* status) { @@ -2098,5 +2289,33 @@ bool TabletIO::SingleRowTxnCheck(const std::string& row_key, return true; } +bool TabletIO::GetDBStatus(tera::TabletStatus* tablet_status, bool slow_check) { + *tablet_status = static_cast(kTabletReady); + { + MutexLock lock(&mutex_); + if (status_ != kReady) { + return false; + } + db_ref_count_++; + } + + std::string db_property_key = "leveldb.verify-db-integrity"; + std::string db_property_val; + if (slow_check && db_->GetProperty(db_property_key, &db_property_val)) { + if (db_property_val.find("verify_fail") != std::string::npos) { + tablet_status_ = kTabletCorruption; + } else { + tablet_status_ = static_cast(kTabletReady); + } + } + *tablet_status = tablet_status_; + + { + MutexLock lock(&mutex_); + db_ref_count_--; + } + return true; +} + } // namespace io } // namespace tera diff --git a/src/io/tablet_io.h b/src/io/tablet_io.h index ba5cd99cf..9ce73d96a 100644 --- a/src/io/tablet_io.h +++ b/src/io/tablet_io.h @@ -13,6 +13,7 @@ #include #include "common/base/scoped_ptr.h" +#include "common/metric/metric_counter.h" #include "common/mutex.h" #include "io/tablet_scanner.h" #include "leveldb/db.h" @@ -26,9 +27,23 @@ #include "proto/table_schema.pb.h" #include "proto/tabletnode_rpc.pb.h" #include "types.h" -#include "utils/counter.h" +#include "common/counter.h" namespace tera { + +// metric name constants +const char* const kLowReadCellMetricName = "tera_ts_tablet_low_read_cell_count"; +const char* const kScanRowsMetricName = "tera_ts_tablet_scan_row_count"; +const char* const kScanKvsMetricName = "tera_ts_tablet_scan_kv_count"; +const char* 
const kScanThroughPutMetricName = "tera_ts_tablet_scan_through_put"; +const char* const kReadRowsMetricName = "tera_ts_tablet_read_row_count"; +const char* const kReadKvsMetricName = "tera_ts_tablet_read_kv_count"; +const char* const kReadThroughPutMetricName = "tera_ts_tablet_read_through_put"; +const char* const kWriteRowsMetricName = "tera_ts_tablet_write_row_count"; +const char* const kWriteKvsMetricName = "tera_ts_tablet_write_kv_count"; +const char* const kWriteThroughPutMetricName = "tera_ts_tablet_write_through_put"; +const char* const kWriteRejectRowsMetricName = "tera_ts_tablet_write_reject_row_count"; + namespace io { class TabletWriter; @@ -36,6 +51,8 @@ struct ScanOptions; struct ScanContext; class ScanContextManager; +std::string MetricLabelToString(const std::string& tablet_path); + class TabletIO { public: enum CompactionType { @@ -54,16 +71,32 @@ class TabletIO { }; struct StatCounter { - tera::Counter low_read_cell; - tera::Counter scan_rows; - tera::Counter scan_kvs; - tera::Counter scan_size; - tera::Counter read_rows; - tera::Counter read_kvs; - tera::Counter read_size; - tera::Counter write_rows; - tera::Counter write_kvs; - tera::Counter write_size; + const std::string label; + tera::MetricCounter low_read_cell; + tera::MetricCounter scan_rows; + tera::MetricCounter scan_kvs; + tera::MetricCounter scan_size; + tera::MetricCounter read_rows; + tera::MetricCounter read_kvs; + tera::MetricCounter read_size; + tera::MetricCounter write_rows; + tera::MetricCounter write_kvs; + tera::MetricCounter write_size; + tera::MetricCounter write_reject_rows; + + StatCounter(const std::string& tablet_path) + : label(MetricLabelToString(tablet_path)), + low_read_cell(tera::kLowReadCellMetricName, label, {SubscriberType::QPS}), + scan_rows(tera::kScanRowsMetricName, label, {SubscriberType::QPS}), + scan_kvs(tera::kScanKvsMetricName, label, {SubscriberType::QPS}), + scan_size(tera::kScanThroughPutMetricName, label, {SubscriberType::THROUGHPUT}), + 
read_rows(tera::kReadRowsMetricName, label, {SubscriberType::QPS}), + read_kvs(tera::kReadKvsMetricName, label, {SubscriberType::QPS}), + read_size(tera::kReadThroughPutMetricName, label, {SubscriberType::THROUGHPUT}), + write_rows(tera::kWriteRowsMetricName, label, {SubscriberType::QPS}), + write_kvs(tera::kWriteKvsMetricName, label, {SubscriberType::QPS}), + write_size(tera::kWriteThroughPutMetricName, label, {SubscriberType::THROUGHPUT}), + write_reject_rows(tera::kWriteRejectRowsMetricName, label, {SubscriberType::QPS}) {} }; typedef std::function*, @@ -83,6 +116,7 @@ class TabletIO { std::string GetTablePath() const; std::string GetStartKey() const; std::string GetEndKey() const; + const std::string& GetMetricLabel() const; virtual CompactStatus GetCompactStatus() const; virtual TableSchema GetSchema() const; RawKey RawKeyType() const; @@ -94,6 +128,7 @@ class TabletIO { virtual bool Load(const TableSchema& schema, const std::string& path, const std::vector& parent_tablets, + const std::set& ignore_err_lgs, std::map snapshots, std::map rollbacks, leveldb::Logger* logger = NULL, @@ -118,7 +153,8 @@ class TabletIO { // read a row virtual bool ReadCells(const RowReaderInfo& row_reader, RowResult* value_list, - uint64_t snapshot_id = 0, StatusCode* status = NULL); + uint64_t snapshot_id = 0, StatusCode* status = NULL, + int64_t timeout_ms = std::numeric_limits::max()); /// scan from leveldb return ture means complete flase means not complete bool LowLevelScan(const std::string& start_tera_key, const std::string& end_row_key, @@ -162,8 +198,6 @@ class TabletIO { void SetStatus(TabletStatus status); TabletStatus GetStatus(); - void GetAndClearCounter(TabletCounter* counter); - int32_t AddRef(); int32_t DecRef(); int32_t GetRef() const; @@ -173,6 +207,10 @@ class TabletIO { void ProcessScan(ScanContext* context); void ApplySchema(const TableSchema& schema); + bool ShouldForceUnloadOnError(); + + bool GetDBStatus(tera::TabletStatus* tablet_status, bool slow_check); + 
private: friend class TabletWriter; friend class ScanConextManager; @@ -180,7 +218,7 @@ class TabletIO { bool sync = false, StatusCode* status = NULL); // int64_t GetDataSizeWithoutLock(StatusCode* status = NULL); - void SetupOptionsForLG(); + void SetupOptionsForLG(const std::set& ignore_err_lgs); void TearDownOptionsForLG(); void IndexingCfToLG(); @@ -245,6 +283,8 @@ class TabletIO { KeyValuePair* next); void SetSchema(const TableSchema& schema); + bool PutIfAbsentCheck(const std::string& row_key, const Mutation& mutation); + bool SingleRowTxnCheck(const std::string& row_key, const SingleRowTxnReadInfo& txn_read_info, StatusCode* status); @@ -263,6 +303,7 @@ class TabletIO { CompactStatus compact_status_; TabletStatus status_; + tera::TabletStatus tablet_status_; // check wether db corruption volatile int32_t ref_count_; volatile int32_t db_ref_count_; leveldb::Options ldb_options_; diff --git a/src/io/tablet_scanner.cc b/src/io/tablet_scanner.cc index d799f3fe9..47f082126 100644 --- a/src/io/tablet_scanner.cc +++ b/src/io/tablet_scanner.cc @@ -134,6 +134,9 @@ bool ScanContextManager::ScheduleScanContext(ScanContext* context) { // complete or io error, return all the rest request to client if (context->complete || (context->ret_code != kTabletNodeOk)) { DeleteScanContext(context); // never use context + if (context->ret_code != kTabletNodeOk) { + return false; + } return true; } if (context->jobs.size() == 0) { @@ -148,6 +151,7 @@ bool ScanContextManager::ScheduleScanContext(ScanContext* context) { MutexLock l(&lock_); if (context->ret_code != kTabletNodeOk) { DeleteScanContext(context); // never use context + return false; } } return true; diff --git a/src/io/tablet_scanner.h b/src/io/tablet_scanner.h index e816e1b11..d468bdb6f 100644 --- a/src/io/tablet_scanner.h +++ b/src/io/tablet_scanner.h @@ -33,12 +33,15 @@ struct ScanOptions { ColumnFamilyMap column_family_list; std::set iter_cf_set; int64_t timeout; + uint64_t max_qualifiers; ScanOptions() : 
max_versions(std::numeric_limits::max()), max_size(std::numeric_limits::max()), number_limit(std::numeric_limits::max()), - ts_start(kOldestTs), ts_end(kLatestTs), snapshot_id(0), timeout(std::numeric_limits::max() / 2) + ts_start(kOldestTs), ts_end(kLatestTs), snapshot_id(0), + timeout(std::numeric_limits::max() / 2), + max_qualifiers(std::numeric_limits::max()) {} }; @@ -55,6 +58,7 @@ struct ScanContext { leveldb::Iterator* it; // init to NULL leveldb::CompactStrategy* compact_strategy; uint32_t version_num; + uint64_t qu_num; std::string last_key; std::string last_col; std::string last_qual; diff --git a/src/io/tablet_writer.cc b/src/io/tablet_writer.cc index 5e8791cda..81954d5bd 100644 --- a/src/io/tablet_writer.cc +++ b/src/io/tablet_writer.cc @@ -5,6 +5,8 @@ #include "io/tablet_writer.h" #include +#include +#include #include #include @@ -16,9 +18,13 @@ #include "leveldb/lg_coding.h" #include "proto/proto_helper.h" #include "tera/table_descriptor.h" -#include "utils/counter.h" +#include "common/counter.h" #include "utils/string_util.h" -#include "utils/timer.h" +#include "common/timer.h" + +#include "tabletnode/tabletnode_metric_name.h" +#include "common/metric/ratio_subscriber.h" +#include "common/metric/prometheus_subscriber.h" DECLARE_int32(tera_asyncwriter_pending_limit); DECLARE_bool(tera_enable_level0_limit); @@ -30,6 +36,20 @@ DECLARE_bool(tera_sync_log); namespace tera { namespace io { +using tera::tabletnode::kRowDelayMetric; +using tera::tabletnode::kRowCountMetric; + +using tera::tabletnode::kApiLabelWrite; +using tera::Subscriber; + +tera::MetricCounter row_write_count(kRowCountMetric, kApiLabelWrite, {SubscriberType::QPS}); +tera::MetricCounter row_write_delay(kRowDelayMetric, kApiLabelWrite, {}); + +tera::AutoSubscriberRegister row_write_delay_per_row(std::unique_ptr(new tera::RatioSubscriber( + MetricId("tera_ts_row_write_delay_us_per_row"), + std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRowDelayMetric, kApiLabelWrite), 
SubscriberType::SUM)), + std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRowCountMetric, kApiLabelWrite), SubscriberType::SUM))))); + TabletWriter::TabletWriter(TabletIO* tablet_io) : tablet_(tablet_io), stopped_(true), sync_timestamp_(0), @@ -157,15 +177,21 @@ void TabletWriter::DoWork() { } // 否则 flush VLOG(7) << "write data, sleep_duration: " << sleep_duration; - + sync_timestamp_ = GetTimeStampInMs(); FlushToDiskBatch(sealed_buffer_); sealed_buffer_->clear(); - sync_timestamp_ = GetTimeStampInMs(); } LOG(INFO) << "AsyncWriter::DoWork done"; worker_done_event_.Set(); } +bool TabletWriter::IsBusy() { + const uint64_t MAX_PENDING_SIZE = FLAGS_tera_asyncwriter_pending_limit * 1024UL; + + MutexLock lock(&task_mutex_); + return active_buffer_size_ >= MAX_PENDING_SIZE; +} + bool TabletWriter::SwapActiveBuffer(bool force) { const uint64_t SYNC_SIZE = FLAGS_tera_asyncwriter_sync_size_threshold * 1024UL; if (FLAGS_tera_enable_level0_limit == true) { @@ -200,12 +226,14 @@ void TabletWriter::BatchRequest(WriteTaskBuffer* task_buffer, WriteTask& task = (*task_buffer)[task_idx]; const std::vector& row_mutation_vec = *(task.row_mutation_vec); std::vector* status_vec = task.status_vec; + const std::vector& ignore_row_vec = task.ignore_row_vec; for (uint32_t i = 0; i < row_mutation_vec.size(); ++i) { StatusCode* status = &((*status_vec)[i]); + const IgnoreCellFlags& ignore_cell_flags = ignore_row_vec[i]; const RowMutationSequence& row_mu = *row_mutation_vec[i]; const std::string& row_key = row_mu.row_key(); - int32_t mu_num = row_mu.mutation_sequence().size(); + uint32_t mu_num = row_mu.mutation_sequence().size(); if (*status != kTabletNodeOk) { VLOG(11) << "batch write fail, row " << DebugString(row_key) << ", status " << StatusCodeToString(*status); @@ -235,7 +263,12 @@ void TabletWriter::BatchRequest(WriteTaskBuffer* task_buffer, batch->Delete(tera_key); } } else { - for (int32_t t = 0; t < mu_num; ++t) { + for (uint32_t t = 0; t < mu_num; ++t) { + if (t < 
ignore_cell_flags.size() && ignore_cell_flags[t]) { + VLOG(11) << "batch write ignore cell @ " << DebugString(row_key) + << "[" << task_idx << "," << i << "," << t << "]"; + continue; + } const Mutation& mu = row_mu.mutation_sequence().Get(t); std::string tera_key; leveldb::TeraKeyType type = leveldb::TKT_VALUE; @@ -258,9 +291,11 @@ void TabletWriter::BatchRequest(WriteTaskBuffer* task_buffer, case kAddInt64: type = leveldb::TKT_ADDINT64; break; + /* case kPutIfAbsent: type = leveldb::TKT_PUT_IFABSENT; break; + */ case kAppend: type = leveldb::TKT_APPEND; break; @@ -317,6 +352,8 @@ void TabletWriter::FinishTask(WriteTaskBuffer* task_buffer, StatusCode status) { for (uint32_t task_idx = 0; task_idx < task_buffer->size(); ++task_idx) { WriteTask& task = (*task_buffer)[task_idx]; tablet_->GetCounter().write_rows.Add(task.row_mutation_vec->size()); + row_write_count.Add(task.row_mutation_vec->size()); + row_write_delay.Add(get_micros() - task.start_time); for (uint32_t i = 0; i < task.row_mutation_vec->size(); i++) { tablet_->GetCounter().write_kvs.Add((*task.row_mutation_vec)[i]->mutation_sequence_size()); // set batch_write status for row_mu @@ -329,7 +366,7 @@ void TabletWriter::FinishTask(WriteTaskBuffer* task_buffer, StatusCode status) { return; } -// set status to kTxnFail, if transaction conflicts. 
+// set status to kTxnFail, if single row transaction or putifabsent conflicts bool TabletWriter::CheckSingleRowTxnConflict(const RowMutationSequence& row_mu, std::set* commit_row_key_set, StatusCode* status) { @@ -359,6 +396,36 @@ bool TabletWriter::CheckSingleRowTxnConflict(const RowMutationSequence& row_mu, return false; } +void TabletWriter::MarkPutIfAbsentConflict(const RowMutationSequence& row_mu, + IgnoreCellFlags* ignore_cell_flags, + std::unordered_set* not_exist_cell_set) { + const std::string& row_key = row_mu.row_key(); + // check every mutate item if mutation type is PutIfAbsent + for (int32_t i = 0; i < row_mu.mutation_sequence_size(); ++i) { + const Mutation& mutation = row_mu.mutation_sequence(i); + if (mutation.type() != kPutIfAbsent) { + continue; + } + std::string cell_key; + tablet_->GetRawKeyOperator()->EncodeTeraKey(row_key, + mutation.family(), mutation.qualifier(), kLatestTs, + leveldb::TKT_FORSEEK, &cell_key); + if (not_exist_cell_set->find(cell_key) != not_exist_cell_set->end()) { + VLOG(9) << "txn of row (PutIfAbsent) " << DebugString(row_key) + << ":" << DebugString(mutation.family()) + << ":" << DebugString(mutation.qualifier()); + (*ignore_cell_flags)[i] = true; + } + if (!tablet_->PutIfAbsentCheck(row_key, mutation)) { + VLOG(9) << "txn of row (PutIfAbsent) " << DebugString(row_key) + << ":" << DebugString(mutation.family()) + << ":" << DebugString(mutation.qualifier()); + (*ignore_cell_flags)[i] = true; + } + not_exist_cell_set->insert(cell_key); + } +} + bool TabletWriter::CheckIllegalRowArg(const RowMutationSequence& row_mu, const std::set& cf_set, StatusCode* status) { @@ -401,6 +468,8 @@ void TabletWriter::CheckRows(WriteTaskBuffer* task_buffer) { } std::set commit_row_key_set; + // for PutIfAbsent, make sure only one PutIfAbsent operation in a cell + std::unordered_set not_exist_cell_set; for (uint32_t task_idx = 0; task_idx < task_buffer->size(); ++task_idx) { WriteTask& task = (*task_buffer)[task_idx]; std::vector& 
row_mutation_vec = *task.row_mutation_vec; @@ -408,9 +477,15 @@ void TabletWriter::CheckRows(WriteTaskBuffer* task_buffer) { for (uint32_t row_idx = 0; row_idx < row_mutation_vec.size(); ++row_idx) { const RowMutationSequence* row_mu = row_mutation_vec[row_idx]; + IgnoreCellFlags ignore_cell_flags; + // init all cell not ignored + ignore_cell_flags.assign(row_mu->mutation_sequence_size(), false); + task.ignore_row_vec.push_back(ignore_cell_flags); + if(CheckSingleRowTxnConflict(*row_mu, &commit_row_key_set, &status_vec[row_idx])) { continue; } + MarkPutIfAbsentConflict(*row_mu, &(task.ignore_row_vec.back()), ¬_exist_cell_set); if (CheckIllegalRowArg(*row_mu, cf_set, &status_vec[row_idx])) { continue; } @@ -421,18 +496,28 @@ void TabletWriter::CheckRows(WriteTaskBuffer* task_buffer) { } StatusCode TabletWriter::FlushToDiskBatch(WriteTaskBuffer* task_buffer) { - int64_t ts = get_micros(); + int64_t start_ts, check_cost, batch_cost, write_cost, finish_cost; + + start_ts = get_micros(); CheckRows(task_buffer); + check_cost = get_micros(); leveldb::WriteBatch batch; BatchRequest(task_buffer, &batch); + batch_cost = get_micros(); StatusCode status = kTabletNodeOk; const bool disable_wal = false; tablet_->WriteBatch(&batch, disable_wal, FLAGS_tera_sync_log, &status); batch.Clear(); + write_cost = get_micros(); FinishTask(task_buffer, status); - VLOG(7) << "finish a batch: " << task_buffer->size() << ", use " << get_micros() - ts; + finish_cost = get_micros(); + VLOG(7) << "finish a batch: " << task_buffer->size() << ", cost(check/batch/write/finish): " + << check_cost - start_ts << "/" + << batch_cost - check_cost << "/" + << write_cost - batch_cost << "/" + << finish_cost - write_cost; return status; } diff --git a/src/io/tablet_writer.h b/src/io/tablet_writer.h index 561db7b1d..b0019ec8b 100644 --- a/src/io/tablet_writer.h +++ b/src/io/tablet_writer.h @@ -6,6 +6,8 @@ #define TERA_TABLETNODE_TABLET_WRITER_H_ #include +#include +#include #include "common/event.h" #include 
"common/mutex.h" @@ -27,11 +29,16 @@ class TabletWriter { public: typedef std::function*, \ std::vector*)> WriteCallback; + + typedef std::vector IgnoreCellFlags; struct WriteTask { + WriteTask():start_time(get_micros()) {} std::vector* row_mutation_vec; std::vector* status_vec; + std::vector ignore_row_vec; WriteCallback callback; + int64_t start_time; }; typedef std::vector WriteTaskBuffer; @@ -47,6 +54,7 @@ class TabletWriter { bool kv_only); void Start(); void Stop(); + bool IsBusy(); private: void DoWork(); @@ -57,6 +65,11 @@ class TabletWriter { bool CheckSingleRowTxnConflict(const RowMutationSequence& row_mu, std::set* commit_row_key_set, StatusCode* status); + // mark conflict of PutIfAbsent + void MarkPutIfAbsentConflict(const RowMutationSequence& row_mu, + IgnoreCellFlags* ignore_cell_flags, + std::unordered_set* not_exist_cell_set); + bool CheckIllegalRowArg(const RowMutationSequence& row_mu, const std::set& cf_set, StatusCode* status); diff --git a/src/io/test/load_test.cc b/src/io/test/load_test.cc index 714758a5f..7351488ea 100644 --- a/src/io/test/load_test.cc +++ b/src/io/test/load_test.cc @@ -24,7 +24,7 @@ #include "leveldb/table_utils.h" #include "proto/proto_helper.h" #include "proto/status_code.pb.h" -#include "utils/timer.h" +#include "common/timer.h" #include "utils/utils_cmd.h" DECLARE_int32(tera_io_retry_max_times); @@ -104,7 +104,8 @@ TEST_F(TabletIOTest, General) { leveldb::Status s = leveldb::Env::Default()->NewLogger("./log/leveldblog", &ldb_logger); assert(s.ok()); EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, ldb_logger, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, ldb_logger, NULL, NULL, &status)); std::string key = "555"; std::string value = "value of 555"; @@ -147,7 +148,8 @@ TEST_F(TabletIOTest, CurrentLost) { assert(s.ok()); ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, ldb_logger, NULL, 
NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, ldb_logger, NULL, NULL, &status)); env->ResetMock(); } @@ -178,7 +180,8 @@ TEST_F(TabletIOTest, CurrentReadFailed) { assert(s.ok()); ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, ldb_logger, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, ldb_logger, NULL, NULL, &status)); env->ResetMock(); } @@ -216,7 +219,8 @@ TEST_F(TabletIOTest, CurrentCorrupted) { assert(s.ok()); ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, ldb_logger, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, ldb_logger, NULL, NULL, &status)); env->ResetMock(); } @@ -254,7 +258,8 @@ TEST_F(TabletIOTest, ManifestLost) { assert(s.ok()); ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, ldb_logger, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, ldb_logger, NULL, NULL, &status)); env->ResetMock(); } @@ -284,7 +289,8 @@ TEST_F(TabletIOTest, ManifestReadFailed) { assert(s.ok()); ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, ldb_logger, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, ldb_logger, NULL, NULL, &status)); env->ResetMock(); } @@ -322,7 +328,8 @@ TEST_F(TabletIOTest, ManifestCorrupted) { assert(s.ok()); ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, ldb_logger, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, ldb_logger, NULL, NULL, &status)); env->ResetMock(); } @@ -353,7 +360,8 @@ TEST_F(TabletIOTest, SstLost) { assert(s.ok()); ASSERT_FALSE(tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, ldb_logger, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, 
ldb_logger, NULL, NULL, &status)); env->ResetMock(); } @@ -367,26 +375,30 @@ TEST_F(TabletIOTest, SstLostButIgnore) { TabletIO tablet(key_start, key_end, tablet_path); leveldb::MockEnv* env = (leveldb::MockEnv*)LeveldbMockEnv(); - std::string fname = mock_env_prefix + tablet_path + "/0/__oops"; - int fd = open(fname.c_str(), O_RDWR | O_CREAT); - if (fd == -1) { - std::cout << strerror(errno) << fname << std::endl; - abort(); - } env->SetPrefix(mock_env_prefix); - env->SetGetChildrenCallback(DropSst); tablet.SetMockEnv(env); leveldb::Logger* ldb_logger; leveldb::Status s = leveldb::Env::Default()->NewLogger("./log/leveldblog", &ldb_logger); assert(s.ok()); + std::set ignore_err_lgs; + ignore_err_lgs.insert("lg0"); + TableSchema schema = TableSchema(); + + LocalityGroupSchema* lg = schema.add_locality_groups(); + lg->set_name("lg0"); + + ColumnFamilySchema* cf = schema.add_column_families(); + cf->set_name("column"); + cf->set_locality_group("lg0"); + cf->set_max_versions(3); - ASSERT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, ldb_logger, NULL, NULL, &status)); + ASSERT_TRUE(tablet.Load(schema, tablet_path, std::vector(), + ignore_err_lgs, empty_snaphsots_, + empty_rollback_, ldb_logger, NULL, NULL, &status)); env->ResetMock(); - close(fd); } //#endif diff --git a/src/io/test/tablet_io_test.cc b/src/io/test/tablet_io_test.cc index 90da431f9..5aa7f12fa 100644 --- a/src/io/test/tablet_io_test.cc +++ b/src/io/test/tablet_io_test.cc @@ -18,7 +18,7 @@ #include "leveldb/table_utils.h" #include "proto/proto_helper.h" #include "proto/status_code.pb.h" -#include "utils/timer.h" +#include "common/timer.h" #include "utils/utils_cmd.h" #include "utils/string_util.h" #include "io/tablet_scanner.h" @@ -93,7 +93,8 @@ TEST_F(TabletIOTest, General) { TabletIO tablet(key_start, key_end, tablet_path); EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); 
+ std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); std::string key = "555"; std::string value = "value of 555"; @@ -118,7 +119,8 @@ TEST_F(TabletIOTest, Split) { TabletIO tablet(key_start, key_end, tablet_path); EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); // prepare test data EXPECT_TRUE(PrepareTestData(&tablet, N)); @@ -139,7 +141,8 @@ TEST_F(TabletIOTest, Split) { key_end = "8000"; TabletIO other_tablet(key_start, key_end, tablet_path); EXPECT_TRUE(other_tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); other_tablet.GetDataSize(&size, NULL, &status); LOG(INFO) << "table[" << key_start << ", " << key_end << "]: size = " << size; @@ -155,7 +158,8 @@ TEST_F(TabletIOTest, Split) { key_end = "5000"; TabletIO l_tablet(key_start, key_end, tablet_path); EXPECT_TRUE(l_tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); l_tablet.GetDataSize(&size, NULL, &status); LOG(INFO) << "table[" << key_start << ", " << key_end << "]: size = " << size; @@ -165,7 +169,8 @@ TEST_F(TabletIOTest, Split) { key_end = ""; TabletIO r_tablet(key_start, key_end, tablet_path); EXPECT_TRUE(r_tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); r_tablet.GetDataSize(&size, NULL, &status); LOG(INFO) << "table[" << key_start << ", " << key_end << "]: size = " << size; @@ -182,7 +187,8 @@ TEST_F(TabletIOTest, SplitAndCheckSize) { TabletIO tablet(key_start, key_end, 
tablet_path); EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); // prepare test data EXPECT_TRUE(PrepareTestData(&tablet, N)); @@ -202,7 +208,8 @@ TEST_F(TabletIOTest, SplitAndCheckSize) { // open from split key to check scope size TabletIO l_tablet(key_start, split_key, tablet_path); EXPECT_TRUE(l_tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); l_tablet.GetDataSize(&size, NULL, &status); LOG(INFO) << "table[" << key_start << ", " << split_key << "]: size = " << size; @@ -210,7 +217,8 @@ TEST_F(TabletIOTest, SplitAndCheckSize) { TabletIO r_tablet(split_key, key_end, tablet_path); EXPECT_TRUE(r_tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); r_tablet.GetDataSize(&size, NULL, &status); LOG(INFO) << "table[" << split_key << ", " << key_end << "]: size = " << size; @@ -227,7 +235,8 @@ TEST_F(TabletIOTest, OverWrite) { TabletIO tablet(key_start, key_end, tablet_path); EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); std::string key = "555"; std::string value = "value of 555"; @@ -253,7 +262,8 @@ TEST_F(TabletIOTest, Compact) { TabletIO tablet(key_start, key_end, tablet_path); EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); // prepare test data EXPECT_TRUE(PrepareTestData(&tablet, 100)); 
@@ -269,7 +279,8 @@ TEST_F(TabletIOTest, Compact) { std::string new_key_end = StringFormat("%011llu", 50); // NumberToString(800); TabletIO new_tablet(new_key_start, new_key_end, tablet_path); EXPECT_TRUE(new_tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); EXPECT_TRUE(new_tablet.Compact(0, &status)); uint64_t new_table_size = 0; @@ -291,6 +302,110 @@ TEST_F(TabletIOTest, Compact) { EXPECT_TRUE(new_tablet.Unload()); } +TEST_F(TabletIOTest, LowLevelSeek) { + std::string tablet_path = working_dir + "llseek_tablet"; + std::string key_start = ""; + std::string key_end = ""; + StatusCode status; + + TabletIO tablet(key_start, key_end, tablet_path); + EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); + + // init scan + ScanOptions scan_options; + ColumnFamilyMap cf_map; + std::set qu_set; + qu_set.insert("qualifer"); + qu_set.insert("2a"); + qu_set.insert("1a"); + cf_map["column"] = qu_set; + scan_options.column_family_list = cf_map; + scan_options.iter_cf_set.insert("column"); + + std::string tkey1; + // delete this key + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); + tablet.WriteOne(tkey1, "" , false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); + tablet.WriteOne(tkey1, "" , false, NULL); + + // write cell + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "qualifer", get_micros(), leveldb::TKT_VALUE, &tkey1); + tablet.WriteOne(tkey1, "lala" , false, NULL); + RowResult value_list; + + EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); + EXPECT_EQ(value_list.key_values_size(), 1); + + // delete cell + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", get_micros(), 
leveldb::TKT_DEL, &tkey1); + tablet.WriteOne(tkey1, "" , false, NULL); + EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); + EXPECT_EQ(value_list.key_values_size(), 0); + + // write cell again + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "2a", get_micros(), leveldb::TKT_VALUE, &tkey1); + tablet.WriteOne(tkey1, "lala" , false, NULL); + EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); + EXPECT_EQ(value_list.key_values_size(), 1); + + // clean + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); + tablet.WriteOne(tkey1, "", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); + tablet.WriteOne(tkey1, "", false, NULL); + + // write 5 versions + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", get_micros(), leveldb::TKT_VALUE, &tkey1); + tablet.WriteOne(tkey1, "lala1", false, NULL); + int64_t start_ts = get_micros(); + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", start_ts, leveldb::TKT_VALUE, &tkey1); + tablet.WriteOne(tkey1, "lala2", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", get_micros(), leveldb::TKT_VALUE, &tkey1); + tablet.WriteOne(tkey1, "lala3", false, NULL); + int64_t end_ts = get_micros(); + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", end_ts, leveldb::TKT_VALUE, &tkey1); + tablet.WriteOne(tkey1, "lala4", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "1a", get_micros(), leveldb::TKT_VALUE, &tkey1); + tablet.WriteOne(tkey1, "lala5", false, NULL); + tablet.GetRawKeyOperator()->EncodeTeraKey("row1", "column", "1a", get_micros(), leveldb::TKT_VALUE, &tkey1); + tablet.WriteOne(tkey1, "lala5", false, NULL); + + // read all versions ( write 5 versions, but schema set max_versions = 3 ) + EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); + 
EXPECT_EQ(value_list.key_values_size(), 3); + + // for max_versions + // read 2 versions + scan_options.max_versions = 2; + EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); + EXPECT_EQ(value_list.key_values_size(), 2); + + // for timerange and max_versions + // read 2 versions ( write 5 versions, but schema set max_versions = 3) + scan_options.max_versions = 4; + scan_options.ts_start = start_ts; + scan_options.ts_end = end_ts; + EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); + EXPECT_EQ(value_list.key_values_size(), 2); + + // start_ts not in top 3 versions + scan_options.ts_start = start_ts; + scan_options.ts_end = start_ts; + EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); + EXPECT_EQ(value_list.key_values_size(), 0); + + // end_ts in top 3 versions + scan_options.ts_start = end_ts; + scan_options.ts_end = end_ts; + EXPECT_TRUE(tablet.LowLevelSeek("row", scan_options, &value_list, &status)); + EXPECT_EQ(value_list.key_values_size(), 1); + + EXPECT_TRUE(tablet.Unload()); +} + TEST_F(TabletIOTest, LowLevelScan) { std::string tablet_path = working_dir + "llscan_tablet"; std::string key_start = ""; @@ -299,7 +414,8 @@ TEST_F(TabletIOTest, LowLevelScan) { TabletIO tablet(key_start, key_end, tablet_path); EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); std::string tkey1; @@ -322,19 +438,19 @@ TEST_F(TabletIOTest, LowLevelScan) { uint32_t read_bytes = 0; bool is_complete = false; EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, "", ScanOptions(), - &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, NULL)); + &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, &status)); EXPECT_EQ(value_list.key_values_size(), 1); tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", 
get_micros(), leveldb::TKT_DEL, &tkey1); tablet.WriteOne(tkey1, "lala" , false, NULL); EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, "", ScanOptions(), - &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, NULL)); + &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, &status)); EXPECT_EQ(value_list.key_values_size(), 0); tablet.GetRawKeyOperator()->EncodeTeraKey("row", "column", "2a", get_micros(), leveldb::TKT_VALUE, &tkey1); tablet.WriteOne(tkey1, "lala" , false, NULL); EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, "", ScanOptions(), - &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, NULL)); + &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, &status)); EXPECT_EQ(value_list.key_values_size(), 1); tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", get_micros(), leveldb::TKT_DEL, &tkey1); @@ -357,17 +473,17 @@ TEST_F(TabletIOTest, LowLevelScan) { end_row_key = std::string("row1\0", 5); ScanOptions scan_options; EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, end_row_key, scan_options, - &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, NULL)); + &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, &status)); EXPECT_EQ(value_list.key_values_size(), 5); tablet.GetRawKeyOperator()->EncodeTeraKey("row", "", "", 0, leveldb::TKT_FORSEEK, &start_tera_key); end_row_key = std::string("row\0", 5); scan_options.column_family_list["column"].insert("1a"); EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, end_row_key, scan_options, - &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, NULL)); + &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, &status)); EXPECT_EQ(value_list.key_values_size(), 3); scan_options.max_versions = 2; EXPECT_TRUE(tablet.LowLevelScan(start_tera_key, end_row_key, scan_options, - &value_list, &next_start_point, &read_row_count, &read_bytes, 
&is_complete, NULL)); + &value_list, &next_start_point, &read_row_count, &read_bytes, &is_complete, &status)); EXPECT_EQ(value_list.key_values_size(), 2); EXPECT_TRUE(tablet.Unload()); } @@ -382,7 +498,8 @@ TEST_F(TabletIOTest, SplitToSubTable) { TabletIO tablet(key_start, key_end, tablet_path); EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); // prepare test data EXPECT_TRUE(PrepareTestData(&tablet, N / 2, 0)); @@ -391,7 +508,8 @@ TEST_F(TabletIOTest, SplitToSubTable) { // make sure all data are dumped into sst EXPECT_TRUE(tablet.Unload()); EXPECT_TRUE(tablet.Load(TableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); // for first tablet tablet.GetDataSize(&size, NULL, &status); @@ -418,7 +536,8 @@ TEST_F(TabletIOTest, SplitToSubTable) { // 1. load sub-table 1 TabletIO l_tablet(key_start, split_key, split_path_1); EXPECT_TRUE(l_tablet.Load(TableSchema(), split_path_1, parent_tablet, - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); l_tablet.GetDataSize(&size, NULL, &status); LOG(INFO) << "table[" << key_start << ", " << split_key << "]: size = " << size; @@ -436,7 +555,8 @@ TEST_F(TabletIOTest, SplitToSubTable) { // 2. 
load sub-table 2 TabletIO r_tablet(split_key, key_end, split_path_2); EXPECT_TRUE(r_tablet.Load(TableSchema(), split_path_2, parent_tablet, - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); r_tablet.GetDataSize(&size, NULL, &status); LOG(INFO) << "table[" << split_key << ", " << key_end << "]: size = " << size; @@ -554,7 +674,8 @@ TEST_F(TabletIOTest, RowBloomFilter) { TabletIO tablet(key_start, key_end, tablet_path); EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); // prepare data leveldb::WriteBatch batch; @@ -594,7 +715,7 @@ TEST_F(TabletIOTest, RowBloomFilter) { bool is_complete = false; ASSERT_TRUE(tablet.LowLevelScan(start_tera_key, end_row_key, ScanOptions(), &value_list, &next_start_point, &read_row_count, &read_bytes, - &is_complete, NULL)); + &is_complete, &status)); ASSERT_EQ(value_list.key_values_size(), CR); for (int32_t j = 0; j < CR; j++) { char buf[16]; diff --git a/src/io/test/tablet_scanner_test.cc b/src/io/test/tablet_scanner_test.cc index a53f2d52a..915ad6a92 100644 --- a/src/io/test/tablet_scanner_test.cc +++ b/src/io/test/tablet_scanner_test.cc @@ -20,7 +20,7 @@ #include "leveldb/table_utils.h" #include "proto/proto_helper.h" #include "proto/status_code.pb.h" -#include "utils/timer.h" +#include "common/timer.h" #include "utils/utils_cmd.h" DECLARE_string(tera_tabletnode_path_prefix); @@ -225,7 +225,8 @@ TEST_F(TabletScannerTest, General) { TabletIO tablet(key_start, key_end, tablet_path); EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); PrepareData(&tablet, 1000000); uint64_t nr = 400; @@ -246,7 +247,8 @@ 
TEST_F(TabletScannerTest, CacheEvict) { TabletIO tablet(key_start, key_end, tablet_path); EXPECT_TRUE(tablet.Load(GetTableSchema(), tablet_path, std::vector(), - empty_snaphsots_, empty_rollback_, NULL, NULL, NULL, &status)); + std::set(), empty_snaphsots_, + empty_rollback_, NULL, NULL, NULL, &status)); PrepareData(&tablet, 1000000); diff --git a/src/io/utils_leveldb.cc b/src/io/utils_leveldb.cc index 033ad4d37..48a6fa2c8 100644 --- a/src/io/utils_leveldb.cc +++ b/src/io/utils_leveldb.cc @@ -14,6 +14,8 @@ #include "common/base/string_number.h" #include "common/file/file_path.h" #include "common/mutex.h" +#include "common/timer.h" +#include "db/filename.h" #include "io/timekey_comparator.h" #include "leveldb/comparator.h" #include "leveldb/env_dfs.h" @@ -21,7 +23,7 @@ #include "leveldb/env_inmem.h" #include "leveldb/env_mock.h" #include "leveldb/table_utils.h" -#include "utils/timer.h" +#include "common/timer.h" DECLARE_string(tera_leveldb_env_type); DECLARE_string(tera_leveldb_env_dfs_type); @@ -31,6 +33,7 @@ DECLARE_string(tera_leveldb_env_hdfs2_nameservice_list); DECLARE_string(tera_tabletnode_path_prefix); DECLARE_string(tera_dfs_so_path); DECLARE_string(tera_dfs_conf); +DECLARE_int64(tera_master_gc_trash_expire_time_s); namespace tera { namespace io { @@ -99,6 +102,11 @@ std::string GetTrashDir() { return FLAGS_tera_tabletnode_path_prefix + "/" + trash; } +std::string GetTrackableGcTrashDir() { + const std::string trash("#trackable_gc_trash"); + return FLAGS_tera_tabletnode_path_prefix + "/" + trash; +} + bool MoveEnvDirToTrash(const std::string& tablename) { leveldb::Env* env = LeveldbBaseEnv(); std::string src_dir = FLAGS_tera_tabletnode_path_prefix + "/" + tablename; @@ -140,6 +148,75 @@ bool MoveEnvDirToTrash(const std::string& tablename) { return true; } +leveldb::Status MoveSstToTrackableGcTrash(const std::string& table_name, + uint64_t tablet_id, + uint32_t lg_id, + uint64_t file_id) { + leveldb::Status s; + leveldb::Env* env = LeveldbBaseEnv(); + 
std::string table_path = FLAGS_tera_tabletnode_path_prefix + table_name; + std::string src_path = leveldb::BuildTableFilePath(table_path, tablet_id, lg_id, file_id); + + s = env->FileExists(src_path); + if(s.IsNotFound()) { + // not found, so no need to move + return leveldb::Status::OK(); + } else if (!s.ok()) { + // unknown status + return s; + } + + std::string trash_dir = GetTrackableGcTrashDir(); + s = env->FileExists(trash_dir); + if (s.IsNotFound()) { + if (!env->CreateDir(trash_dir).ok()) { + LOG(ERROR) << "[gc] fail to create trackable gc trash dir: " << trash_dir; + return leveldb::Status::IOError("fail to create trackable gc trash dir"); + } else { + LOG(INFO) << "[gc] succeed in creating trackable gc trash dir: " << trash_dir; + } + } else if (!s.ok()) { + // unknown status + return s; + } + + std::string time = get_curtime_str(); + std::replace(time.begin(), time.end(), ':', '-'); + std::string dest_path = leveldb::BuildTrashTableFilePath( + trash_dir + "/" + table_name, tablet_id, lg_id, file_id, time); + + size_t dir_pos = dest_path.rfind("/"); + if (dir_pos == std::string::npos) { + LOG(ERROR) << "[gc] invalid dest path: " << dest_path; + return leveldb::Status::IOError("invalid dest path"); + } + std::string lg_path = dest_path.substr(0, dir_pos); + s = env->FileExists(lg_path); + if(s.IsNotFound()) { + // not found, so no need to mkdir + s = env->CreateDir(lg_path); + if (!s.ok()) { + LOG(ERROR) << "[gc] create lg dir in trash: " << lg_path + << " failed: " << s.ToString(); + return s; + } + } else if (!s.ok()) { + // unknown status + return s; + } + + s = env->RenameFile(src_path, dest_path); + if (!s.ok()) { + LOG(ERROR) << "[gc] fail to move file to trackable gc trash, src_path: " << src_path + << ", dest_path: " << dest_path << ", status: " << s.ToString(); + return s; + } + LOG(INFO) << "[gc] move file to trackable gc trash, src_path: " << src_path + << ", dest_path: " << dest_path; + + return leveldb::Status::OK(); +} + void CleanTrashDir() 
{ leveldb::Env* env = LeveldbBaseEnv(); std::string trash_dir = GetTrashDir(); @@ -156,6 +233,136 @@ void CleanTrashDir() { return; } +bool TryDeleteEmptyDir(const std::string& dir_path, + size_t total_children_size, + size_t deleted_children_size) { + bool deleted = false; + + if (deleted_children_size == total_children_size) { + leveldb::Status s; + leveldb::Env* env = LeveldbBaseEnv(); + s = env->DeleteDir(dir_path); + if (s.ok()) { + LOG(INFO) << "[gc] delete empty dir: " << dir_path; + deleted = true; + } else { + LOG(WARNING) << "[gc] fail to delete empty dir: " + << dir_path <<" status: " << s.ToString(); + deleted = false; + } + } + + return deleted; +} + +leveldb::Status DeleteTrashFileIfExpired(const std::string& file_path) { + leveldb::Status s; + leveldb::Env* env = LeveldbBaseEnv(); + + std::string file_time_str = leveldb::GetTimeStrFromTrashFile(file_path); + if (file_time_str.empty()) { + LOG(ERROR) << "[gc] skip invalid trash file path: " << file_path; + return leveldb::Status::Corruption("invalid trash file path"); + } + + // change time format + // eg.: change "20170801-15-54-23" to "20170801-15:54:23" + file_time_str = file_time_str.replace(file_time_str.rfind("-"), 1, ":"); + file_time_str = file_time_str.replace(file_time_str.rfind("-"), 1, ":"); + + int64_t file_time = get_timestamp_from_str(file_time_str); + int64_t current_time = time(nullptr); + if (current_time - file_time > FLAGS_tera_master_gc_trash_expire_time_s) { + s = env->DeleteFile(file_path); + if (s.ok()) { + LOG(INFO) << "[gc] delete expired trash file: " << file_path + << ", file added to trash time: " << get_time_str(file_time) + << ", current time: " << get_time_str(current_time); + } else { + LOG(ERROR) << "[gc] fail to delete expired trash file: " << file_path + <<" status: " << s.ToString(); + return s; + } + } else { + return leveldb::Status::Corruption("file not expired"); + } + + return s; +} + +void CleanTrackableGcTrash() { + leveldb::Status s; + leveldb::Env* env = 
LeveldbBaseEnv(); + std::string trash_dir = GetTrackableGcTrashDir(); + + s = env->FileExists(trash_dir); + if (s.IsNotFound()) { + LOG(INFO) << "[gc] skip empty trash dir: " << trash_dir + <<" status: " << s.ToString(); + return; + } + + std::vector tables; + s = env->GetChildren(trash_dir, &tables); + if (!s.ok()) { + LOG(ERROR) << "[gc] fail to list trash dir: " << trash_dir + <<" status: " << s.ToString(); + return; + } + + for (const auto& table : tables) { + std::string table_path = trash_dir + "/" + table; + std::vector tablets; + s = env->GetChildren(table_path, &tablets); + if (!s.ok()) { + LOG(ERROR) << "[gc] skip due to fail to list table dir: " << table_path + <<" status: " << s.ToString(); + continue; + } + + size_t deleted_empty_tablet_num = 0; + for (const auto& tablet : tablets) { + std::string tablet_path = table_path + "/" + tablet; + std::vector lgs; + s = env->GetChildren(tablet_path, &lgs); + if (!s.ok()) { + LOG(ERROR) << "[gc] skip due to fail to list tablet dir: " << tablet_path + <<" status: " << s.ToString(); + continue; + } + + size_t deleted_empty_lg_num = 0; + for (const auto& lg : lgs) { + std::string lg_path = tablet_path + "/" + lg; + std::vector files; + s = env->GetChildren(lg_path, &files); + if (!s.ok()) { + LOG(ERROR) << "[gc] skip due to fail to list lg dir: " << lg_path + <<" status: " << s.ToString(); + continue; + } + + size_t deleted_file_num = 0; + for (const auto& file : files) { + std::string file_path = lg_path + "/" + file; + if (DeleteTrashFileIfExpired(file_path).ok()) { + ++deleted_file_num; + } + } + if (TryDeleteEmptyDir(lg_path, files.size(), deleted_file_num)) { + ++ deleted_empty_lg_num; + } + } + if (TryDeleteEmptyDir(tablet_path, lgs.size(), deleted_empty_lg_num)) { + ++ deleted_empty_tablet_num; + } + } + TryDeleteEmptyDir(table_path, tablets.size(), deleted_empty_tablet_num); + } + + return; +} + leveldb::Status DeleteEnvDir(const std::string& dir) { leveldb::Status s; static bool is_support_rmdir = true; 
diff --git a/src/io/utils_leveldb.h b/src/io/utils_leveldb.h index 307c270aa..9654ce5b9 100644 --- a/src/io/utils_leveldb.h +++ b/src/io/utils_leveldb.h @@ -30,10 +30,25 @@ leveldb::Env* LeveldbMockEnv(); std::string GetTrashDir(); +std::string GetTrackableGcTrashDir(); + bool MoveEnvDirToTrash(const std::string& subdir); +leveldb::Status MoveSstToTrackableGcTrash(const std::string& table_name, + uint64_t tablet_id, + uint32_t lg_id, + uint64_t file_id); + void CleanTrashDir(); +bool TryDeleteEmptyDir(const std::string& dir_path, + size_t total_children_size, + size_t deleted_children_size); + +leveldb::Status DeleteTrashFileIfExpired(const std::string& file_path); + +void CleanTrackableGcTrash(); + leveldb::Status DeleteEnvDir(const std::string& subdir); } // namespace io diff --git a/src/lbcli_main.cc b/src/lbcli_main.cc new file mode 100644 index 000000000..c1149812c --- /dev/null +++ b/src/lbcli_main.cc @@ -0,0 +1,314 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "common/thread_pool.h" +#include "common/base/string_ext.h" +#include "common/base/string_number.h" +#include "common/console/progress_bar.h" +#include "common/file/file_path.h" +#include "io/coding.h" +#include "proto/kv_helper.h" +#include "proto/lb_client.h" +#include "proto/load_balancer_rpc.pb.h" +#include "proto/proto_helper.h" +#include "proto/tabletnode.pb.h" +#include "proto/tabletnode_client.h" +#include "sdk/client_impl.h" +#include "sdk/cookie.h" +#include "sdk/sdk_utils.h" +#include "sdk/sdk_zk.h" +#include "sdk/table_impl.h" +#include "tera.h" +#include "types.h" +#include "utils/crypt.h" +#include "utils/string_util.h" +#include "utils/tprinter.h" +#include "utils/utils_cmd.h" +#include "version.h" + +DECLARE_string(flagfile); + +// using FLAGS instead of isatty() for compatibility +DEFINE_bool(stdout_is_tty, true, "is stdout connected to a tty"); +DEFINE_bool(reorder_tablets, false, "reorder tablets by ts list"); +DEFINE_bool(readable, true, "readable input"); + +DECLARE_string(tera_lb_server_addr); +DECLARE_string(tera_lb_server_port); + +tera::TPrinter::PrintOpt g_printer_opt; + +using namespace tera; + +typedef std::shared_ptr TablePtr; +typedef std::shared_ptr TableImplPtr; +typedef std::map CommandTable; + +static CommandTable& GetCommandTable() { + static CommandTable command_table; + return command_table; +} + +static std::string GetServerAddr() { + return FLAGS_tera_lb_server_addr + ":" + FLAGS_tera_lb_server_port; +} + +const char* builtin_cmd_list[] = { + "safemode", + "safemode [enter | leave | get]", + + "help", + "help [cmd] \n\ + show manual for a or all cmd(s)", + + "version", + "version \n\ + show version info", +}; + +static void PrintCmdHelpInfo(const char* msg) { + if (msg == NULL) { + return; + } + int count = sizeof(builtin_cmd_list)/sizeof(char*); + for (int i = 0; i < 
count; i+=2) { + if(strncmp(msg, builtin_cmd_list[i], 32) == 0) { + std::cout << builtin_cmd_list[i + 1] << std::endl; + return; + } + } +} + +static void PrintCmdHelpInfo(const std::string& msg) { + PrintCmdHelpInfo(msg.c_str()); +} + +static void PrintAllCmd() { + std::cout << "there is cmd list:" << std::endl; + int count = sizeof(builtin_cmd_list)/sizeof(char*); + bool newline = false; + for (int i = 0; i < count; i+=2) { + std::cout << std::setiosflags(std::ios::left) << std::setw(20) << builtin_cmd_list[i]; + if (newline) { + std::cout << std::endl; + newline = false; + } else { + newline = true; + } + } + + std::cout << std::endl << "help [cmd] for details." << std::endl; +} + +// return false if similar command(s) not found +static bool PromptSimilarCmd(const char* msg) { + if (msg == NULL) { + return false; + } + bool found = false; + int64_t len = strlen(msg); + int64_t threshold = int64_t((len * 0.3 < 3) ? 3 : len * 0.3); + int count = sizeof(builtin_cmd_list)/sizeof(char*); + for (int i = 0; i < count; i+=2) { + if (EditDistance(msg, builtin_cmd_list[i]) <= threshold) { + if (!found) { + std::cout << "Did you mean:" << std::endl; + found = true; + } + std::cout << " " << builtin_cmd_list[i] << std::endl; + } + } + return found; +} + +static void PrintUnknownCmdHelpInfo(const char* msg) { + if (msg != NULL) { + std::cout << "'" << msg << "' is not a valid command." 
<< std::endl << std::endl; + } + if ((msg != NULL) + && PromptSimilarCmd(msg)) { + return; + } + PrintAllCmd(); +} + +int32_t SafemodeOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + if (argc < 3) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + + std::string op = argv[2]; + if (op != "get" && op != "leave" && op != "enter") { + PrintCmdHelpInfo(argv[1]); + return -1; + } + + load_balancer::LBClient lb_client(GetServerAddr()); + CmdCtrlRequest request; + CmdCtrlResponse response; + + request.set_sequence_id(0); + request.set_command("safemode"); + request.add_arg_list(op); + + string reason; + if (lb_client.CmdCtrl(&request, &response)) { + if (response.status() != tera::kLoadBalancerOk) { + reason = StatusCodeToString(response.status()); + LOG(ERROR) << reason; + std::cout << reason << std::endl; + err->SetFailed(ErrorCode::kSystem, reason); + return -1; + } + if (op == "get") { + if (response.bool_result()) { + std::cout << "true" << std::endl; + } else { + std::cout << "false" << std::endl; + } + } + return 0; + } else { + reason = "fail to CmdCtrl"; + LOG(ERROR) << reason; + std::cout << reason << std::endl; + err->SetFailed(ErrorCode::kSystem, reason); + return -1; + } +} + +int32_t HelpOp(Client*, int32_t argc, std::string* argv, ErrorCode*) { + if (argc == 2) { + PrintAllCmd(); + } else if (argc == 3) { + PrintCmdHelpInfo(argv[2]); + } else { + PrintCmdHelpInfo("help"); + } + return 0; +} + +int32_t HelpOp(int32_t argc, char** argv) { + std::vector argv_svec(argv, argv + argc); + return HelpOp(NULL, argc, &argv_svec[0], NULL); +} + +bool ParseCommand(int argc, char** arg_list, std::vector* parsed_arg_list) { + for (int i = 0; i < argc; i++) { + std::string parsed_arg = arg_list[i]; + if (FLAGS_readable && !ParseDebugString(arg_list[i], &parsed_arg)) { + std::cout << "invalid debug format of argument: " << arg_list[i] << std::endl; + return false; + } + parsed_arg_list->push_back(parsed_arg); + } + return true; +} + +static void 
InitializeCommandTable(){ + CommandTable& command_table = GetCommandTable(); + command_table["safemode"] = SafemodeOp; + command_table["help"] = HelpOp; +} + +int ExecuteCommand(Client* client, int argc, char** arg_list) { + int ret = 0; + ErrorCode error_code; + + std::vector parsed_arg_list; + if (!ParseCommand(argc, arg_list, &parsed_arg_list)) { + return 1; + } + std::string* argv = &parsed_arg_list[0]; + + CommandTable& command_table = GetCommandTable(); + std::string cmd = argv[1]; + if (cmd == "version") { + PrintSystemVersion(); + } else if (command_table.find(cmd) != command_table.end()) { + ret = command_table[cmd](client, argc, argv, &error_code); + } else { + PrintUnknownCmdHelpInfo(argv[1].c_str()); + ret = 1; + } + + if (error_code.GetType() != ErrorCode::kOK) { + LOG(ERROR) << "fail reason: " << error_code.ToString(); + } + return ret; +} + +int main(int argc, char* argv[]) { + FLAGS_minloglevel = 2; + ::google::ParseCommandLineFlags(&argc, &argv, true); + + if (argc > 1 && std::string(argv[1]) == "version") { + PrintSystemVersion(); + return 0; + } else if (argc > 1 && std::string(argv[1]) == "help") { + HelpOp(argc, argv); + return 0; + } + + Client* client = Client::NewClient(FLAGS_flagfile, NULL); + if (client == NULL) { + LOG(ERROR) << "client instance not exist"; + return -1; + } + g_printer_opt.print_head = FLAGS_stdout_is_tty; + + InitializeCommandTable(); + + int ret = 0; + if (argc == 1) { + char* line = NULL; + while ((line = readline("lb> ")) != NULL) { + char* line_copy = strdup(line); + std::vector arg_list; + arg_list.push_back(argv[0]); + char* tmp = NULL; + char* token = strtok_r(line, " \t", &tmp); + while (token != NULL) { + arg_list.push_back(token); + token = strtok_r(NULL, " \t", &tmp); + } + if (arg_list.size() == 2 && + (strcmp(arg_list[1], "quit") == 0 || strcmp(arg_list[1], "exit") == 0)) { + free(line_copy); + free(line); + break; + } + if (arg_list.size() > 1) { + add_history(line_copy); + ret = ExecuteCommand(client, 
arg_list.size(), &arg_list[0]); + } + free(line_copy); + free(line); + } + } else { + ret = ExecuteCommand(client, argc, argv); + } + + delete client; + return ret; +} diff --git a/src/leveldb/Makefile b/src/leveldb/Makefile index c9162d2eb..175c916dc 100644 --- a/src/leveldb/Makefile +++ b/src/leveldb/Makefile @@ -19,7 +19,7 @@ include ../../depends.mk include build_config.mk CFLAGS += -I. -I./include $(PLATFORM_CCFLAGS) $(OPT) -CXXFLAGS += -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT) +CXXFLAGS += -std=c++11 -I. -I./include $(PLATFORM_CXXFLAGS) $(OPT) LDFLAGS += $(PLATFORM_LDFLAGS) -L$(SNAPPY_LIBDIR) -lrt -ldl -lsnappy LIBS += $(PLATFORM_LIBS) diff --git a/src/leveldb/build_detect_platform b/src/leveldb/build_detect_platform index 8e230d950..325dfaf01 100755 --- a/src/leveldb/build_detect_platform +++ b/src/leveldb/build_detect_platform @@ -22,7 +22,6 @@ # # -DLEVELDB_CSTDATOMIC_PRESENT if is present # -DLEVELDB_PLATFORM_POSIX for Posix-based platforms -# -DSNAPPY if the Snappy library is present # OUTPUT=$1 @@ -176,15 +175,6 @@ EOF COMMON_FLAGS="$COMMON_FLAGS -DLEVELDB_PLATFORM_POSIX" fi - # Test whether Snappy library is installed - # http://code.google.com/p/snappy/ - $CXX $CXXFLAGS -x c++ - -o $CXXOUTPUT 2>/dev/null < - int main() {} -EOF - COMMON_FLAGS="$COMMON_FLAGS -DSNAPPY" - PLATFORM_LIBS="$PLATFORM_LIBS" - # Test whether tcmalloc is available $CXX $CXXFLAGS -x c++ - -o $CXXOUTPUT -ltcmalloc_minimal 2>/dev/null <NumEntries()) { meta->file_size = builder->FileSize(); + meta->data_size = meta->file_size; assert(meta->file_size > 0); *saved_size = builder->SavedSize(); @@ -164,7 +165,7 @@ Status BuildTable(const std::string& dbname, if (s.ok() && meta->file_size > 0) { // Keep it - } else { + } else if (!s.IsIOPermissionDenied()) { env->DeleteFile(fname); } return s; diff --git a/src/leveldb/db/db_impl.cc b/src/leveldb/db/db_impl.cc index c076008de..e8758a8ce 100644 --- a/src/leveldb/db/db_impl.cc +++ b/src/leveldb/db/db_impl.cc @@ -9,13 +9,14 @@ #include 
"db/db_impl.h" #include - #include #include #include #include #include #include +#include + #include "db/builder.h" #include "db/db_iter.h" #include "db/dbformat.h" @@ -44,11 +45,17 @@ namespace leveldb { +extern Status WriteStringToFileSync(Env* env, const Slice& data, + const std::string& fname); + const int kNumNonTableCacheFiles = 10; // if this file exists, ignore error in db-opening const static std::string mark_file_name = "/__oops"; +// if this file exists, +const static std::string init_load_filelock = "/__init_load_filelock"; + // Information kept for every waiting writer struct DBImpl::Writer { WriteBatch* batch; @@ -87,6 +94,7 @@ struct DBImpl::CompactionState { TableBuilder* builder; uint64_t total_bytes; + Status status; Output* current_output() { return &outputs[outputs.size()-1]; } @@ -129,8 +137,10 @@ Options SanitizeOptions(const std::string& dbname, result.block_cache = NewLRUCache(8 << 20); } + if (result.ignore_corruption_in_open) { + Log(result.info_log, "[%s] caution: open with ignore_corruption_in_open", dbname.c_str()); + } { - // Maybe mark error flag in option std::string oops = dbname + mark_file_name; Status s = src.env->FileExists(oops); if (s.ok()) { @@ -139,7 +149,6 @@ Options SanitizeOptions(const std::string& dbname, } // Ignore error from FileExists since there is no harm } - return result; } @@ -166,10 +175,6 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname) logfile_number_(0), log_(NULL), bound_log_size_(0), - bg_compaction_scheduled_(false), - bg_compaction_score_(0), - bg_compaction_timeout_(0), - bg_schedule_id_(0), manual_compaction_(NULL), consecutive_compaction_errors_(0), flush_on_destroy_(false) { @@ -188,6 +193,11 @@ DBImpl::DBImpl(const Options& options, const std::string& dbname) &internal_comparator_); } +bool DBImpl::ShouldForceUnloadOnError() { + MutexLock l(&mutex_); + return bg_error_.IsIOPermissionDenied(); +} + Status DBImpl::Shutdown1() { assert(state_ == kOpened); state_ = kShutdown1; @@ 
-196,12 +206,17 @@ Status DBImpl::Shutdown1() { shutting_down_.Release_Store(this); // Any non-NULL value is ok Log(options_.info_log, "[%s] wait bg compact finish", dbname_.c_str()); - if (bg_compaction_scheduled_) { - env_->ReSchedule(bg_schedule_id_, kDumpMemTableUrgentScore, 0); + std::vector::iterator it = bg_compaction_tasks_.begin(); + for (; it != bg_compaction_tasks_.end(); ++it) { + env_->ReSchedule((*it)->id, kDumpMemTableUrgentScore, 0); } - while (bg_compaction_scheduled_) { + while (bg_compaction_tasks_.size() > 0) { bg_cv_.Wait(); } + // has enconutered IOPermission Denied error, return immediately and do not try to compact memory table aynmore + if (bg_error_.IsIOPermissionDenied()) { + return bg_error_; + } Status s; if (!options_.dump_mem_on_shutdown) { @@ -231,6 +246,9 @@ Status DBImpl::Shutdown2() { state_ = kShutdown2; MutexLock l(&mutex_); + if(bg_error_.IsIOPermissionDenied()) { + return bg_error_; + } Status s; if (!options_.dump_mem_on_shutdown) { return s; @@ -315,26 +333,30 @@ void DBImpl::MaybeIgnoreError(Status* s) const { } void DBImpl::DeleteObsoleteFiles() { + mutex_.AssertHeld(); if (!bg_error_.ok()) { // After a background error, we don't know whether a new version may // or may not have been committed, so we cannot safely garbage collect. 
return; } + // check filesystem, and then check pending_outputs_ + std::vector filenames; + mutex_.Unlock(); + env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose + mutex_.Lock(); + // Make a set of all of the live files std::set live = pending_outputs_; versions_->AddLiveFiles(&live); // manifest file set, keep latest 3 manifest files for backup - std::set manifest_set; + //std::set manifest_set; - Log(options_.info_log, "[%s] try DeleteObsoleteFiles, total live file num: %llu\n", - dbname_.c_str(), static_cast(live.size())); - - std::vector filenames; - mutex_.Unlock(); - env_->GetChildren(dbname_, &filenames); // Ignoring errors on purpose - mutex_.Lock(); + Log(options_.info_log, "[%s] try DeleteObsoleteFiles, total live file num: %llu," + " pending_outputs %lu, children_nr %lu\n", + dbname_.c_str(), static_cast(live.size()), + pending_outputs_.size(), filenames.size()); uint64_t number; FileType type; for (size_t i = 0; i < filenames.size(); i++) { @@ -345,28 +367,28 @@ void DBImpl::DeleteObsoleteFiles() { keep = ((number >= versions_->LogNumber()) || (number == versions_->PrevLogNumber())); break; - case kDescriptorFile: - manifest_set.insert(filenames[i]); - if (manifest_set.size() > 3) { - std::set::iterator it = manifest_set.begin(); - ParseFileName(*it, &number, &type); - if (number < versions_->ManifestFileNumber()) { - // Keep my manifest file, and any newer incarnations' - // (in case there is a race that allows other incarnations) - filenames[i] = *it; - keep = false; - manifest_set.erase(it); - } - } - break; + //case kDescriptorFile: + // manifest_set.insert(filenames[i]); + // if (manifest_set.size() > 3) { + // std::set::iterator it = manifest_set.begin(); + // ParseFileName(*it, &number, &type); + // if (number < versions_->ManifestFileNumber()) { + // // Keep my manifest file, and any newer incarnations' + // // (in case there is a race that allows other incarnations) + // filenames[i] = *it; + // keep = false; + // 
manifest_set.erase(it); + // } + // } + // break; case kTableFile: keep = (live.find(BuildFullFileNumber(dbname_, number)) != live.end()); break; - case kTempFile: - // Any temp files that are currently being written to must - // be recorded in pending_outputs_, which is inserted into "live" - keep = (live.find(number) != live.end()); - break; + //case kTempFile: + // // Any temp files that are currently being written to must + // // be recorded in pending_outputs_, which is inserted into "live" + // keep = (live.find(number) != live.end()); + // break; case kCurrentFile: case kDBLockFile: case kInfoLogFile: @@ -381,9 +403,9 @@ void DBImpl::DeleteObsoleteFiles() { if (type == kTableFile) { table_cache_->Evict(dbname_, BuildFullFileNumber(dbname_, number)); } - Log(options_.info_log, "[%s] Delete type=%s #%lld\n", + Log(options_.info_log, "[%s] Delete type=%s #%lld, fname %s\n", dbname_.c_str(), FileTypeToString(type), - static_cast(number)); + static_cast(number), filenames[i].c_str()); mutex_.Unlock(); env_->DeleteFile(dbname_ + "/" + filenames[i]); mutex_.Lock(); @@ -548,15 +570,7 @@ Status DBImpl::DbExists(bool* exists) { Status DBImpl::Recover(VersionEdit* edit) { mutex_.AssertHeld(); - if (options_.ignore_corruption_in_open) { - Status s = env_->DeleteFile(dbname_ + mark_file_name); - if (!s.ok()) { - // legacy mark-file is dangerous - Log(options_.info_log, "[%s] delete mark-file failed for %s", - dbname_.c_str(), s.ToString().c_str()); - return Status::IOError("delete mark-file failed"); - } - } + bool need_newdb_txn = false; { Status s = env_->FileExists(dbname_); @@ -567,14 +581,24 @@ Status DBImpl::Recover(VersionEdit* edit) { dbname_.c_str(), s.ToString().c_str()); return s; } + need_newdb_txn = true; } else if (s.ok()) { - // Directory exists, do nothing + // lg directory exists and not ignore curruption in open + if (!options_.ignore_corruption_in_open) { + s = env_->FileExists(dbname_ + init_load_filelock); + if (s.ok()) { + need_newdb_txn = true; + } 
else if (!s.IsNotFound()) { + // Unknown status + return s; + } + } } else { // Unknown status return s; } } - + if (options_.use_file_lock) { Status s = env_->LockFile(LockFileName(dbname_), &db_lock_); if (!s.ok()) { @@ -582,6 +606,36 @@ Status DBImpl::Recover(VersionEdit* edit) { } } + if (options_.ignore_corruption_in_open) { + Status s = env_->FileExists(dbname_ + init_load_filelock); + if (s.ok()) { + s = env_->DeleteFile(dbname_ + init_load_filelock); + if (!s.ok()) { + // legacy initlock-file is dangerous + Log(options_.info_log, "[%s] delete initlock-file failed for %s", + dbname_.c_str(), s.ToString().c_str()); + return Status::IOError("delete initlock-file failed"); + } + } + s = env_->FileExists(dbname_ + mark_file_name); + if (s.ok()) { + s = env_->DeleteFile(dbname_ + mark_file_name); + if (!s.ok()) { + // legacy mark-file is dangerous + Log(options_.info_log, "[%s] delete mark-file failed for %s", + dbname_.c_str(), s.ToString().c_str()); + return Status::IOError("delete mark-file failed"); + } + } + } + + if (need_newdb_txn) { + Status s = BeginNewDbTransaction(); + if (!s.ok()) { + return s; + } + } + bool db_exists; Status s = DbExists(&db_exists); if (!s.ok()) { @@ -664,7 +718,12 @@ Status DBImpl::Recover(VersionEdit* edit) { } } } - + if (need_newdb_txn) { + Status s = CommitNewDbTransaction(); + if (!s.ok()) { + return s; + } + } if (s.ok()) { state_ = kOpened; } @@ -672,11 +731,14 @@ Status DBImpl::Recover(VersionEdit* edit) { } Status DBImpl::WriteLevel0Table(MemTable* mem, VersionEdit* edit, - Version* base) { + Version* base, uint64_t* number) { mutex_.AssertHeld(); const uint64_t start_micros = env_->NowMicros(); FileMetaData meta; meta.number = BuildFullFileNumber(dbname_, versions_->NewFileNumber()); + if (number) { + *number = meta.number; + } pending_outputs_.insert(meta.number); Iterator* iter = mem->NewIterator(); Log(options_.info_log, "[%s] Level-0 table #%u: started", @@ -724,15 +786,39 @@ Status DBImpl::WriteLevel0Table(MemTable* 
mem, VersionEdit* edit, return s; } -Status DBImpl::CompactMemTable() { +// multithread safe +Status DBImpl::CompactMemTable(bool* sched_idle) { mutex_.AssertHeld(); assert(imm_ != NULL); + Status s; + if (sched_idle) { + *sched_idle = true; + } + if (imm_->BeingFlushed()) { + //Log(options_.info_log, "[%s] CompactMemTable conflict, seq %lu", + // dbname_.c_str(), GetLastSequence(false)); + return s; + } + imm_->SetBeingFlushed(true); + + if (imm_->ApproximateMemoryUsage() <= 0) { // imm is empty, do nothing + Log(options_.info_log, "[%s] CompactMemTable empty memtable %lu", + dbname_.c_str(), GetLastSequence(false)); + imm_->Unref(); + imm_ = NULL; + has_imm_.Release_Store(NULL); + return s; + } + if (sched_idle) { + *sched_idle = false; + } // Save the contents of the memtable as a new Table VersionEdit edit; + uint64_t number; Version* base = versions_->current(); base->Ref(); - Status s = WriteLevel0Table(imm_, &edit, base); + s = WriteLevel0Table(imm_, &edit, base, &number); base->Unref(); if (s.ok() && shutting_down_.Acquire_Load()) { @@ -741,6 +827,7 @@ Status DBImpl::CompactMemTable() { // Replace immutable memtable with the generated Table if (s.ok()) { + pending_outputs_.insert(number); // LogAndApply does not hold the lock, so use pending_outputs_ to make sure new file will not be deleted edit.SetPrevLogNumber(0); edit.SetLogNumber(logfile_number_); // Earlier logs no longer needed if (imm_->GetLastSequence()) { @@ -749,6 +836,7 @@ Status DBImpl::CompactMemTable() { Log(options_.info_log, "[%s] CompactMemTable SetLastSequence %lu", dbname_.c_str(), edit.GetLastSequence()); s = versions_->LogAndApply(&edit, &mutex_); + pending_outputs_.erase(number); } if (s.ok()) { @@ -756,6 +844,9 @@ Status DBImpl::CompactMemTable() { imm_->Unref(); imm_ = NULL; has_imm_.Release_Store(NULL); + } else { + // imm dump fail, reset being flush flag + imm_->SetBeingFlushed(false); } return s; @@ -787,6 +878,8 @@ void DBImpl::TEST_CompactRange(int level, const Slice* begin,const 
Slice* end) { ManualCompaction manual; manual.level = level; manual.done = false; + manual.being_sched = false; + manual.compaction_conflict = kManualCompactIdle; if (begin == NULL) { manual.begin = NULL; } else { @@ -805,6 +898,9 @@ void DBImpl::TEST_CompactRange(int level, const Slice* begin,const Slice* end) { if (manual_compaction_ == NULL) { // Idle manual_compaction_ = &manual; MaybeScheduleCompaction(); + } else if (manual_compaction_->compaction_conflict == kManualCompactWakeup) { + manual_compaction_->compaction_conflict = kManualCompactIdle; + MaybeScheduleCompaction(); } else { // Running either my compaction or another compaction. bg_cv_.Wait(); } @@ -877,117 +973,144 @@ void DBImpl::AddInheritedLiveFiles(std::vector >* live) { } Status DBImpl::RecoverInsertMem(WriteBatch* batch, VersionEdit* edit) { - MutexLock lock(&mutex_); + MutexLock lock(&mutex_); - if (recover_mem_ == NULL) { - recover_mem_ = NewMemTable(); - recover_mem_->Ref(); - } - uint64_t log_sequence = WriteBatchInternal::Sequence(batch); - uint64_t last_sequence = log_sequence + WriteBatchInternal::Count(batch) - 1; + if (recover_mem_ == NULL) { + recover_mem_ = NewMemTable(); + recover_mem_->Ref(); + } + uint64_t log_sequence = WriteBatchInternal::Sequence(batch); + uint64_t last_sequence = log_sequence + WriteBatchInternal::Count(batch) - 1; - // if duplicate record, ignore - if (log_sequence <= recover_mem_->GetLastSequence()) { - assert (last_sequence <= recover_mem_->GetLastSequence()); - Log(options_.info_log, "[%s] duplicate record, ignore %lu ~ %lu", - dbname_.c_str(), log_sequence, last_sequence); - return Status::OK(); - } + // if duplicate record, ignore + if (log_sequence <= recover_mem_->GetLastSequence()) { + assert (last_sequence <= recover_mem_->GetLastSequence()); + Log(options_.info_log, "[%s] duplicate record, ignore %lu ~ %lu", + dbname_.c_str(), log_sequence, last_sequence); + return Status::OK(); + } - Status status = WriteBatchInternal::InsertInto(batch, 
recover_mem_); - MaybeIgnoreError(&status); + Status status = WriteBatchInternal::InsertInto(batch, recover_mem_); + MaybeIgnoreError(&status); + if (!status.ok()) { + return status; + } + if (recover_mem_->ApproximateMemoryUsage() > options_.write_buffer_size) { + edit->SetLastSequence(recover_mem_->GetLastSequence()); + status = WriteLevel0Table(recover_mem_, edit, NULL); if (!status.ok()) { - return status; - } - if (recover_mem_->ApproximateMemoryUsage() > options_.write_buffer_size) { - edit->SetLastSequence(recover_mem_->GetLastSequence()); - status = WriteLevel0Table(recover_mem_, edit, NULL); - if (!status.ok()) { - // Reflect errors immediately so that conditions like full - // file-systems cause the DB::Open() to fail. - return status; - } - recover_mem_->Unref(); - recover_mem_ = NULL; + // Reflect errors immediately so that conditions like full + // file-systems cause the DB::Open() to fail. + return status; } - return status; + recover_mem_->Unref(); + recover_mem_ = NULL; + } + return status; } Status DBImpl::RecoverLastDumpToLevel0(VersionEdit* edit) { - MutexLock lock(&mutex_); - Status status; - if (recover_mem_ == NULL) { - return status; - } + MutexLock lock(&mutex_); + Status s; + if (recover_mem_ != NULL) { if (recover_mem_->GetLastSequence() > 0) { - edit->SetLastSequence(recover_mem_->GetLastSequence()); - status = WriteLevel0Table(recover_mem_, edit, NULL); + edit->SetLastSequence(recover_mem_->GetLastSequence()); + s = WriteLevel0Table(recover_mem_, edit, NULL); } recover_mem_->Unref(); recover_mem_ = NULL; - return status; -} + } + assert(recover_mem_ == NULL); + // LogAndApply to lg's manifest + if (s.ok()) { + s = versions_->LogAndApply(edit, &mutex_); + if (s.ok()) { + DeleteObsoleteFiles(); + MaybeScheduleCompaction(); + } else { + Log(options_.info_log, "[%s] Fail to modify manifest", + dbname_.c_str()); + } + } else { + Log(options_.info_log, "[%s] Fail to dump log to level 0", dbname_.c_str()); + } + return s; +} // end of 
tera-specific +bool ScoreSortGreater(std::pair i, std::pair j) { + if (i.second != j.second) { + return i.second < j.second; + } else { + return i.first > j.first; + } +} void DBImpl::MaybeScheduleCompaction() { mutex_.AssertHeld(); if (shutting_down_.Acquire_Load()) { // DB is being deleted; no more background compactions + } else if (bg_error_.IsIOPermissionDenied()) { + // We have met an PermissionDenied error, not try to do compaction anymore, the tablet will be unloaded soon } else { - uint64_t timeout = 0; - double score = versions_->CompactionScore(&timeout); - if (manual_compaction_ != NULL) { - score = kManualCompactScore; - timeout = 0; - } - if (imm_ != NULL) { - score = kDumpMemTableScore; - timeout = 0; - } - if (score > 0) { - if (!bg_compaction_scheduled_) { - bg_schedule_id_ = env_->Schedule(&DBImpl::BGWork, this, score, timeout); - Log(options_.info_log, "[%s] Schedule Compact[%ld] score= %.2f, timeout=%lu", - dbname_.c_str(), bg_schedule_id_, score, timeout); - bg_compaction_score_ = score; - bg_compaction_timeout_ = timeout; - bg_compaction_scheduled_ = true; - assert(score <= 1 || timeout == 0); // if score > 1, then timeout MUST be 0 - } else { - // use the same way to compute priority score, like util/thread_pool.h - bool need_resched = false; - if (timeout != bg_compaction_timeout_) { - need_resched = timeout < bg_compaction_timeout_; - } else if (score != bg_compaction_score_) { - need_resched = score > bg_compaction_score_; - } - - if (need_resched) { - env_->ReSchedule(bg_schedule_id_, score, timeout); - Log(options_.info_log, "[%s] ReSchedule Compact[%ld] score= %.2f, timeout=%lu", - dbname_.c_str(), bg_schedule_id_, score, timeout); - bg_compaction_score_ = score; - bg_compaction_timeout_ = timeout; - assert(score <= 1 || timeout == 0); // if score > 1, then timeout MUST be 0 + std::vector > scores; + if (imm_ && !imm_->BeingFlushed()) { + scores.push_back(std::pair(kDumpMemTableScore, 0)); + } + if (manual_compaction_ && 
!manual_compaction_->being_sched && + (manual_compaction_->compaction_conflict != kManualCompactConflict)) { + scores.push_back(std::pair(kManualCompactScore, 0)); + } + versions_->CompactionScore(&scores); + + size_t qlen = scores.size() > bg_compaction_tasks_.size() ? scores.size(): bg_compaction_tasks_.size(); + for (size_t i = 0; i < bg_compaction_tasks_.size(); i++) { + CompactionTask* task = bg_compaction_tasks_[i]; + scores.push_back(std::pair(task->score, task->timeout)); + } + std::sort(scores.begin(), scores.end(), ScoreSortGreater); + + for (size_t i = 0; i < qlen; i++) { + if (bg_compaction_tasks_.size() < options_.max_background_compactions) { + if (i < bg_compaction_tasks_.size()) { // try reschedule + CompactionTask* task = bg_compaction_tasks_[i]; + if (ScoreSortGreater(scores[i], std::pair(task->score, task->timeout))) { // resched + task->score = scores[i].first; + task->timeout = scores[i].second; + env_->ReSchedule(task->id, task->score, task->timeout); + Log(options_.info_log, "[%s] ReSchedule Compact[%ld] score= %.2f, timeout=%lu, currency %d", + dbname_.c_str(), task->id, task->score, task->timeout, (int)bg_compaction_tasks_.size()); + assert(scores[i].first <= 1 || scores[i].second == 0); // if score > 1, then timeout MUST be 0 + } + } else { // new compact task + CompactionTask* task = new CompactionTask; + task->db = this; + task->score = scores[i].first; + task->timeout = scores[i].second; + bg_compaction_tasks_.push_back(task); + task->id = env_->Schedule(&DBImpl::BGWork, task, task->score, task->timeout); + Log(options_.info_log, "[%s] Schedule Compact[%ld] score= %.2f, timeout=%lu, currency %d", + dbname_.c_str(), task->id, task->score, task->timeout, (int)bg_compaction_tasks_.size()); + assert(scores[i].first <= 1 || scores[i].second == 0); // if score > 1, then timeout MUST be 0 } } - } else { - // No work to be done } } + return; } -void DBImpl::BGWork(void* db) { - reinterpret_cast(db)->BackgroundCall(); +void DBImpl::BGWork(void* 
task) { + CompactionTask* ctask = reinterpret_cast(task); + reinterpret_cast(ctask->db)->BackgroundCall(ctask); } -void DBImpl::BackgroundCall() { - Log(options_.info_log, "[%s] BackgroundCall", dbname_.c_str()); +void DBImpl::BackgroundCall(CompactionTask* task) { MutexLock l(&mutex_); - assert(bg_compaction_scheduled_); + Log(options_.info_log, "[%s] BackgroundCompact[%ld] score= %.2f currency %d", + dbname_.c_str(), task->id, task->score, (int)bg_compaction_tasks_.size()); + bool sched_idle = false; if (!shutting_down_.Acquire_Load()) { - Status s = BackgroundCompaction(); + Status s = BackgroundCompaction(&sched_idle); if (s.ok()) { // Success consecutive_compaction_errors_ = 0; @@ -1001,12 +1124,12 @@ void DBImpl::BackgroundCall() { bg_cv_.SignalAll(); // In case a waiter can proceed despite the error Log(options_.info_log, "[%s] Waiting after background compaction error: %s, retry: %d", dbname_.c_str(), s.ToString().c_str(), consecutive_compaction_errors_); - mutex_.Unlock(); ++consecutive_compaction_errors_; - if (consecutive_compaction_errors_ > 100000) { + if (s.IsIOPermissionDenied() || consecutive_compaction_errors_ > 100000) { bg_error_ = s; consecutive_compaction_errors_ = 0; } + mutex_.Unlock(); int seconds_to_sleep = 1; for (int i = 0; i < 3 && i < consecutive_compaction_errors_ - 1; ++i) { seconds_to_sleep *= 2; @@ -1014,36 +1137,53 @@ void DBImpl::BackgroundCall() { env_->SleepForMicroseconds(seconds_to_sleep * 1000000); mutex_.Lock(); } + } else { + sched_idle = true; } - bg_compaction_scheduled_ = false; + std::vector::iterator task_id = std::find(bg_compaction_tasks_.begin(), + bg_compaction_tasks_.end(), + task); + assert(task_id != bg_compaction_tasks_.end()); + bg_compaction_tasks_.erase(task_id); + delete task; // Previous compaction may have produced too many files in a level, // so reschedule another compaction if needed. 
- MaybeScheduleCompaction(); + if (!sched_idle) { + MaybeScheduleCompaction(); + } bg_cv_.SignalAll(); } -Status DBImpl::BackgroundCompaction() { +Status DBImpl::BackgroundCompaction(bool* sched_idle) { mutex_.AssertHeld(); - if (imm_ != NULL) { - return CompactMemTable(); + *sched_idle = false; + if (imm_ && !imm_->BeingFlushed()) { + return CompactMemTable(sched_idle); } - Compaction* c; + Status status; + Compaction* c = NULL; bool is_manual = (manual_compaction_ != NULL); InternalKey manual_end; if (is_manual) { ManualCompaction* m = manual_compaction_; - c = versions_->CompactRange(m->level, m->begin, m->end); - m->done = (c == NULL); + if (m->being_sched) { // other thread doing manual compaction or range being compacted + return status; + } + m->being_sched = true; + bool conflict = false; + c = versions_->CompactRange(m->level, m->begin, m->end, &conflict); + m->compaction_conflict = conflict? kManualCompactConflict : kManualCompactIdle; + m->done = (c == NULL && !conflict); if (c != NULL) { manual_end = c->input(0, c->num_input_files(0) - 1)->largest; } Log(options_.info_log, - "[%s] Manual compaction at level-%d from %s .. %s; will stop at %s\n", - dbname_.c_str(), m->level, + "[%s] Manual compaction, conflit %u, at level-%d from %s .. %s; will stop at %s\n", + dbname_.c_str(), conflict, m->level, (m->begin ? m->begin->DebugString().c_str() : "(begin)"), (m->end ? m->end->DebugString().c_str() : "(end)"), (m->done ? 
"(end)" : manual_end.DebugString().c_str())); @@ -1051,9 +1191,9 @@ Status DBImpl::BackgroundCompaction() { c = versions_->PickCompaction(); } - Status status; if (c == NULL) { // Nothing to do + *sched_idle = true; } else if (!is_manual && c->IsTrivialMove()) { // Move file to next level assert(c->num_input_files(0) == 1); @@ -1070,12 +1210,9 @@ Status DBImpl::BackgroundCompaction() { static_cast(f->file_size), status.ToString().c_str(), versions_->LevelSummary(&tmp)); + versions_->ReleaseCompaction(c, status); } else { - CompactionState* compact = new CompactionState(c); - status = DoCompactionWork(compact); - CleanupCompaction(compact); - c->ReleaseInputs(); - DeleteObsoleteFiles(); + status = ParallelCompaction(c); } delete c; @@ -1094,16 +1231,127 @@ Status DBImpl::BackgroundCompaction() { if (is_manual) { ManualCompaction* m = manual_compaction_; - if (!status.ok()) { - m->done = true; + m->being_sched = false; + if (m->compaction_conflict != kManualCompactConflict) { // PickRange success + if (!status.ok()) { + m->done = true; + } + if (!m->done) { + // We only compacted part of the requested range. Update *m + // to the range that is left to be compacted. + m->tmp_storage = manual_end; + m->begin = &m->tmp_storage; + } + manual_compaction_ = NULL; } - if (!m->done) { - // We only compacted part of the requested range. Update *m - // to the range that is left to be compacted. - m->tmp_storage = manual_end; - m->begin = &m->tmp_storage; + } else if (manual_compaction_ != NULL) { // non manual compact + ManualCompaction* m = manual_compaction_; + m->compaction_conflict = kManualCompactWakeup;// Wakeup here, ManualCompact thread check it + Log(options_.info_log, + "[%s] Wakeup Manual compaction at level-%d from %s .. %s", + dbname_.c_str(), m->level, + (m->begin ? m->begin->DebugString().c_str() : "(begin)"), + (m->end ? 
m->end->DebugString().c_str() : "(end)")); + } + return status; +} + +Status DBImpl::ParallelCompaction(Compaction* c) { + const uint64_t start_micros = env_->NowMicros(); + std::vector compaction_vec; + std::vector compaction_state_vec; + std::vector compact_stragety_vec; + assert(versions_->NumLevelFiles(c->level()) > 0); + SequenceNumber smallest_snapshot = snapshots_.empty() ? kMaxSequenceNumber : *(snapshots_.begin()); + versions_->GenerateSubCompaction(c, &compaction_vec, &mutex_); + mutex_.Unlock(); + + // handle compaction without Lock + std::vector thread_pool; + thread_pool.reserve(compaction_vec.size() - 1); + Log(options_.info_log, "[%s] parallel compacting %d@%d + %d@%d files, " + "sub_compact %lu, snapshot %lu\n", + dbname_.c_str(), + c->num_input_files(0), + c->level(), + c->num_input_files(1), + c->output_level(), + compaction_vec.size(), + smallest_snapshot); + for (size_t i = 0; i < compaction_vec.size(); i++) { + CompactionState* compaction = new CompactionState(compaction_vec[i]); + assert(compaction->builder == NULL); + assert(compaction->outfile == NULL); + compaction->smallest_snapshot = smallest_snapshot; + compaction_state_vec.push_back(compaction); + + CompactStrategy* compact_strategy = NewCompactStrategy(compaction); + compact_stragety_vec.push_back(compact_strategy); + if (i == 0) { + Log(options_.info_log, "[%s] compact strategy: %s, snapshot %lu\n", + dbname_.c_str(), + compact_strategy->Name(), + compaction->smallest_snapshot); } - manual_compaction_ = NULL; + + if (i < compaction_vec.size() - 1) { + thread_pool.emplace_back(&DBImpl::HandleCompactionWork, this, + compaction, compact_strategy); + } else { + HandleCompactionWork(compaction, compact_strategy); + } + } + for (auto& t : thread_pool) { + t.join(); + } + + CompactionStats stats; + CompactionState* compact = new CompactionState(c); + compact->smallest_snapshot = smallest_snapshot; + for (size_t i = 0; i < compaction_vec.size(); i++) { + CompactionState* compaction = 
compaction_state_vec[i]; + for (auto & out : compaction->outputs) { + compact->outputs.push_back(out); + stats.bytes_written += out.file_size; + } + compact->total_bytes += compaction->total_bytes; + if (compact->status.ok()) { + compact->status = compaction->status; + } + + CompactStrategy* compact_stragety = compact_stragety_vec[i]; + delete compact_stragety; + } + for (int which = 0; which < 2; which++) { + for (int i = 0; i < compact->compaction->num_input_files(which); i++) { + stats.bytes_read += compact->compaction->input(which, i)->file_size; + } + } + + mutex_.Lock(); + Status status = compact->status; + if (status.ok()) { + status = InstallCompactionResults(compact); + } + VersionSet::LevelSummaryStorage tmp; + Log(options_.info_log, "[%s] compacted to: %s, compacte stat %s", + dbname_.c_str(), versions_->LevelSummary(&tmp), status.ToString().c_str()); + stats.micros = env_->NowMicros() - start_micros; + stats_[compact->compaction->output_level()].Add(stats); + + for (size_t i = 0; i < compaction_vec.size(); i++) { + CompactionState* compaction = compaction_state_vec[i]; + CleanupCompaction(compaction); // pop pending output, which can be deleted in DeleteObsoleteFiles() + delete compaction_vec[i]; + } + assert(compact->builder == NULL); + assert(compact->outfile == NULL); + CleanupCompaction(compact); + + versions_->ReleaseCompaction(c, status); // current_version has reference to c->inputs_[0,1] + c->ReleaseInputs(); + if (!status.IsIOPermissionDenied()) { + DeleteObsoleteFiles(); } return status; } @@ -1120,7 +1368,9 @@ void DBImpl::CleanupCompaction(CompactionState* compact) { delete compact->outfile; for (size_t i = 0; i < compact->outputs.size(); i++) { const CompactionState::Output& out = compact->outputs[i]; - pending_outputs_.erase(BuildFullFileNumber(dbname_, out.number)); + if (pending_outputs_.erase(BuildFullFileNumber(dbname_, out.number)) > 0) { + Log(options_.info_log, "[%s] erase pending_output #%lu", dbname_.c_str(), out.number); + } } 
delete compact; } @@ -1138,6 +1388,8 @@ Status DBImpl::OpenCompactionOutputFile(CompactionState* compact) { out.smallest.Clear(); out.largest.Clear(); compact->outputs.push_back(out); + + Log(options_.info_log, "[%s] insert pending_output #%lu", dbname_.c_str(), file_number); mutex_.Unlock(); } @@ -1253,64 +1505,63 @@ Status DBImpl::InstallCompactionResults(CompactionState* compact) { return versions_->LogAndApply(compact->compaction->edit(), &mutex_); } -Status DBImpl::DoCompactionWork(CompactionState* compact) { - const uint64_t start_micros = env_->NowMicros(); - int64_t imm_micros = 0; // Micros spent doing imm_ compactions - - Log(options_.info_log, "[%s] Compacting %d@%d + %d@%d files", - dbname_.c_str(), - compact->compaction->num_input_files(0), - compact->compaction->level(), - compact->compaction->num_input_files(1), - compact->compaction->output_level()); - - assert(versions_->NumLevelFiles(compact->compaction->level()) > 0); - assert(compact->builder == NULL); - assert(compact->outfile == NULL); - if (snapshots_.empty()) { - compact->smallest_snapshot = kMaxSequenceNumber; - } else { - compact->smallest_snapshot = *(snapshots_.begin()); - } - +CompactStrategy* DBImpl::NewCompactStrategy(CompactionState* compact) { CompactStrategy* compact_strategy = NULL; if (options_.compact_strategy_factory) { compact_strategy = options_.compact_strategy_factory->NewInstance(); - if (snapshots_.empty()) { - compact_strategy->SetSnapshot(kMaxSequenceNumber); - } else { - compact_strategy->SetSnapshot(*(snapshots_.begin())); - } - Log(options_.info_log, "[%s] Compact strategy: %s", - dbname_.c_str(), - compact_strategy->Name()); + compact_strategy->SetSnapshot(compact->smallest_snapshot); } + return compact_strategy; +} - // Release mutex while we're actually doing the compaction work - mutex_.Unlock(); +// ** Handle sub compaction without LOCK ** +void DBImpl::HandleCompactionWork(CompactionState* compact, + CompactStrategy* compact_strategy) { + Compaction* c = 
compact->compaction; + Status& status = compact->status; + Iterator* input = versions_->MakeInputIterator(c); + if (c->sub_compact_start_ == "") { + input->SeekToFirst(); + } else { + input->Seek(c->sub_compact_start_); + } + Slice end_key(c->sub_compact_end_); + Log(options_.info_log, "[%s] handle %d@%d + %d@%d compact, range [%s, %s)\n", + dbname_.c_str(), + c->num_input_files(0), + c->level(), + c->num_input_files(1), + c->output_level(), + c->sub_compact_start_.c_str(), + c->sub_compact_end_.c_str()); - Iterator* input = versions_->MakeInputIterator(compact->compaction); - input->SeekToFirst(); - Status status; ParsedInternalKey ikey; std::string current_user_key; bool has_current_user_key = false; SequenceNumber last_sequence_for_key = kMaxSequenceNumber; - for (; input->Valid() && !shutting_down_.Acquire_Load(); ) { // Prioritize immutable compaction work if (has_imm_.NoBarrier_Load() != NULL) { - const uint64_t imm_start = env_->NowMicros(); mutex_.Lock(); - if (imm_ != NULL) { - CompactMemTable(); + if (imm_ && !imm_->BeingFlushed()) { + CompactMemTable(); // no need check failure, because imm_ not null if dump fail. 
bg_cv_.SignalAll(); // Wakeup MakeRoomForWrite() if necessary } mutex_.Unlock(); - imm_micros += (env_->NowMicros() - imm_start); } Slice key = input->key(); + if (end_key.size() > 0 && internal_comparator_.InternalKeyComparator::Compare(input->key(), end_key) >= 0) { + Log(options_.info_log, "[%s] handle %d@%d + %d@%d compact, stop at %s\n", + dbname_.c_str(), + c->num_input_files(0), + c->level(), + c->num_input_files(1), + c->output_level(), + end_key.data()); + break; // reach end_key, stop this sub compaction + } + if (compact->compaction->ShouldStopBefore(key) && compact->builder != NULL) { // should not overlap level() + 2 too much status = FinishCompactionOutputFile(compact, input); @@ -1431,10 +1682,6 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { } } - if (compact_strategy) { - delete compact_strategy; - } - if (status.ok() && shutting_down_.Acquire_Load()) { status = Status::IOError("Deleting DB during compaction"); } @@ -1451,28 +1698,6 @@ Status DBImpl::DoCompactionWork(CompactionState* compact) { } delete input; input = NULL; - - CompactionStats stats; - stats.micros = env_->NowMicros() - start_micros - imm_micros; - for (int which = 0; which < 2; which++) { - for (int i = 0; i < compact->compaction->num_input_files(which); i++) { - stats.bytes_read += compact->compaction->input(which, i)->file_size; - } - } - for (size_t i = 0; i < compact->outputs.size(); i++) { - stats.bytes_written += compact->outputs[i].file_size; - } - - mutex_.Lock(); - stats_[compact->compaction->output_level()].Add(stats); - - if (status.ok()) { - status = InstallCompactionResults(compact); - } - VersionSet::LevelSummaryStorage tmp; - Log(options_.info_log, - "[%s] compacted to: %s", dbname_.c_str(), versions_->LevelSummary(&tmp)); - return status; } struct IterState { @@ -1652,8 +1877,9 @@ bool DBImpl::BusyWrite() { void DBImpl::Workload(double* write_workload) { MutexLock l(&mutex_); - uint64_t timeout = 0; - double wwl = 
versions_->CompactionScore(&timeout); + std::vector > scores; + versions_->CompactionScore(&scores); + double wwl = scores.size() > 0? scores[0].first: 0; if (wwl >= 0) { *write_workload = wwl; } else { @@ -1837,6 +2063,71 @@ bool DBImpl::GetProperty(const Slice& property, std::string* value) { } else if (in == "sstables") { *value = versions_->current()->DebugString(); return true; + } else if (in == "verify-db-integrity") { + std::map check_file_list; + versions_->AddLiveFilesWithSize(&check_file_list); + mutex_.Unlock(); + + std::set tablet_num; + std::map::iterator it = check_file_list.begin(); + for (; it != check_file_list.end(); ++it) { + uint64_t tablet; + ParseFullFileNumber(it->first, &tablet, NULL); + tablet_num.insert(tablet); + } + + Status s; + std::set::iterator it_tablet = tablet_num.begin(); + for (; s.ok() && it_tablet != tablet_num.end(); ++it_tablet) { + std::vector filenames; + std::string tablet_path = RealDbName(dbname_, *it_tablet); + s = env_->GetChildren(tablet_path, &filenames); + //Log(options_.info_log, "[%s] verify db(slow), GetChildren %s, files_nr %lu, status %s", + // dbname_.c_str(), tablet_path.c_str(), filenames.size(), s.ToString().c_str()); + + uint64_t number; + FileType type; + for (size_t i = 0; i < filenames.size(); i++) { + if (ParseFileName(filenames[i], &number, &type) && (type == kTableFile)) { + uint64_t tablet_no = BuildFullFileNumber(tablet_path, number); + if (check_file_list.find(tablet_no) == check_file_list.end()) { + continue; + } + + uint64_t fsize = 0; + Status s1 = env_->GetFileSize(tablet_path + "/" + filenames[i], &fsize); + if (!s1.ok() || check_file_list[tablet_no] == fsize) { + check_file_list.erase(tablet_no); + } else { + Log(options_.info_log, "[%s] verify db, size mismatch, " + "path %s, tablet %s, size(in meta) %lu, size(in fs) %lu", + dbname_.c_str(), tablet_path.c_str(), filenames[i].c_str(), check_file_list[tablet_no], fsize); + } + } + } + } + + mutex_.Lock(); + std::map live; + 
versions_->AddLiveFilesWithSize(&live); + + it = check_file_list.begin(); + while (it != check_file_list.end()) { + if (live.find(it->first) == live.end()) { + it = check_file_list.erase(it); + } else { + ++it; + } + } + + if (s.ok() && check_file_list.empty()) { // verify success + value->append("verify_success"); + } else if (s.ok()) { //sst file lost + value->append("verify_fail"); + Log(options_.info_log, "[%s] db_corruption, lost %lu", + dbname_.c_str(), check_file_list.size()); + } + return s.ok(); } return false; @@ -1894,29 +2185,29 @@ uint64_t DBImpl::GetLastSequence(bool is_locked) { retval = versions_->LastSequence(); } if (is_locked) { - mutex_.Unlock(); + mutex_.Unlock(); } return retval; } MemTable* DBImpl::NewMemTable() const { - if (!options_.use_memtable_on_leveldb) { - return new MemTable(internal_comparator_, - options_.enable_strategy_when_get ? options_.compact_strategy_factory : NULL); - } else { - Logger* info_log = NULL; - //Logger* info_log = options_.info_log; - MemTableOnLevelDB* new_mem = new MemTableOnLevelDB(dbname_, internal_comparator_, - options_.compact_strategy_factory, - options_.memtable_ldb_write_buffer_size, - options_.memtable_ldb_block_size, - info_log); - std::multiset::iterator i = snapshots_.begin(); - for (; i != snapshots_.end(); ++i) { - new_mem->GetSnapshot(*i); - } - return new_mem; - } + if (!options_.use_memtable_on_leveldb) { + return new MemTable(internal_comparator_, + options_.enable_strategy_when_get ? 
options_.compact_strategy_factory : NULL); + } else { + Logger* info_log = NULL; + //Logger* info_log = options_.info_log; + MemTableOnLevelDB* new_mem = new MemTableOnLevelDB(dbname_, internal_comparator_, + options_.compact_strategy_factory, + options_.memtable_ldb_write_buffer_size, + options_.memtable_ldb_block_size, + info_log); + std::multiset::iterator i = snapshots_.begin(); + for (; i != snapshots_.end(); ++i) { + new_mem->GetSnapshot(*i); + } + return new_mem; + } } uint64_t DBImpl::GetLastVerSequence() { @@ -1929,4 +2220,70 @@ Iterator* DBImpl::NewInternalIterator() { return NewInternalIterator(ReadOptions(), &ignored); } +Status DBImpl::BeginNewDbTransaction() { + std::string lock_file_name = dbname_ + init_load_filelock; + Status s = env_->FileExists(lock_file_name); + if (s.IsNotFound()) { + // first new by split or merge add __lock file for first create lg + s = WriteStringToFileSync(env_, "\n", lock_file_name); + if (!s.ok()) { + Log(options_.info_log, "[%s] fail to start new db transaction: %s", + dbname_.c_str(), s.ToString().c_str()); + return s; + } + } else if (s.ok()) { + // have failed before this time to open + // && ignore corruption option not opened + // && don't have sst files + // need to delete all files in this db except __init_load_filelock file + Log(options_.info_log, "[%s] begin to re-new db: %s", + dbname_.c_str(), s.ToString().c_str()); + std::vector files; + s = env_->GetChildren(dbname_, &files); + if (!s.ok()) { + Log(options_.info_log, "[%s] fail to re-new db: %s", + dbname_.c_str(), s.ToString().c_str()); + return s; + } + uint64_t number; + FileType type; + for (size_t f = 0; f < files.size(); ++f) { + if (ParseFileName(files[f], &number, &type) && kTableFile == type) { + return s; + } + } + for (size_t f = 0; f < files.size(); ++f) { + std::string old_file_name = dbname_ + "/" + files[f]; + if ("/" + files[f] != init_load_filelock) { + s = env_->DeleteFile(old_file_name); + if (!s.ok()) { + Log(options_.info_log, "[%s] 
fail to re-new db: %s", + dbname_.c_str(), s.ToString().c_str()); + return s; + } + } + } + } + return s; +} + +Status DBImpl::CommitNewDbTransaction() { + std::string lock_file_name = dbname_ + init_load_filelock; + Status s = env_->FileExists(lock_file_name); + if (s.IsNotFound()) { + // lost lock file during this new db + Log(options_.info_log, "[%s] find transaction lock file fail: %s", + dbname_.c_str(), s.ToString().c_str()); + return Status::Corruption("newdb transaction lock disappeared"); + } else if (s.ok()) { + s = env_->DeleteFile(lock_file_name); + if (!s.ok()) { + Log(options_.info_log, "[%s] delete transaction lock file fail: %s", + dbname_.c_str(), s.ToString().c_str()); + return Status::Corruption("newdb transaction clean lock faild"); + } + } + return s; +} + } // namespace leveldb diff --git a/src/leveldb/db/db_impl.h b/src/leveldb/db/db_impl.h index 05b1ae623..8f23fb1c2 100644 --- a/src/leveldb/db/db_impl.h +++ b/src/leveldb/db/db_impl.h @@ -14,6 +14,8 @@ #include "db/db_table.h" #include "db/dbformat.h" #include "db/log_writer.h" +#include "db/version_set.h" +#include "leveldb/compact_strategy.h" #include "leveldb/db.h" #include "leveldb/env.h" #include "port/port.h" @@ -52,6 +54,8 @@ class DBImpl : public DB { virtual void GetApproximateSizes(uint64_t* size, std::vector* lgsize = NULL); virtual void CompactRange(const Slice* begin, const Slice* end, int lg_no = -1); + virtual bool ShouldForceUnloadOnError(); + void AddBoundLogSize(uint64_t size); // tera-specific @@ -96,6 +100,12 @@ class DBImpl : public DB { friend class DBTable; struct CompactionState; struct Writer; + struct CompactionTask { + int64_t id; // compaction thread id + double score; // compaction score + uint64_t timeout; // compaction task delay time + DBImpl* db; + }; Iterator* NewInternalIterator(const ReadOptions&, SequenceNumber* latest_snapshot); @@ -105,15 +115,23 @@ class DBImpl : public DB { void MaybeIgnoreError(Status* s) const; + // parallel compaction + Status 
ParallelCompaction(Compaction* c); + + CompactStrategy* NewCompactStrategy(CompactionState* compact); + + void HandleCompactionWork(CompactionState* compact, + CompactStrategy* compact_strategy); + // Delete any unneeded files and stale in-memory entries. void DeleteObsoleteFiles(); // Compact the in-memory write buffer to disk. Switches to a new // log-file/memtable and writes a new descriptor iff successful. - Status CompactMemTable() + Status CompactMemTable(bool* sched_idle = NULL) EXCLUSIVE_LOCKS_REQUIRED(mutex_); - Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base) + Status WriteLevel0Table(MemTable* mem, VersionEdit* edit, Version* base, uint64_t* number = NULL) EXCLUSIVE_LOCKS_REQUIRED(mutex_); Status MakeRoomForWrite(bool force /* compact even if there is room? */) @@ -121,12 +139,10 @@ class DBImpl : public DB { void MaybeScheduleCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_); static void BGWork(void* db); - void BackgroundCall(); - Status BackgroundCompaction() EXCLUSIVE_LOCKS_REQUIRED(mutex_); + void BackgroundCall(CompactionTask* task); + Status BackgroundCompaction(bool* sched_idle) EXCLUSIVE_LOCKS_REQUIRED(mutex_); void CleanupCompaction(CompactionState* compact) EXCLUSIVE_LOCKS_REQUIRED(mutex_); - Status DoCompactionWork(CompactionState* compact) - EXCLUSIVE_LOCKS_REQUIRED(mutex_); Status OpenCompactionOutputFile(CompactionState* compact); Status FinishCompactionOutputFile(CompactionState* compact, Iterator* input); @@ -154,6 +170,10 @@ class DBImpl : public DB { bool CheckMemTableCompaction(uint64_t last_sequence); MemTable* NewMemTable() const; + // new db transaction process + Status BeginNewDbTransaction(); + Status CommitNewDbTransaction(); + // Constant after construction Env* const env_; const InternalKeyComparator internal_comparator_; @@ -196,18 +216,24 @@ class DBImpl : public DB { std::set pending_outputs_; // Has a background compaction been scheduled or is running? 
- bool bg_compaction_scheduled_; - double bg_compaction_score_; - uint64_t bg_compaction_timeout_; - int64_t bg_schedule_id_; + std::vector bg_compaction_tasks_; + std::vector bg_compaction_score_; + std::vector bg_schedule_id_; // Information for a manual compaction + enum ManualCompactState { + kManualCompactIdle, // manual compact inited + kManualCompactConflict, // manual compact run simultaneously + kManualCompactWakeup, // restart delay compact task + }; struct ManualCompaction { int level; bool done; + bool being_sched; const InternalKey* begin; // NULL means beginning of key range const InternalKey* end; // NULL means end of key range InternalKey tmp_storage; // Used to keep track of compaction progress + ManualCompactState compaction_conflict; // 0 == idle, 1 == conflict, 2 == wake }; ManualCompaction* manual_compaction_; diff --git a/src/leveldb/db/db_table.cc b/src/leveldb/db/db_table.cc index d9a4a725c..89ea76a1e 100644 --- a/src/leveldb/db/db_table.cc +++ b/src/leveldb/db/db_table.cc @@ -98,6 +98,10 @@ Options InitOptionsLG(const Options& options, uint32_t lg_id) { opt.sst_size = lg_info->sst_size; opt.write_buffer_size = lg_info->write_buffer_size; opt.seek_latency = lg_info->seek_latency; + if (options.ignore_corruption_in_open_lg_list.find(lg_id) + != options.ignore_corruption_in_open_lg_list.end()) { + opt.ignore_corruption_in_open = true; + } return opt; } @@ -311,22 +315,6 @@ Status DBTable::Init() { uint32_t i = *it; DBImpl* impl = lg_list_[i]; s = impl->RecoverLastDumpToLevel0(lg_edits[i]); - - // LogAndApply to lg's manifest - if (s.ok()) { - MutexLock lock(&impl->mutex_); - s = impl->versions_->LogAndApply(lg_edits[i], &impl->mutex_); - if (s.ok()) { - impl->DeleteObsoleteFiles(); - impl->MaybeScheduleCompaction(); - } else { - Log(options_.info_log, "[%s] Fail to modify manifest of lg %d", - dbname_.c_str(), - i); - } - } else { - Log(options_.info_log, "[%s] Fail to dump log to level 0", dbname_.c_str()); - } delete lg_edits[i]; } @@ -497,6 
+485,9 @@ Status DBTable::Write(const WriteOptions& options, WriteBatch* my_batch) { break; } mutex_.Lock(); + if (s.IsIOPermissionDenied()) { + fatal_error_ = s; + } } if (s.ok()) { std::vector lg_updates; @@ -525,7 +516,6 @@ Status DBTable::Write(const WriteOptions& options, WriteBatch* my_batch) { Log(options_.info_log, "[%s] [Fatal] Write to lg%u fail", dbname_.c_str(), i); s = lg_s; - fatal_error_ = lg_s; break; } } @@ -534,7 +524,10 @@ Status DBTable::Write(const WriteOptions& options, WriteBatch* my_batch) { for (uint32_t i = 0; i < lg_list_.size(); ++i) { lg_list_[i]->AddBoundLogSize(updates->DataSize()); } + } else { + fatal_error_ = s; } + // Commit updates if (s.ok() && lg_list_.size() > 1) { for (uint32_t i = 0; i < lg_list_.size(); ++i) { @@ -696,6 +689,19 @@ void DBTable::ReleaseSnapshot(uint64_t sequence_number) { } } +bool DBTable::ShouldForceUnloadOnError() { + MutexLock l(&mutex_); + bool permission_error = fatal_error_.IsIOPermissionDenied(); + if (permission_error) { //return early + return permission_error; + } + std::set::iterator it = options_.exist_lg_list->begin(); + for (; it != options_.exist_lg_list->end(); ++it) { + permission_error |= lg_list_[*it]->ShouldForceUnloadOnError(); + } + return permission_error; +} + const uint64_t DBTable::Rollback(uint64_t snapshot_seq, uint64_t rollback_point) { std::set::iterator it = options_.exist_lg_list->begin(); uint64_t rollback_seq = rollback_point == kMaxSequenceNumber ? 
last_sequence_ : rollback_point;; @@ -708,21 +714,28 @@ const uint64_t DBTable::Rollback(uint64_t snapshot_seq, uint64_t rollback_point) bool DBTable::GetProperty(const Slice& property, std::string* value) { bool ret = true; std::string ret_string; + std::set::iterator it = options_.exist_lg_list->begin(); for (; it != options_.exist_lg_list->end(); ++it) { std::string lg_value; bool lg_ret = lg_list_[*it]->GetProperty(property, &lg_value); if (lg_ret) { if (options_.exist_lg_list->size() > 1) { - ret_string.append(Uint64ToString(*it) + ": {\n"); + ret_string.append("LG:" + Uint64ToString(*it) + ":"); } ret_string.append(lg_value); if (options_.exist_lg_list->size() > 1) { - ret_string.append("\n}\n"); + ret_string.append(" "); } + } else { + ret = false; + break; } } - *value = ret_string; + + if (ret) { + *value = ret_string; + } return ret; } @@ -936,7 +949,6 @@ Status DBTable::RecoverLogFile(uint64_t log_number, uint64_t recover_limit, } } delete file; - return status; } @@ -1131,6 +1143,14 @@ int64_t DBTable::TEST_MaxNextLevelOverlappingBytes() { } int DBTable::SwitchLog(bool blocked_switch) { + { + MutexLock l(&mutex_); + if (fatal_error_.IsIOPermissionDenied()) { + Log(options_.info_log, "[%s] can not switch log becasue %s", + dbname_.c_str(), fatal_error_.ToString().c_str()); + return 2; + } + } if (!blocked_switch || log::AsyncWriter::BlockLogNum() < options_.max_block_log_number) { if (current_log_size_ == 0) { @@ -1156,6 +1176,10 @@ int DBTable::SwitchLog(bool blocked_switch) { Log(options_.info_log, "[%s] SwitchLog", dbname_.c_str()); } return 0; // success + } else if (s.IsIOPermissionDenied()) { + MutexLock l(&mutex_); + fatal_error_ = s; + return 2; // posix error EACCES = 13 } else { Log(options_.info_log, "[%s] fail to open logfile %s. 
SwitchLog failed", dbname_.c_str(), log_file_name.c_str()); diff --git a/src/leveldb/db/db_table.h b/src/leveldb/db/db_table.h index 4fa0a11c4..4ff14f46a 100644 --- a/src/leveldb/db/db_table.h +++ b/src/leveldb/db/db_table.h @@ -88,6 +88,8 @@ class DBTable : public DB { virtual const uint64_t Rollback(uint64_t snapshot_seq, uint64_t rollback_point = kMaxSequenceNumber); + virtual bool ShouldForceUnloadOnError(); + // DB implementations can export properties about their state // via this method. If "property" is a valid property understood by this // DB implementation, fills "*value" with its current value and returns diff --git a/src/leveldb/db/db_test.cc b/src/leveldb/db/db_test.cc index 7c25f2de6..a12a0536a 100644 --- a/src/leveldb/db/db_test.cc +++ b/src/leveldb/db/db_test.cc @@ -98,6 +98,23 @@ class SpecialEnv : public EnvWrapper { } Status NewWritableFile(const std::string& f, WritableFile** r) { + class InitLoadLockFile : public WritableFile { + private: + SpecialEnv* env_; + WritableFile* base_; + + public: + InitLoadLockFile(SpecialEnv* env, WritableFile* base) + : env_(env), + base_(base) { + } + ~InitLoadLockFile() { delete base_; } + Status Append(const Slice& data) { return base_->Append(data); } + Status Close() { return base_->Close(); } + Status Flush() { return base_->Flush(); } + Status Sync() { return base_->Sync(); } + }; + class SSTableFile : public WritableFile { private: SpecialEnv* env_; @@ -165,6 +182,8 @@ class SpecialEnv : public EnvWrapper { *r = new SSTableFile(this, *r); } else if (strstr(f.c_str(), "MANIFEST") != NULL) { *r = new ManifestFile(this, *r); + } else if (strstr(f.c_str(), "__init_load_filelock") != NULL) { + *r = new InitLoadLockFile(this, *r); } } return s; @@ -872,6 +891,40 @@ TEST(DBTest, Recover) { } while (ChangeOptions()); } +TEST(DBTest, RecoverWithLostCurrent) { + // before write anything delete current file + ASSERT_OK(env_->DeleteFile(CurrentFileName(dbname_ + "/0"))); + leveldb::WritableFile* lock_file; + 
ASSERT_OK(env_->NewWritableFile(dbname_ + "/0/__init_load_filelock", &lock_file)); + ASSERT_OK(lock_file->Append("\n")); + ASSERT_OK(lock_file->Sync()); + ASSERT_OK(lock_file->Close()); + delete lock_file; + do { + Reopen(); + ASSERT_OK(Put("foo", "v3")); + Reopen(); + ASSERT_EQ("v3", Get("foo")); + } while (ChangeOptions()); +} + +TEST(DBTest, RecoverWithLostManifest) { + // before write anything delete current file + ASSERT_OK(env_->DeleteFile(DescriptorFileName(dbname_ + "/0", 1))); + leveldb::WritableFile* lock_file; + ASSERT_OK(env_->NewWritableFile(dbname_ + "/0/__init_load_filelock", &lock_file)); + ASSERT_OK(lock_file->Append("\n")); + ASSERT_OK(lock_file->Sync()); + ASSERT_OK(lock_file->Close()); + delete lock_file; + do { + Reopen(); + ASSERT_OK(Put("foo", "v3")); + Reopen(); + ASSERT_EQ("v3", Get("foo")); + } while (ChangeOptions()); +} + TEST(DBTest, RecoveryWithEmptyLog) { do { ASSERT_OK(Put("foo", "v1")); diff --git a/src/leveldb/db/filename.cc b/src/leveldb/db/filename.cc index d56ea2ff7..4ac4a3864 100644 --- a/src/leveldb/db/filename.cc +++ b/src/leveldb/db/filename.cc @@ -129,7 +129,7 @@ bool ParseFileName(const std::string& fname, if (rest == "CURRENT") { *number = 0; *type = kCurrentFile; - } else if (rest == "LOCK") { + } else if (rest == "LOCK" || rest == "__init_load_filelock") { *number = 0; *type = kDBLockFile; } else if (rest == "LOG" || rest == "LOG.old") { @@ -242,6 +242,15 @@ std::string BuildTabletPath(const std::string& prefix, uint64_t tablet) { return dbname; } +std::string BuildTabletLgPath(const std::string& prefix, uint64_t tablet, uint64_t lg) { + char buf[100]; + snprintf(buf, sizeof(buf), "/tablet%08llu/%llu", + static_cast(tablet), + static_cast(lg)); + std::string lg_path = prefix + buf; + return lg_path; +} + std::string BuildTableFilePath(const std::string& prefix, uint64_t tablet, uint64_t lg, uint64_t number) { char buf[100]; @@ -252,6 +261,35 @@ std::string BuildTableFilePath(const std::string& prefix, uint64_t tablet, 
return MakeFileName(dbname, number & 0xffffffff, "sst"); } +std::string BuildTrashTableFilePath(const std::string& prefix, uint64_t tablet, + uint32_t lg_id, uint64_t number, + const std::string& time) { + char buf[100]; + snprintf(buf, sizeof(buf), "/tablet%08llu/%lu/%08llu.sst.%s", + static_cast(tablet), + static_cast(lg_id), + static_cast(number), + time.c_str()); + + return prefix + buf; +} + +std::string GetTimeStrFromTrashFile(const std::string& path) { + size_t dir_pos = path.rfind("/"); + if (dir_pos == std::string::npos || dir_pos == path.length() - 1) { + return ""; + } + std::string file = path.substr(dir_pos + 1, path.length() - dir_pos - 1); + + size_t time_pos = file.rfind("."); + if (time_pos == std::string::npos) { + return ""; + } + std::string time_str = file.substr(time_pos + 1, file.length() - time_pos - 1); + + return time_str; +} + std::string BuildTableFilePath(const std::string& prefix, uint64_t lg, uint64_t full_number) { uint64_t tablet, number; ParseFullFileNumber(full_number, &tablet, &number); diff --git a/src/leveldb/db/filename.h b/src/leveldb/db/filename.h index ede91c51a..b151c165b 100644 --- a/src/leveldb/db/filename.h +++ b/src/leveldb/db/filename.h @@ -97,12 +97,27 @@ extern uint64_t BuildFullFileNumber(const std::string& dbname, // from (/table1, 3) std::string BuildTabletPath(const std::string& prefix, uint64_t tablet); +std::string BuildTabletLgPath(const std::string& prfix, uint64_t tablet, uint64_t lg); + // Build file path from tablet_num & lg_num & file number // E.g. construct "/table1/tablet000003/0/00000001.sst" // from (/table1, 3, 0, 1) std::string BuildTableFilePath(const std::string& prefix, uint64_t tablet, uint64_t lg, uint64_t number); +// Build trash file path from tablet_num & lg & file number & time +// E.g. 
construct "/table1/tablet000003/0/00000001.sst.20170718-17-08-30" +// from (/table1, 3, 0, 1, 20170718-17-08-30) +std::string BuildTrashTableFilePath(const std::string& prefix, uint64_t tablet, + uint32_t lg_id, uint64_t number, + const std::string& time); + +// get time string from trash file path +// E.g. get "20170718-17-08-30" +// from "/table1/tablet000003/0/00000001.sst.20170718-17-08-30" +// if path is invalid, return "" +std::string GetTimeStrFromTrashFile(const std::string& path); + // Build file path from lg_num & full file number // E.g. construct "/table1/tablet000003/0/00000001.sst" // from (/table1, 0, 0x8000000300000001) diff --git a/src/leveldb/db/memtable.cc b/src/leveldb/db/memtable.cc index c9f284110..ddee41b1d 100644 --- a/src/leveldb/db/memtable.cc +++ b/src/leveldb/db/memtable.cc @@ -26,6 +26,7 @@ MemTable::MemTable(const InternalKeyComparator& cmp, CompactStrategyFactory* com : last_seq_(0), comparator_(cmp), refs_(0), + being_flushed_(false), table_(comparator_, &arena_), empty_(true), compact_strategy_factory_(compact_strategy_factory) { diff --git a/src/leveldb/db/memtable.h b/src/leveldb/db/memtable.h index ba608550e..a2a1a073a 100644 --- a/src/leveldb/db/memtable.h +++ b/src/leveldb/db/memtable.h @@ -79,6 +79,13 @@ class MemTable { empty_ = false; } + bool BeingFlushed() { return being_flushed_;} + void SetBeingFlushed(bool flag) { + assert(flag ? !being_flushed_ + : being_flushed_); + being_flushed_ = flag; + } + virtual ~MemTable(); protected: @@ -97,6 +104,7 @@ class MemTable { KeyComparator comparator_; int refs_; + bool being_flushed_; Arena arena_; Table table_; diff --git a/src/leveldb/db/version_edit.cc b/src/leveldb/db/version_edit.cc index fc95284e6..244733915 100644 --- a/src/leveldb/db/version_edit.cc +++ b/src/leveldb/db/version_edit.cc @@ -11,7 +11,6 @@ #include "db/filename.h" #include "db/version_set.h" #include "util/coding.h" - namespace leveldb { // Tag numbers for serialized VersionEdit. 
These numbers are written to @@ -29,6 +28,7 @@ enum Tag { kNewFile = 10, kDeletedFile = 11, kNewFileInfo = 12, + kSstFileDataSize = 13, // no more than 1<<20 kMaxTag = 1 << 20, @@ -147,6 +147,13 @@ void VersionEdit::EncodeTo(std::string* dst) const { PutVarint32(dst, str.size() + kMaxTag); PutVarint32(dst, kNewFileInfo); dst->append(str.data(), str.size()); + + // record sst FileData + str.clear(); + PutVarint64(&str, f.data_size); + PutVarint32(dst, str.size() + kMaxTag); + PutVarint32(dst, kSstFileDataSize); + dst->append(str.data(), str.size()); } } @@ -171,6 +178,43 @@ static bool GetLevel(Slice* input, int* level) { } } +Status VersionEdit::DecodeNewFileInfo(Slice* input, FileMetaData* f) { + bool decode_continue = true; + + while (decode_continue && input->size() > 0) { + uint32_t len = 0; + uint32_t tag = 0; + Slice file_input = *input; + GetVarint32(&file_input, &len); + if (len <= kMaxTag) { + break; + } + + GetVarint32(&file_input, &tag); + switch (tag) { + case kNewFileInfo: + GetVarint32(input, &len);// ignore len + GetVarint32(input, &tag);// ignore tag + GetVarint64(input, &f->del_percentage); + GetVarint64(input, &f->ttl_percentage); + GetVarint64(input, &f->check_ttl_ts); + break; + case kSstFileDataSize: + GetVarint32(input, &len); + GetVarint32(input, &tag); + GetVarint64(input, &f->data_size); + break; + default: + fprintf(stderr, "NewFile %lu without info, skip tag %d, len %d\n", + f->number & 0xffffffff, + tag, len); + decode_continue = false; + break; + } + } + return Status::OK(); +} + Status VersionEdit::DecodeFrom(const Slice& src) { Clear(); Slice input = src; @@ -285,29 +329,7 @@ Status VersionEdit::DecodeFrom(const Slice& src) { f.largest_fake = true; } - // new file format parser - Slice file_ptr = input; - uint32_t file_tag; - GetVarint32(&file_ptr, &file_tag); - if (file_tag > kMaxTag) { - // file_tag - kMaxTag; - GetVarint32(&file_ptr, &tag); - } - switch (tag) { - case kNewFileInfo: - GetVarint32(&input, &tag);// ignore len - 
GetVarint32(&input, &tag);// ignore tag - GetVarint64(&input, &f.del_percentage); - GetVarint64(&input, &f.ttl_percentage); - GetVarint64(&input, &f.check_ttl_ts); - break; - - default: - fprintf(stderr, "NewFile %lu without info, skip tag %d, len %d\n", - f.number & 0xffffffff, - tag, file_tag); - break; - } + DecodeNewFileInfo(&input, &f); new_files_.push_back(std::make_pair(level, f)); } else { msg = "new-file entry 1"; @@ -400,6 +422,8 @@ std::string VersionEdit::DebugString() const { AppendNumberTo(&r, file_number); r.append(" size "); AppendNumberTo(&r, f.file_size); + r.append(" data_size "); + AppendNumberTo(&r, f.data_size); r.append(" "); r.append(f.smallest.DebugString()); r.append(" .. "); diff --git a/src/leveldb/db/version_edit.h b/src/leveldb/db/version_edit.h index 0c64728d0..17b9bfc36 100644 --- a/src/leveldb/db/version_edit.h +++ b/src/leveldb/db/version_edit.h @@ -33,6 +33,7 @@ struct FileMetaData { InternalKey largest; // Largest internal key served by table bool smallest_fake; // smallest is not real, have out-of-range keys bool largest_fake; // largest is not real, have out-of-range keys + bool being_compacted; // Is this file undergoing compaction? 
FileMetaData() : refs(0), @@ -44,7 +45,8 @@ struct FileMetaData { file_size(0), data_size(0), smallest_fake(false), - largest_fake(false) { } + largest_fake(false), + being_compacted(false) { } }; class VersionEdit { @@ -157,6 +159,7 @@ class VersionEdit { FileMetaData f; f.number = file; f.file_size = file_size; + f.data_size = f.file_size; f.smallest = smallest; f.largest = largest; f.del_percentage = del_percentage; @@ -185,6 +188,7 @@ class VersionEdit { void EncodeTo(std::string* dst) const; Status DecodeFrom(const Slice& src); + Status DecodeNewFileInfo(Slice* input, FileMetaData* f); std::string DebugString() const; diff --git a/src/leveldb/db/version_edit_test.cc b/src/leveldb/db/version_edit_test.cc index c728af4cc..44a5d308f 100644 --- a/src/leveldb/db/version_edit_test.cc +++ b/src/leveldb/db/version_edit_test.cc @@ -26,6 +26,7 @@ enum Tag { kNewFile = 10, kDeletedFile = 11, kNewFileInfo = 12, + kSstFileDataSize = 13, // no more than 1<<20 kMaxTag = 1 << 20, @@ -53,7 +54,7 @@ class VersionEditTest: public VersionEdit { dst->append(str.data(), str.size()); } } - void EncodeToOld(std::string* dst) { + void EncodeToOld(std::string* dst, bool with_sst, bool with_data_size) { DumpToOldFormat(); if (has_comparator_) { PutVarint32(dst, kComparator); @@ -71,7 +72,43 @@ class VersionEditTest: public VersionEdit { PutVarint32(dst, kLastSequence); PutVarint64(dst, last_sequence_); } + if (!with_sst) { + return; + } + for (uint32_t i = 0; i < 5; i++) { + FileMetaData f; + f.number = 100 + i; + f.file_size = 200 + i; + f.data_size = f.file_size; + f.smallest = InternalKey("apple", 300 + i, kTypeValue); + f.largest = InternalKey("zookeeper", 400 + i, kTypeDeletion); + EncodeSstFile(i, f, dst, with_data_size); + } + } + + void EncodeSstFile(uint32_t level, const FileMetaData& f, std::string* dst, bool with_data_size) { + std::string str; + PutVarint32(&str,level); // level + PutVarint64(&str, f.number); + PutVarint64(&str, f.file_size); + PutLengthPrefixedSlice(&str, 
f.smallest.Encode()); + PutLengthPrefixedSlice(&str, f.largest.Encode()); + PutVarint32(&str, 0); // put f.smallest_fake + PutVarint32(&str, 0); // put f.largest_fake + + PutVarint32(dst, str.size() + kMaxTag); + PutVarint32(dst, kNewFile); + dst->append(str.data(), str.size()); + // record sst FileData + if (with_data_size) { + str.clear(); + PutVarint64(&str, f.data_size); + PutVarint32(dst, str.size() + kMaxTag); + PutVarint32(dst, kSstFileDataSize); + dst->append(str.data(), str.size()); + } } + void DumpToOldFormat() { has_comparator_ = HasComparator(); comparator_ = GetComparatorName(); @@ -126,22 +163,28 @@ static void CreateEditContent(VersionEditTest* edit) { edit->SetLastSequence(900); TestEncodeDecode(*edit); } -static void CreateEditContentV2(VersionEditTest* edit) { +static void CreateOldEncodedContent(VersionEditTest* edit, std::string* dst, + bool with_sst, bool with_data_size) { edit->SetComparatorName("test_nil_cmp"); edit->SetLogNumber(700); edit->SetNextFile(800); edit->SetLastSequence(900); TestEncodeDecode(*edit); + edit->EncodeToOld(dst, with_sst, with_data_size); } -static void CreateEditWithTtlInfo(VersionEditTest* edit) { +static void CreateEditWithSstDetail(VersionEditTest* edit) { for (int i = 0; i < 5; i++) { TestEncodeDecode(*edit); - edit->AddFile(i, 100 + i, 200 + i, - InternalKey("apple", 300 + i, kTypeValue), - InternalKey("zookeeper", 400 + i, kTypeDeletion), - 20 + i/* del percentage */, - 1000000000 + i/* timeout */, - 50 + i/* del percentage */); + FileMetaData f; + f.number = 100 + i; + f.file_size = 200 + i; + f.data_size = f.file_size; + f.smallest = InternalKey("apple", 300 + i, kTypeValue); + f.largest = InternalKey("zookeeper", 400 + i, kTypeDeletion); + f.del_percentage = 20 + i; + f.ttl_percentage = 50 + i; + f.check_ttl_ts = 1000000000 + i; + edit->AddFile(i, f); edit->DeleteFile(i, 500 + i); edit->SetCompactPointer(i, InternalKey("x00", 600 + i, kTypeValue)); } @@ -154,14 +197,13 @@ static void 
CreateEditWithTtlInfo(VersionEditTest* edit) { } TEST(VersionEditTest, EncodeFileInfoTag) { VersionEditTest edit; - CreateEditWithTtlInfo(&edit); + CreateEditWithSstDetail(&edit); fprintf(stderr, "%s\n", edit.DebugString().c_str()); } TEST(VersionEditTest, OldFormatRead) { VersionEditTest edit; - CreateEditContentV2(&edit); std::string c1, c3; - edit.EncodeToOld(&c1); // dump into old format + CreateOldEncodedContent(&edit, &c1, false, false); edit.EncodeTo(&c3); // dump into new format VersionEditTest parsed; @@ -174,6 +216,23 @@ TEST(VersionEditTest, OldFormatRead) { fprintf(stderr, "%s\n", parsed.DebugString().c_str()); } +TEST(VersionEditTest, DecodeFormatWithoutSstFileDataSize) { + VersionEditTest edit; + std::string c1, c3; + CreateOldEncodedContent(&edit, &c1, true, false); + edit.EncodeTo(&c3); // dump into new format + + VersionEditTest parsed; + Status s = parsed.DecodeFrom(c1); // use new Decode to parse old format + ASSERT_TRUE(s.ok()) << s.ToString(); + std::string c2; + parsed.EncodeTo(&c2); + + ASSERT_NE(c2, c3); + fprintf(stderr, "%s\n", parsed.DebugString().c_str()); + +} + TEST(VersionEditTest, EncodeUnknowTag) { VersionEditTest edit; CreateEditContent(&edit); diff --git a/src/leveldb/db/version_set.cc b/src/leveldb/db/version_set.cc index 088acd090..4c5d328aa 100644 --- a/src/leveldb/db/version_set.cc +++ b/src/leveldb/db/version_set.cc @@ -70,6 +70,15 @@ static int64_t TotalFileSize(const std::vector& files) { } return sum; } +static int64_t TotalFileSizeNotBeingCompacted(const std::vector& files) { + int64_t sum = 0; + for (size_t i = 0; i < files.size(); i++) { + if (!files[i]->being_compacted) { + sum += files[i]->file_size; + } + } + return sum; +} Version::~Version() { assert(refs_ == 0); @@ -129,11 +138,11 @@ static bool BeforeFile(const Comparator* ucmp, bool SomeFileOverlapsRange( const InternalKeyComparator& icmp, + const Comparator* ucmp, bool disjoint_sorted_files, const std::vector& files, const Slice* smallest_user_key, const Slice* 
largest_user_key) { - const Comparator* ucmp = icmp.user_comparator(); if (!disjoint_sorted_files) { // Need to check against all files for (size_t i = 0; i < files.size(); i++) { @@ -473,8 +482,17 @@ void Version::Unref() { bool Version::OverlapInLevel(int level, const Slice* smallest_user_key, const Slice* largest_user_key) { - return SomeFileOverlapsRange(vset_->icmp_, (level > 0), files_[level], - smallest_user_key, largest_user_key); + + // use row key comparator + CompactStrategy* strategy = vset_->options_->compact_strategy_factory->NewInstance(); + const Comparator* ucmp = strategy->RowKeyComparator(); + if (ucmp == NULL) { + ucmp = vset_->icmp_.user_comparator(); + } + bool overlap = SomeFileOverlapsRange(vset_->icmp_, ucmp, (level > 0), files_[level], + smallest_user_key, largest_user_key); + delete strategy; + return overlap; } int Version::PickLevelForMemTableOutput( @@ -516,12 +534,10 @@ void Version::GetOverlappingInputs( if (end != NULL) { user_end = end->user_key(); } - const Comparator* user_cmp = NULL; - CompactStrategy* strategy = NULL; - if (!vset_->options_->drop_base_level_del_in_compaction) { // use row key comparator - strategy = vset_->options_->compact_strategy_factory->NewInstance(); - user_cmp = strategy->RowKeyComparator(); - } + + // use row key comparator + CompactStrategy* strategy = vset_->options_->compact_strategy_factory->NewInstance(); + const Comparator* user_cmp = strategy->RowKeyComparator(); if (user_cmp == NULL) { user_cmp = vset_->icmp_.user_comparator(); } @@ -818,11 +834,7 @@ class VersionSetBuilder { FileMetaData* f = new FileMetaData(f_new); f->refs = 1; - - if (f->data_size == 0 && !f->smallest_fake && !f->largest_fake) { - // Make sure this is a new file generated by compaction. - f->data_size = f->file_size; - } + f->being_compacted = false; // We arrange to automatically compact this file after // a certain number of seeks. 
Let's assume: @@ -927,6 +939,7 @@ class VersionSetBuilder { vset_->db_key_start_.DebugString().c_str()); f->smallest = vset_->db_key_start_; f->smallest_fake = true; + f->data_size = 0; } else { // file out of tablet range, skip it; return false; @@ -943,6 +956,7 @@ class VersionSetBuilder { vset_->db_key_end_.DebugString().c_str()); f->largest = vset_->db_key_end_; f->largest_fake = true; + f->data_size = 0; } else { // file out of tablet range, skip it; return false; @@ -1014,7 +1028,18 @@ void VersionSet::AppendVersion(Version* v) { v->next_->prev_ = v; } -Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { +// multi thread safe +// Information kept for every waiting manifest writer +struct VersionSet::ManifestWriter { + Status status; + VersionEdit* edit; + bool done; + port::CondVar cv; + + explicit ManifestWriter(port::Mutex* mu) : done(false), cv(mu) { } +}; +void VersionSet::LogAndApplyHelper(VersionSetBuilder* builder, + VersionEdit* edit) { if (edit->has_log_number_) { assert(edit->log_number_ >= log_number_); assert(edit->log_number_ < next_file_number_); @@ -1036,13 +1061,28 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { edit->SetLastSequence(last_sequence_); } + builder->Apply(edit); +} + +Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { + mu->AssertHeld(); + // multi write control, do not batch edit write, but multi thread safety + ManifestWriter w(mu); + w.edit = edit; + manifest_writers_.push_back(&w); + while (!w.done && &w != manifest_writers_.front()) { + w.cv.Wait(); + } + assert(manifest_writers_.front() == &w); + + // first manifest writer, batch edit Version* v = new Version(this); { VersionSetBuilder builder(this, current_); - builder.Apply(edit); + LogAndApplyHelper(&builder, w.edit); builder.SaveTo(v); } - Finalize(v); + Finalize(v); // recalculate new version score const uint64_t switch_interval = options_->manifest_switch_interval * 1000000UL; if (descriptor_log_ != NULL && @@ 
-1050,6 +1090,7 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { force_switch_manifest_ = true; } + uint64_t manifest_file_num = manifest_file_number_; int retry_count = 0; Status s; // Unlock during expensive MANIFEST log write @@ -1063,13 +1104,14 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { } mu->Unlock(); + // close current manifest if (force_switch_manifest_) { delete descriptor_log_; delete descriptor_file_; descriptor_log_ = NULL; descriptor_file_ = NULL; - Log(options_->info_log, "[%s] force switch MANIFEST to %lu", - dbname_.c_str(), manifest_file_number_); + Log(options_->info_log, "[%s] force switch MANIFEST #%lu to #%lu", + dbname_.c_str(), manifest_file_num, manifest_file_number_); force_switch_manifest_ = false; } @@ -1113,15 +1155,65 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { if (s.ok() && !new_manifest_file.empty()) { s = SetCurrentFile(env_, dbname_, manifest_file_number_); if (s.ok()) { - Log(options_->info_log, "[%s] set CURRENT to %llu\n", - dbname_.c_str(), static_cast(manifest_file_number_)); + Log(options_->info_log, "[%s] set CURRENT #%lu to #%llu success\n", + dbname_.c_str(),manifest_file_num, + static_cast(manifest_file_number_)); + manifest_file_num = manifest_file_number_; } else { - Log(options_->info_log, "[%s][dfs error] set CURRENT error: %s\n", - dbname_.c_str(), s.ToString().c_str()); + Log(options_->info_log, "[%s][dfs error] set CURRENT #%lu to #%lu error: %s\n", + dbname_.c_str(), manifest_file_num, manifest_file_number_, + s.ToString().c_str()); } } - if (!s.ok()) { + // switch manifest success, try delete obsolete file + if (!new_manifest_file.empty() && s.ok()) { + // manifest file set, keep latest 3 manifest files for backup + std::set manifest_set; + std::vector filenames; + env_->GetChildren(dbname_, &filenames); + + uint64_t number; + FileType type; + for (size_t i = 0; i < filenames.size(); i++) { + if (ParseFileName(filenames[i], &number, 
&type)) { + bool keep = true; + switch (type) { + case kDescriptorFile: + manifest_set.insert(filenames[i]); + if (manifest_set.size() > 3) { + std::set::iterator it = manifest_set.begin(); + ParseFileName(*it, &number, &type); + if (number < manifest_file_number_) { + // Keep my manifest file, and any newer incarnations' + // (in case there is a race that allows other incarnations) + filenames[i] = *it; + keep = false; + manifest_set.erase(it); + } + } + break; + case kTempFile: + // Any temp files that are currently being written to must + // be recorded in pending_outputs_, which is inserted into "live" + keep = false; + break; + default: + break; + } + + if (!keep) { + Log(options_->info_log, "[%s] version_set Delete type=%s #%lld, fname %s\n", + dbname_.c_str(), FileTypeToString(type), + static_cast(number), filenames[i].c_str()); + env_->DeleteFile(dbname_ + "/" + filenames[i]); + } + } + } + } + // if MANIFEST or CURRENT file write error because of losting directory lock, + // do not try to switch manifest anymore + if (!s.ok() && !s.IsIOPermissionDenied()) { force_switch_manifest_ = true; if (!new_manifest_file.empty()) { env_->DeleteFile(new_manifest_file); @@ -1141,7 +1233,7 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { } mu->Lock(); - } while (force_switch_manifest_); + } while (force_switch_manifest_); // bugfix issue=tera-10, dfs sync fail, but eventually success, cause reload fail // Install the new version if (s.ok()) { @@ -1155,6 +1247,10 @@ Status VersionSet::LogAndApply(VersionEdit* edit, port::Mutex* mu) { Log(options_->info_log, "[%s][dfs error] set force_switch_manifest", dbname_.c_str()); } + manifest_writers_.pop_front(); + if (!manifest_writers_.empty()) { + manifest_writers_.front()->cv.Signal(); + } return s; } @@ -1462,9 +1558,11 @@ Status VersionSet::Recover() { FileMetaData* f = files[i]; ModifyFileSize(f); // Debug - Log(options_->info_log, "[%s] recover: %s, level: %d, del_p: %lu, check_ttl_ts %lu, ttl_p %lu, 
s: %d %s, l: %d %s\n", + Log(options_->info_log, "[%s] recover: %s, level: %d, file_size %lu, data_size %lu, " + "del_p: %lu, check_ttl_ts %lu, ttl_p %lu, s: %d %s, l: %d %s\n", dbname_.c_str(), FileNumberDebugString(f->number).c_str(), level, + f->file_size, f->data_size, f->del_percentage, f->check_ttl_ts, f->ttl_percentage, @@ -1480,6 +1578,9 @@ Status VersionSet::Recover() { // Modify data_size of file meta bool VersionSet::ModifyFileSize(FileMetaData* f) { + if (f->data_size != 0) { + return true; + } // Try modify data_size in file meta // data_size = largest_key_offset - smallest_key_offset if (f->largest_fake || f->smallest_fake) { @@ -1508,8 +1609,7 @@ bool VersionSet::ModifyFileSize(FileMetaData* f) { static_cast(f->file_size), static_cast(f->data_size)); delete iter; - } else { - // do not need modify + } else { // for compatibility, we have not decoded f->data_size from MANIFEST f->data_size = f->file_size; } return true; @@ -1523,8 +1623,6 @@ void VersionSet::MarkFileNumberUsed(uint64_t number) { void VersionSet::Finalize(Version* v) { // Precomputed best level for next compaction - int best_level = -1; - double best_score = -1; int best_del_level = -1; int best_del_idx = -1; int best_ttl_level = -1; @@ -1532,8 +1630,8 @@ void VersionSet::Finalize(Version* v) { int base_level = -1; for (int level = config::kNumLevels - 1; level >= 0; level--) { - double score; - if (level == 0) { + double score = 0; + if (level == 0 && level0_compactions_in_progress_.empty()) { // We treat level-0 specially by bounding the number of files // instead of number of bytes for two reasons: // @@ -1548,11 +1646,16 @@ void VersionSet::Finalize(Version* v) { // // (3) More level0 files means write hotspot. // We give lower score to avoid too much level0 compaction. 
- score = sqrt(v->files_[level].size() / - static_cast(config::kL0_CompactionTrigger)); - } else { + if (v->files_[level].size() <= (size_t)options_->slow_down_level0_score_limit) { + score = v->files_[level].size() / + static_cast(config::kL0_CompactionTrigger); + } else { + score = sqrt(v->files_[level].size() / + static_cast(config::kL0_CompactionTrigger)); + } + } else if (level > 0) { // Compute the ratio of current size to size limit. - const uint64_t level_bytes = TotalFileSize(v->files_[level]); + const uint64_t level_bytes = TotalFileSizeNotBeingCompacted(v->files_[level]); score = static_cast(level_bytes) / MaxBytesForLevel(level, options_->sst_size); } @@ -1562,16 +1665,15 @@ void VersionSet::Finalize(Version* v) { base_level = level; } - // size compaction does not allow trigger by base level - if ((score > best_score) && (level < config::kNumLevels - 1)) { - best_level = level; - best_score = score; + if (level < config::kNumLevels - 1) { + v->compaction_level_[level] = level; + v->compaction_score_[level] = (score < 1.0) ? 0: score; } for (size_t i = 0; i < v->files_[level].size(); i++) { FileMetaData* f = v->files_[level][i]; // del compaction does not allow trigger by base level - if ((level > 0) && (level < base_level) && + if ((!f->being_compacted) && (level > 0) && (level < base_level) && (f->del_percentage > options_->del_percentage) && (best_del_level < 0 || v->files_[best_del_level][best_del_idx]->del_percentage < f->del_percentage)) { @@ -1580,7 +1682,7 @@ void VersionSet::Finalize(Version* v) { } // ttl compaction can trigger in base level - if ((f->check_ttl_ts > 0) && + if ((!f->being_compacted) && (f->check_ttl_ts > 0) && (best_ttl_level < 0 || v->files_[best_ttl_level][best_ttl_idx]->check_ttl_ts > f->check_ttl_ts)) { best_ttl_level = level; @@ -1589,30 +1691,44 @@ void VersionSet::Finalize(Version* v) { } } - v->compaction_level_ = best_level; - v->compaction_score_ = best_score; + // sort all the levels based on their score. 
Higher scores get listed + // first. Use bubble sort because the number of entries are small. + for (int i = 0; i < config::kNumLevels - 2; i++) { + for (int j = i + 1; j < config::kNumLevels - 1; j++) { + if (v->compaction_score_[i] < v->compaction_score_[j]) { + int level = v->compaction_level_[i]; + double score = v->compaction_score_[i]; + v->compaction_level_[i] = v->compaction_level_[j]; + v->compaction_score_[i] = v->compaction_score_[j]; + v->compaction_level_[j] = level; + v->compaction_score_[j] = score; + } + } + } + if (best_del_level >= 0) { v->del_trigger_compact_ = v->files_[best_del_level][best_del_idx]; v->del_trigger_compact_level_ = best_del_level; Log(options_->info_log, - "[%s] del_strategy(current), level %d, num #%lu, file_size %lu, del_p %lu\n", - dbname_.c_str(), - v->del_trigger_compact_level_, - (v->del_trigger_compact_->number) & 0xffffffff, - v->del_trigger_compact_->file_size, - v->del_trigger_compact_->del_percentage); + "[%s] del_strategy(current), level %d, num #%lu, file_size %lu, del_p %lu\n", + dbname_.c_str(), + v->del_trigger_compact_level_, + (v->del_trigger_compact_->number) & 0xffffffff, + v->del_trigger_compact_->file_size, + v->del_trigger_compact_->del_percentage); } + if (best_ttl_level >= 0) { v->ttl_trigger_compact_ = v->files_[best_ttl_level][best_ttl_idx]; v->ttl_trigger_compact_level_ = best_ttl_level; Log(options_->info_log, - "[%s] ttl_strategy(current), level %d, num #%lu, file_size %lu, ttl_p %lu, check_ts %lu\n", - dbname_.c_str(), - v->ttl_trigger_compact_level_, - (v->ttl_trigger_compact_->number) & 0xffffffff, - v->ttl_trigger_compact_->file_size, - v->ttl_trigger_compact_->ttl_percentage, - v->ttl_trigger_compact_->check_ttl_ts); + "[%s] ttl_strategy(current), level %d, num #%lu, file_size %lu, ttl_p %lu, check_ts %lu\n", + dbname_.c_str(), + v->ttl_trigger_compact_level_, + (v->ttl_trigger_compact_->number) & 0xffffffff, + v->ttl_trigger_compact_->file_size, + v->ttl_trigger_compact_->ttl_percentage, + 
v->ttl_trigger_compact_->check_ttl_ts); } } @@ -1757,6 +1873,19 @@ void VersionSet::AddLiveFiles(std::map* live) { } } +void VersionSet::AddLiveFilesWithSize(std::map* live) { + for (Version* v = dummy_versions_.next_; + v != &dummy_versions_; + v = v->next_) { + for (int level = 0; level < config::kNumLevels; level++) { + const std::vector& files = v->files_[level]; + for (size_t i = 0; i < files.size(); i++) { + (*live)[files[i]->number] = files[i]->file_size; + } + } + } +} + int64_t VersionSet::NumLevelBytes(int level) const { assert(level >= 0); assert(level < config::kNumLevels); @@ -1854,97 +1983,472 @@ Iterator* VersionSet::MakeInputIterator(Compaction* c) { return result; } +void VersionSet::PrintFilesInCompaction(const std::vector& inputs) { + char buf[30]; + std::string fstr = "file: "; + for (size_t i = 0; i < inputs.size(); i++) { + FileMetaData* f = inputs[i]; + if (f->being_compacted) { + snprintf(buf, sizeof(buf), "%lu ", f->number); + fstr.append(buf); + break; + } + } + Log(options_->info_log, "[%s] test mark level [%s] bening compact.", dbname_.c_str(), + fstr.c_str()); + return; +} + +bool VersionSet::FilesInCompaction(const std::vector& inputs) { + for (size_t i = 0; i < inputs.size(); i++) { + FileMetaData* f = inputs[i]; + if (f->being_compacted) { + return true; + } + } + return false; +} + +void VersionSet::PrintRangeInCompaction(const InternalKey* smallest, const InternalKey* largest, int level) { + std::vector inputs; + assert(level < config::kNumLevels); + current_->GetOverlappingInputs(level, smallest, largest, &inputs); + PrintFilesInCompaction(inputs); + return; +} + +bool VersionSet::RangeInCompaction(const InternalKey* smallest, const InternalKey* largest, int level) { + std::vector inputs; + assert(level < config::kNumLevels); + current_->GetOverlappingInputs(level, smallest, largest, &inputs); + return FilesInCompaction(inputs); +} + +bool VersionSet::PickFutureCompaction(int level, std::vector* inputs) { + inputs->clear(); + 
std::vector candidate; + double low_level_score = 0; + double high_level_score = 0; + for (size_t li = 0; li < current_->compaction_score_.size(); li++) { + if (current_->compaction_level_[li] == level) { + low_level_score = current_->compaction_score_[li]; + } else if (current_->compaction_level_[li] == level + 1) { + high_level_score = current_->compaction_score_[li]; + } + } + if (low_level_score < 1.0 || + low_level_score <= high_level_score) { + return false; + } + + // file in level need compaction, pick file in next compaction + for (size_t i = 0; i < current_->files_[level].size(); i++) { + FileMetaData* f = current_->files_[level][i]; + if (f->being_compacted) { + continue; + } + + if (!compact_pointer_[level].empty() && + icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) <= 0) { + candidate.push_back(f); + continue; + } + + inputs->push_back(f); + break; + } + + if (inputs->empty()) { + FileMetaData* f = current_->files_[level][0]; + if (!f->being_compacted) { + inputs->push_back(f); + } + } + if (inputs->empty() && candidate.size() > 0) { + inputs->push_back(candidate[candidate.size() - 1]); + } + return !inputs->empty(); +} + +bool VersionSet::IsOverlapInFileRange(FileMetaData* lf, FileMetaData* f) { + if (lf == NULL || f == NULL) { + return false; + } + if (icmp_.Compare(lf->largest.Encode(), f->smallest.Encode()) < 0 || + icmp_.Compare(f->largest.Encode(), lf->smallest.Encode()) < 0) { + return false; + } + //Log(options_->info_log, "[%s] file range overlap, lfile #%d, [%s, %s] being_compact %d, " + // "file #%d, [%s, %s] being_compact %d\n", + // dbname_.c_str(), + // static_cast(lf->number & 0xffffffff), + // lf->smallest.Encode().ToString().c_str(), + // lf->largest.Encode().ToString().c_str(), + // lf->being_compacted, + // static_cast(f->number & 0xffffffff), + // f->smallest.Encode().ToString().c_str(), + // f->largest.Encode().ToString().c_str(), + // f->being_compacted); + return true; +} + +// Note: +// 1) if f in level1 being 
compacted, level0 may be blocked; +// 2) compacting pointer may cause other f in the same level to be blocked. +bool VersionSet::PickCompactionBySize(int level, std::vector* inputs) { + // Pick low level file, which will be compact next time + std::vector low_level_inputs; + PickFutureCompaction(level - 1, &low_level_inputs); + FileMetaData* low_level_file = NULL; + if (low_level_inputs.size() > 0) { + low_level_file = low_level_inputs[0]; + //Log(options_->info_log, "[%s] PickCompactionBySize, low_level %d, f[%s, %s] being_compact %d\n", + // dbname_.c_str(), level - 1, + // low_level_file->smallest.Encode().ToString().c_str(), + // low_level_file->largest.Encode().ToString().c_str(), + // low_level_file->being_compacted); + } + + inputs->clear(); + std::vector candidate; + // Pick the first file that comes after compact_pointer_[level] + for (size_t i = 0; i < current_->files_[level].size(); i++) { + FileMetaData* f = current_->files_[level][i]; + if (f->being_compacted) { + //Log(options_->info_log, "[%s] PickCompactionBySize, level %d, f[%s, %s] being_compact %d\n", + // dbname_.c_str(), level, + // f->smallest.Encode().ToString().c_str(), f->largest.Encode().ToString().c_str(), + // f->being_compacted); + continue; + } + if (!compact_pointer_[level].empty() && + icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) <= 0) { + //Log(options_->info_log, "[%s] PickCompactionBySize, skip by compact_pointer_[%d] %s, f[%s, %s] being_compacted %d\n", + // dbname_.c_str(), level, compact_pointer_[level].c_str(), + // f->smallest.Encode().ToString().c_str(), f->largest.Encode().ToString().c_str(), + // f->being_compacted); + if (!RangeInCompaction(&f->smallest, &f->largest, level + 1) && + !IsOverlapInFileRange(low_level_file, f)) { + candidate.push_back(f); + } + continue; + } + if (RangeInCompaction(&f->smallest, &f->largest, level + 1) || + IsOverlapInFileRange(low_level_file, f)) { + //PrintRangeInCompaction(&f->smallest, &f->largest, level + 1); + continue; + 
} + inputs->push_back(f); + break; + } + if (inputs->empty()) { + // Wrap-around to the beginning of the key space + FileMetaData* f = current_->files_[level][0]; + if (!f->being_compacted && !RangeInCompaction(&f->smallest, &f->largest, level + 1) && + !IsOverlapInFileRange(low_level_file, f)) { + inputs->push_back(f); + } + //Log(options_->info_log, "[%s] PickCompactBySize, wrap-arroud level %d, f[%s, %s] being_compacted %d\n", + // dbname_.c_str(), level, + // f->smallest.Encode().ToString().c_str(), f->largest.Encode().ToString().c_str(), + // f->being_compacted); + //PrintRangeInCompaction(&f->smallest, &f->largest, level + 1); + } + if (inputs->empty() && candidate.size() > 0) { + inputs->push_back(candidate[candidate.size() - 1]); + } + return !inputs->empty(); +} + // timeout for micro_second -double VersionSet::CompactionScore(uint64_t* timeout) const { - *timeout = 0; +void VersionSet::CompactionScore(std::vector >* scores) { uint64_t ts = env_->NowMicros(); Version* v = current_; - if (v->compaction_score_ >= 1) { - return v->compaction_score_; - } else if (v->del_trigger_compact_ != NULL && - v->del_trigger_compact_->del_percentage > options_->del_percentage) { - return (double)(v->del_trigger_compact_->del_percentage / 100.0); - } else if (v->ttl_trigger_compact_ != NULL && - ts >= v->ttl_trigger_compact_->check_ttl_ts) { - return (double)((v->ttl_trigger_compact_->ttl_percentage + 1) / 100.0); - } else if (v->file_to_compact_ != NULL) { - return 0.1f; + for (size_t i = 0; i < v->compaction_score_.size(); i++) { + if (v->compaction_score_[i] >= 1) { + scores->push_back(std::pair(v->compaction_score_[i], 0)); + } + } + if (v->del_trigger_compact_ != NULL && + !v->del_trigger_compact_->being_compacted && + v->del_trigger_compact_->del_percentage > options_->del_percentage) { + scores->push_back(std::pair( + (double)(v->del_trigger_compact_->del_percentage / 100.0), 0)); + } + if (v->ttl_trigger_compact_ != NULL && + 
!v->ttl_trigger_compact_->being_compacted && + ts >= v->ttl_trigger_compact_->check_ttl_ts) { + scores->push_back(std::pair( + (double)((v->ttl_trigger_compact_->ttl_percentage + 1) / 100.0), 0)); + } + if (v->file_to_compact_ != NULL && + !v->file_to_compact_->being_compacted) { + scores->push_back(std::pair(0.1, 0)); } // delay task if (v->ttl_trigger_compact_ != NULL && + !v->ttl_trigger_compact_->being_compacted && ts < v->ttl_trigger_compact_->check_ttl_ts) { - *timeout = (v->ttl_trigger_compact_->check_ttl_ts - ts + 1000000) / 1000; - return (double)((v->ttl_trigger_compact_->ttl_percentage + 1) / 100.0); + scores->push_back(std::pair( + (double)((v->ttl_trigger_compact_->ttl_percentage + 1) / 100.0), + ((v->ttl_trigger_compact_->check_ttl_ts - ts + 1000000) / 1000))); + } +} + +Compaction* VersionSet::NewSubCompact(Compaction* compact) { + Compaction* c = new Compaction(compact->level_); + c->output_level_ = compact->output_level_; + c->max_output_file_size_ = compact->max_output_file_size_; + c->input_version_ = compact->input_version_; + c->input_version_->Ref(); // make sure compacting version will not delete + + for (size_t i = 0; i < 2; i++) { + for (size_t j = 0; j < compact->inputs_[i].size(); j++) { + c->inputs_[i].push_back((compact->inputs_[i])[j]); + } + } + + for (size_t i = 0; i < compact->grandparents_.size(); i++) { + c->grandparents_.push_back(compact->grandparents_[i]); + } + c->grandparent_index_ = compact->grandparent_index_; + c->seen_key_ = compact->seen_key_; + c->overlapped_bytes_ = compact->overlapped_bytes_; + + c->drop_lower_bound_ = compact->drop_lower_bound_; + c->force_non_trivial_ = compact->force_non_trivial_; + return c; +} + +struct InternalKeyCompare { + InternalKeyCompare(const InternalKeyComparator* cmp) + : icmp(cmp) {} + + InternalKeyCompare(const InternalKeyCompare& key_cmp) + : icmp(key_cmp.icmp) {} + + // retuen true if a < b + bool operator () (const std::string& ikey_a, const std::string& ikey_b) { + InternalKey 
ikey1, ikey2; + ikey1.DecodeFrom(ikey_a); + ikey2.DecodeFrom(ikey_b); + bool res = icmp->InternalKeyComparator::Compare(ikey1.Encode(), ikey2.Encode()) < 0; + return res; + } + + const InternalKeyComparator* icmp; +}; + +uint64_t VersionSet::GetApproximateSizeByLevel(Version* v, int level, const InternalKey& ikey) { + uint64_t result = 0; + const std::vector& files = v->files_[level]; + for (size_t i = 0; i < files.size(); i++) { + if (icmp_.Compare(files[i]->largest, ikey) <= 0) { + // Entire file is before "ikey", so just add the file size + result += files[i]->file_size; + } else if (icmp_.Compare(files[i]->smallest, ikey) > 0) { + // Entire file is after "ikey", so ignore + if (level > 0) { + // Files other than level 0 are sorted by meta->smallest, so + // no further files in this level will contain data for + // "ikey". + break; + } + } else { + // "ikey" falls in the range for this table. Add the + // approximate offset of "ikey" within the table. + Table* tableptr; + Slice smallest = files[i]->smallest_fake ? files[i]->smallest.Encode() : ""; + Slice largest = files[i]->largest_fake ? 
files[i]->largest.Encode() : ""; + Iterator* iter = table_cache_->NewIterator( + ReadOptions(options_), dbname_, files[i]->number, files[i]->file_size, + smallest, largest, &tableptr); + if (tableptr != NULL) { + result += tableptr->ApproximateOffsetOf(ikey.Encode()); + } + delete iter; + } + } + return result; +} + +void VersionSet::GenerateSubCompaction(Compaction* compact, std::vector * compact_vec, + port::Mutex* mu) { + mu->AssertHeld(); + if (options_->max_sub_parallel_compaction <= 1) { + Compaction* c = NewSubCompact(compact); + compact_vec->push_back(c); + return; + } + + // generate candidate sub compaction split key + InternalKeyCompare icmp(&icmp_); + std::set boundary(icmp); + for (int i = compact->level_; i < compact->output_level_; i++ ) { + for (size_t j = 0; j < compact->inputs_[i - compact->level_].size(); j++) { + FileMetaData* f = compact->inputs_[i - compact->level_][j]; + boundary.insert(f->smallest.Encode().ToString()); + boundary.insert(f->largest.Encode().ToString()); + } + } + for (size_t j = 1; j < compact->inputs_[compact->output_level_ - compact->level_].size(); j++) { + FileMetaData* f = compact->inputs_[compact->output_level_ - compact->level_][j]; + boundary.insert(f->smallest.Encode().ToString()); + } + + mu->Unlock(); + // generate sub compaction range by output file size + uint64_t sum = 0, prev_sum = 0; + std::set::iterator it = boundary.begin(); + while (it != boundary.end()) { + sum = 0; + InternalKey ikey; + ikey.DecodeFrom(*it); + for (int i = compact->level_; i <= compact->output_level_; i++ ) { + sum += GetApproximateSizeByLevel(compact->input_version_, i, ikey); + } + + assert(sum >= prev_sum); + if (compact->max_output_file_size_ > sum - prev_sum) { + it = boundary.erase(it); + } else { + ++it; + prev_sum = sum; + } + } + mu->Lock(); + + // limit max sub compaction + assert(options_->max_sub_parallel_compaction > 1); + uint64_t avg_num = (boundary.size() + 1) / options_->max_sub_parallel_compaction + 1; + it = 
boundary.begin(); + uint64_t i = 1; + while (avg_num > 1 && it != boundary.end()) { + if (i % avg_num != 0) { + it = boundary.erase(it); + } else { + ++it; + } + i++; } - // nothing to do - return -1.0; + // construct compaction + if (boundary.size() == 0) { + Compaction* c = NewSubCompact(compact); + compact_vec->push_back(c); + } else { + std::set::iterator it = boundary.begin(); + std::string prev_key; + while (true) { + Compaction* c = NewSubCompact(compact); + c->sub_compact_start_ = prev_key; + c->sub_compact_end_ = *it; + compact_vec->push_back(c); + + ++it; + prev_key = c->sub_compact_end_; + if (it == boundary.end()) { + Compaction* c1 = NewSubCompact(compact); + c1->sub_compact_start_ = prev_key; + compact_vec->push_back(c1); + break; + } + } + } } Compaction* VersionSet::PickCompaction() { - Compaction* c; - int level; + int level = -1; + std::vector inputs; + bool set_non_trivial = false; // We prefer compactions triggered by too much data in a level over // the compactions triggered by seeks. 
- const bool size_compaction = (current_->compaction_score_ >= 1); + const bool size_compaction = (current_->compaction_score_[0] >= 1); const bool seek_compaction = (current_->file_to_compact_ != NULL); const bool del_compaction = (current_->del_trigger_compact_ != NULL); const bool ttl_compaction = (current_->ttl_trigger_compact_ != NULL); - if (size_compaction) { - level = current_->compaction_level_; - assert(level >= 0); - assert(level+1 < config::kNumLevels); - c = new Compaction(level); - // Pick the first file that comes after compact_pointer_[level] - for (size_t i = 0; i < current_->files_[level].size(); i++) { - FileMetaData* f = current_->files_[level][i]; - if (compact_pointer_[level].empty() || - icmp_.Compare(f->largest.Encode(), compact_pointer_[level]) > 0) { - c->inputs_[0].push_back(f); + // check size compaction + assert(level0_compactions_in_progress_.size() <= 1); + bool skipped_l0 = false; + for (size_t li = 0; size_compaction && li < current_->compaction_score_.size(); li++) { + double score = current_->compaction_score_[li]; + level = current_->compaction_level_[li]; + assert(li == 0 || score <= current_->compaction_score_[li - 1]); + if (score >= 1) { + assert(level >= 0); + assert(level+1 < config::kNumLevels); + if (skipped_l0 && level <= 1) { + // level0 in progress and level 0 will not directly compact to level > 1 + //Log(options_->info_log, "[%s] lock level %d, conflict, score %.2f\n", + // dbname_.c_str(), level, score); + continue; + } + if (level == 0 && !level0_compactions_in_progress_.empty()) { + skipped_l0 = true; + //Log(options_->info_log, "[%s] level %d in progress, conflict, score %.2f\n", + // dbname_.c_str(), level, score); + continue; + } + if (PickCompactionBySize(level, &inputs)) { break; } + //Log(options_->info_log, "[%s] pick level %d, conflict, score %.2f\n", + // dbname_.c_str(), level, score); } - if (c->inputs_[0].empty()) { - // Wrap-around to the beginning of the key space - 
c->inputs_[0].push_back(current_->files_[level][0]); - } - } else if (seek_compaction) { - // compaction trigger by seek percentage - // TODO: multithread should lock it + } + + // check seek compaction + if (inputs.empty() && seek_compaction) { level = current_->file_to_compact_level_; - c = new Compaction(level); - c->inputs_[0].push_back(current_->file_to_compact_); - } else if (del_compaction) { + assert(level >= 0); + assert(level+1 < config::kNumLevels); + FileMetaData* f = current_->file_to_compact_; + if (!f->being_compacted && + (level > 0 || level0_compactions_in_progress_.empty()) && + !RangeInCompaction(&f->smallest, &f->largest, level + 1)) { + inputs.push_back(f); + } + } + + // check del compaction + if (inputs.empty() && del_compaction) { // compaction trigger by delete tags percentage; // TODO: multithread should lock it level = current_->del_trigger_compact_level_; assert(level >= 0); assert(level+1 < config::kNumLevels); - c = new Compaction(level); - c->SetNonTrivial(true); - c->inputs_[0].push_back(current_->del_trigger_compact_); - Log(options_->info_log, + FileMetaData* f = current_->del_trigger_compact_; + if (!f->being_compacted && + (level > 0 || level0_compactions_in_progress_.empty()) && + !RangeInCompaction(&f->smallest, &f->largest, level + 1)) { + inputs.push_back(f); + set_non_trivial = true; + Log(options_->info_log, "[%s] compact trigger by del stragety, level %d, num #%lu, file_size %lu, del_p %lu\n", dbname_.c_str(), current_->del_trigger_compact_level_, (current_->del_trigger_compact_->number) & 0xffffffff, current_->del_trigger_compact_->file_size, current_->del_trigger_compact_->del_percentage); - } else if (ttl_compaction) { + } + } + + // check ttl compaction + if (inputs.empty() && ttl_compaction) { // compaction trigger by ttl tags percentage // TODO: multithread should lock it level = current_->ttl_trigger_compact_level_; assert(level >= 0); - c = new Compaction(level); - c->SetNonTrivial(true); - 
c->inputs_[0].push_back(current_->ttl_trigger_compact_); - if (level == config::kNumLevels - 1) {// level in last level - c->set_output_level(level); - } - Log(options_->info_log, + FileMetaData* f = current_->ttl_trigger_compact_; + if (!f->being_compacted && + (level > 0 || level0_compactions_in_progress_.empty()) && + (level+1 == config::kNumLevels || !RangeInCompaction(&f->smallest, &f->largest, level + 1))) { + inputs.push_back(f); + set_non_trivial = true; + Log(options_->info_log, "[%s] compact trigger by ttl stragety, level %d, num #%lu, file_size %lu, ttl_p %lu, check_ts %lu\n", dbname_.c_str(), current_->ttl_trigger_compact_level_, @@ -1952,32 +2456,57 @@ Compaction* VersionSet::PickCompaction() { current_->ttl_trigger_compact_->file_size, current_->ttl_trigger_compact_->ttl_percentage, current_->ttl_trigger_compact_->check_ttl_ts); - } else { + } + } + if (inputs.empty()) { return NULL; } - c->input_version_ = current_; - c->input_version_->Ref(); - c->max_output_file_size_ = - MaxFileSizeForLevel(c->output_level(), current_->vset_->options_->sst_size); - + assert(inputs.size() == 1); + assert(level >= 0); // Files in level 0 may overlap each other, so pick up all overlapping ones if (level == 0) { + assert(level0_compactions_in_progress_.size() == 0); InternalKey smallest, largest; - GetRange(c->inputs_[0], &smallest, &largest); + GetRange(inputs, &smallest, &largest); // Note that the next call will discard the file we placed in // c->inputs_[0] earlier and replace it with an overlapping set // which will include the picked file. 
- current_->GetOverlappingInputs(0, &smallest, &largest, &c->inputs_[0]); - assert(!c->inputs_[0].empty()); + current_->GetOverlappingInputs(level, &smallest, &largest, &inputs); + GetRange(inputs, &smallest, &largest); + if (RangeInCompaction(&smallest, &largest, level + 1)) { // make sure level1 not in compaction + Log(options_->info_log, "[%s] level1 in compacting, level0 conflict\n", + dbname_.c_str()); + return NULL; + } + assert(!inputs.empty()); + assert(!FilesInCompaction(inputs)); + } + + // expand inputs + Compaction* c = new Compaction(level); + c->SetNonTrivial(set_non_trivial); + c->input_version_ = current_; + c->input_version_->Ref(); // make sure compacting version will not delete + if (level == config::kNumLevels - 1) {// level in last level + c->set_output_level(level); } + c->max_output_file_size_ = + MaxFileSizeForLevel(c->output_level(), current_->vset_->options_->sst_size); + c->inputs_[0] = inputs; SetupOtherInputs(c); // tera-specific: calculate the smallest rowkey which overlap with file not // in this compaction. 
SetupCompactionBoundary(c); + + // mark being compacted + c->MarkBeingCompacted(true); + if (level == 0) { + level0_compactions_in_progress_.push_back(c); + } + Finalize(current_); // reculate level score return c; } - void VersionSet::SetupOtherInputs(Compaction* c) { if (c->level() == c->output_level()) { // self level compaction, should select next level return; @@ -2008,7 +2537,10 @@ void VersionSet::SetupOtherInputs(Compaction* c) { std::vector expanded1; current_->GetOverlappingInputs(c->output_level(), &new_start, &new_limit, &expanded1); - if (expanded1.size() == c->inputs_[1].size()) { + // check expanded file wether in compacting + if ((expanded1.size() == c->inputs_[1].size()) && + !RangeInCompaction(&new_start, &new_limit, level) && + !RangeInCompaction(&new_start, &new_limit, c->output_level())) { Log(options_->info_log, "[%s] Expanding@%d %d+%d (%ld+%ld bytes) to %d+%d (%ld+%ld bytes)\n", dbname_.c_str(), @@ -2084,11 +2616,18 @@ void VersionSet::SetupCompactionBoundary(Compaction* c) { Compaction* VersionSet::CompactRange( int level, const InternalKey* begin, - const InternalKey* end) { + const InternalKey* end, bool* being_compacted) { + *being_compacted = false; std::vector inputs; current_->GetOverlappingInputs(level, begin, end, &inputs); if (inputs.empty()) { - return NULL; + return NULL; + } + + // check level0 wether in compaction + if (level == 0 && !level0_compactions_in_progress_.empty()) { + *being_compacted = true; + return NULL; } // Avoid compacting too much in one shot in case the range is large. 
@@ -2109,6 +2648,18 @@ Compaction* VersionSet::CompactRange( } } + // check being compacting + InternalKey smallest, largest; + GetRange(inputs, &smallest, &largest); + if (FilesInCompaction(inputs) || RangeInCompaction(&smallest, &largest, level + 1)) { + PrintFilesInCompaction(inputs); + PrintRangeInCompaction(&smallest, &largest, level + 1); + Log(options_->info_log, "[%s] RangeCompaction : %s...%s, level: %d or %d, in compaction", + dbname_.c_str(), smallest.DebugString().c_str(), largest.DebugString().c_str(), level, level + 1); + *being_compacted = true; + return NULL; + } + Compaction* c = new Compaction(level); c->input_version_ = current_; c->input_version_->Ref(); @@ -2119,9 +2670,28 @@ Compaction* VersionSet::CompactRange( // tera-specific: calculate the smallest rowkey which overlap with file not // in this compaction. SetupCompactionBoundary(c); + + // mark being compacted + c->MarkBeingCompacted(true); + if (level == 0) { + level0_compactions_in_progress_.push_back(c); + } + Finalize(current_); // reculate level score return c; } +void VersionSet::ReleaseCompaction(Compaction* c, Status& s) { + c->MarkBeingCompacted(false); + assert(level0_compactions_in_progress_.size() <= 1); + if (c->level() == 0 && level0_compactions_in_progress_[0] == c) { + level0_compactions_in_progress_.resize(0); + } + if (!s.ok()) { + Finalize(current_); + } + return; +} + Compaction::Compaction(int level) : level_(level), output_level_(level + 1), @@ -2209,6 +2779,16 @@ bool Compaction::ShouldStopBefore(const Slice& internal_key) { } } +void Compaction::MarkBeingCompacted(bool flag) { + for (size_t i = 0; i < 2; i++) { + for (size_t j = 0; j < inputs_[i].size(); j++) { + assert(flag ? 
!inputs_[i][j]->being_compacted + : inputs_[i][j]->being_compacted); + inputs_[i][j]->being_compacted = flag; + } + } +} + void Compaction::ReleaseInputs() { if (input_version_ != NULL) { input_version_->Unref(); diff --git a/src/leveldb/db/version_set.h b/src/leveldb/db/version_set.h index 5a01d8dba..c933efced 100644 --- a/src/leveldb/db/version_set.h +++ b/src/leveldb/db/version_set.h @@ -19,6 +19,7 @@ #ifndef STORAGE_LEVELDB_DB_VERSION_SET_H_ #define STORAGE_LEVELDB_DB_VERSION_SET_H_ +#include #include #include #include @@ -56,6 +57,7 @@ extern int FindFile(const InternalKeyComparator& icmp, // in sorted order. extern bool SomeFileOverlapsRange( const InternalKeyComparator& icmp, + const Comparator* ucmp, bool disjoint_sorted_files, const std::vector& files, const Slice* smallest_user_key, @@ -147,8 +149,8 @@ class Version { // Level that should be compacted next and its compaction score. // Score < 1 means compaction is not strictly needed. These fields // are initialized by Finalize(). - double compaction_score_; - int compaction_level_; + std::vector compaction_score_; + std::vector compaction_level_; explicit Version(VersionSet* vset) : vset_(vset), next_(this), prev_(this), refs_(0), @@ -157,9 +159,13 @@ class Version { ttl_trigger_compact_(NULL), ttl_trigger_compact_level_(-1), del_trigger_compact_(NULL), - del_trigger_compact_level_(-1), - compaction_score_(-1), - compaction_level_(-1) { + del_trigger_compact_level_(-1) { + compaction_score_.resize(config::kNumLevels - 1); + compaction_level_.resize(config::kNumLevels - 1); + for (size_t i = 0; i < config::kNumLevels - 1; i++) { + compaction_score_[i] = -1.0; + compaction_level_[i] = -1; + } } ~Version(); @@ -182,6 +188,8 @@ class VersionSet { // current version. Will release *mu while actually writing to the file. // REQUIRES: *mu is held on entry. 
// REQUIRES: no other thread concurrently calls LogAndApply() + void LogAndApplyHelper(VersionSetBuilder* builder, + VersionEdit* edit); Status LogAndApply(VersionEdit* edit, port::Mutex* mu) EXCLUSIVE_LOCKS_REQUIRED(mu); @@ -231,7 +239,8 @@ class VersionSet { // being compacted, or zero if there is no such log file. uint64_t PrevLogNumber() const { return prev_log_number_; } - double CompactionScore(uint64_t* timeout) const; + // + void CompactionScore(std::vector >* scores); // Pick level and inputs for a new compaction. // Returns NULL if there is no compaction to be done. // Otherwise returns a pointer to a heap-allocated object that @@ -245,7 +254,10 @@ class VersionSet { Compaction* CompactRange( int level, const InternalKey* begin, - const InternalKey* end); + const InternalKey* end, bool* being_compacted); + + // release file's being_compacted flag, and release level0's lock + void ReleaseCompaction(Compaction* c, Status& s); // Return the maximum overlapping data (in bytes) at next level for any // file at a level >= 1. @@ -259,6 +271,7 @@ class VersionSet { // May also mutate some internal state. void AddLiveFiles(std::set* live); void AddLiveFiles(std::map* live); + void AddLiveFilesWithSize(std::map* live); // Return the approximate offset in the database of the data for // "key" as of version "v". 
@@ -271,10 +284,17 @@ class VersionSet { }; const char* LevelSummary(LevelSummaryStorage* scratch) const; + void GenerateSubCompaction(Compaction* compact, std::vector * compact_vec, + port::Mutex* mu); + private: friend class Compaction; friend class Version; friend class VersionSetBuilder; + struct ManifestWriter; + + Compaction* NewSubCompact(Compaction* compact); + uint64_t GetApproximateSizeByLevel(Version* v, int level, const InternalKey& ikey); void Finalize(Version* v); @@ -301,6 +321,15 @@ class VersionSet { bool ModifyFileSize(FileMetaData* f); + // milti thread compaction relatively + void PrintFilesInCompaction(const std::vector& inputs); + bool FilesInCompaction(const std::vector& inputs); + void PrintRangeInCompaction(const InternalKey* smallest, const InternalKey* largest, int level); + bool RangeInCompaction(const InternalKey* smallest, const InternalKey* largest, int level); + bool IsOverlapInFileRange(FileMetaData* lf, FileMetaData* f); + bool PickFutureCompaction(int level, std::vector* inputs); + bool PickCompactionBySize(int level, std::vector* inputs); + Env* const env_; const std::string dbname_; const Options* const options_; @@ -316,6 +345,8 @@ class VersionSet { uint64_t log_number_; uint64_t prev_log_number_; // 0 or backing store for memtable being compacted + std::deque manifest_writers_; + // Opened lazily WritableFile* descriptor_file_; log::Writer* descriptor_log_; @@ -325,6 +356,7 @@ class VersionSet { // Per-level key at which the next compaction at that level should start. // Either an empty string, or a valid InternalKey. std::string compact_pointer_[config::kNumLevels]; + std::vector level0_compactions_in_progress_; // No copying allowed VersionSet(const VersionSet&); @@ -372,6 +404,8 @@ class Compaction { // before processing "internal_key". bool ShouldStopBefore(const Slice& internal_key); + void MarkBeingCompacted(bool flag); + // Release the input version for the compaction, once the compaction // is successful. 
void ReleaseInputs(); @@ -384,6 +418,7 @@ class Compaction { private: friend class Version; friend class VersionSet; + friend class DBImpl; explicit Compaction(int level); @@ -420,6 +455,10 @@ class Compaction { // support self compaction bool force_non_trivial_; + + // support parallel compaction + std::string sub_compact_start_; // own by child + std::string sub_compact_end_; // own by child }; } // namespace leveldb diff --git a/src/leveldb/db/version_set_test.cc b/src/leveldb/db/version_set_test.cc index f4ad56367..4292ab0e7 100644 --- a/src/leveldb/db/version_set_test.cc +++ b/src/leveldb/db/version_set_test.cc @@ -6,10 +6,15 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. See the AUTHORS file for names of contributors. +#define private public #include "db/version_set.h" +#undef private + +#include "db/dbformat.h" #include "util/logging.h" #include "util/testharness.h" #include "util/testutil.h" +#include "leveldb/compact_strategy.h" namespace leveldb { @@ -46,7 +51,7 @@ class FindFileTest { InternalKeyComparator cmp(BytewiseComparator()); Slice s(smallest != NULL ? smallest : ""); Slice l(largest != NULL ? largest : ""); - return SomeFileOverlapsRange(cmp, disjoint_sorted_files_, files_, + return SomeFileOverlapsRange(cmp, cmp.user_comparator(), disjoint_sorted_files_, files_, (smallest != NULL ? &s : NULL), (largest != NULL ? 
&l : NULL)); } @@ -90,7 +95,6 @@ TEST(FindFileTest, Single) { ASSERT_TRUE(Overlaps(NULL, NULL)); } - TEST(FindFileTest, Multiple) { Add("150", "200"); Add("200", "250"); @@ -176,6 +180,57 @@ TEST(FindFileTest, OverlappingFiles) { ASSERT_TRUE(Overlaps("600", "700")); } +class VersionSetTest { +public: + VersionSetTest () + : icmp(opt.comparator), + t_log_number(10), + t_next_file(20), + t_last_seq(100) { + opt.compact_strategy_factory = new DummyCompactStrategyFactory(); + opt.env->DeleteDirRecursive("/tmp/db/test"); + opt.env->CreateDir("/tmp/db/test"); + t_vset = new VersionSet(std::string("/tmp/db/test"), &opt, NULL, &icmp); + t_vset->manifest_file_number_ = 100; + } + +public: + Options opt; + const InternalKeyComparator icmp; + VersionSet* t_vset; + uint64_t t_log_number; + uint64_t t_next_file; + uint64_t t_last_seq; + port::Mutex t_mu; +}; + +TEST(VersionSetTest, PickCompactionTest) { + VersionEdit edit; + + edit.AddFile(0, t_vset->NewFileNumber(), 200, + InternalKey("a0001", 1, kTypeValue), + InternalKey("a0002", 1, kTypeDeletion)); + edit.AddFile(0, t_vset->NewFileNumber(), 200, + InternalKey("a0003", 1, kTypeValue), + InternalKey("a0004", 1, kTypeValue)); + edit.SetComparatorName(leveldb::BytewiseComparator()->Name()); + t_mu.Lock(); + t_vset->LogAndApply(&edit, &t_mu); + t_mu.Unlock(); + Compaction* c = t_vset->PickCompaction(); + ASSERT_TRUE((uint64_t)t_vset->level0_compactions_in_progress_[0] == (uint64_t)c); + + VersionEdit edit1; + edit1.AddFile(0, t_vset->NewFileNumber(), 200, + InternalKey("a0005", 1, kTypeValue), + InternalKey("a0006", 1, kTypeValue)); + edit1.SetComparatorName(leveldb::BytewiseComparator()->Name()); + t_mu.Lock(); + t_vset->LogAndApply(&edit1, &t_mu); + t_mu.Unlock(); + ASSERT_TRUE(t_vset->PickCompaction() == NULL); +} + } // namespace leveldb int main(int argc, char** argv) { diff --git a/src/leveldb/include/leveldb/db.h b/src/leveldb/include/leveldb/db.h index 1d235801a..1b93fe8df 100644 --- a/src/leveldb/include/leveldb/db.h 
+++ b/src/leveldb/include/leveldb/db.h @@ -174,6 +174,8 @@ class DB { // Add all sst files inherited from other tablets virtual void AddInheritedLiveFiles(std::vector >* live) = 0; + virtual bool ShouldForceUnloadOnError() { return false; } + private: // No copying allowed DB(const DB&); diff --git a/src/leveldb/include/leveldb/dfs.h b/src/leveldb/include/leveldb/dfs.h index b5874848d..b5df4b0b2 100644 --- a/src/leveldb/include/leveldb/dfs.h +++ b/src/leveldb/include/leveldb/dfs.h @@ -7,6 +7,7 @@ #include #include +#include #include namespace leveldb { @@ -70,8 +71,12 @@ class Dfs { static Dfs* NewDfs(const std::string& so_path, const std::string& conf); /// Returns 0 on success. virtual int32_t UnlockDirectory(const std::string& path) = 0; + + virtual int32_t ClearDirOwner(const std::string& path) = 0; /// Returns DfsFile handler on success, NULL on error.WithTime virtual DfsFile* OpenFile(const std::string& filename, int32_t flags) = 0; + + virtual int32_t Stat(const std::string& filename, struct stat* fstat) = 0; private: Dfs(const Dfs&); void operator=(const Dfs&); diff --git a/src/leveldb/include/leveldb/env_dfs.h b/src/leveldb/include/leveldb/env_dfs.h index d34a2c697..bc0e65d9a 100644 --- a/src/leveldb/include/leveldb/env_dfs.h +++ b/src/leveldb/include/leveldb/env_dfs.h @@ -17,7 +17,7 @@ #include "leveldb/dfs.h" #include "leveldb/env.h" #include "leveldb/status.h" -#include "../../../utils/counter.h" +#include "../../../common/counter.h" namespace leveldb { @@ -60,6 +60,8 @@ class DfsEnv : public EnvWrapper { virtual Status UnlockFile(FileLock* lock); + int32_t ClearDirOwner(const std::string& dir) {return dfs_->ClearDirOwner(dir);} + virtual Env* CacheEnv() { return this; } static uint64_t gettid() { diff --git a/src/leveldb/include/leveldb/options.h b/src/leveldb/include/leveldb/options.h index be78d0d30..6793f0299 100644 --- a/src/leveldb/include/leveldb/options.h +++ b/src/leveldb/include/leveldb/options.h @@ -223,6 +223,8 @@ struct Options { std::set* 
exist_lg_list; std::map* lg_info_list; + std::set ignore_corruption_in_open_lg_list; + // compaction strategy to determine how to // drop the obsoleted kv records bool enable_strategy_when_get; @@ -310,13 +312,24 @@ struct Options { bool ignore_corruption_in_open; // Statistic: By default, if 10% entry timeout, will trigger compaction - // Default: 10 % + // Default: 99 % uint64_t ttl_percentage; // Statistic: delete tag's percentage in sst - // Default: 10 % + // Default: 20 % uint64_t del_percentage; + // Max thread alloc for lg's compaction + // Default: 5 + uint32_t max_background_compactions; + + // if level0's file num >= limit, use sqrt slow down level score + // Default: 30 + int slow_down_level0_score_limit; + + // parallel compaction + int max_sub_parallel_compaction; + // Create an Options object with default values for all fields. Options(); }; diff --git a/src/leveldb/include/leveldb/status.h b/src/leveldb/include/leveldb/status.h index 4bd364cca..0e062e6c1 100644 --- a/src/leveldb/include/leveldb/status.h +++ b/src/leveldb/include/leveldb/status.h @@ -55,6 +55,10 @@ class Status { return Status(kTimeOut, msg, msg2); } + static Status IOPermissionDenied(const Slice& msg, const Slice msg2 = Slice()) { + return Status(kIOPermissionDenied, msg, msg2); + } + // Returns true iff the status indicates success. bool ok() const { return (state_ == NULL); } @@ -69,6 +73,8 @@ class Status { // Returns true iff the status indicates an TimeOut. bool IsTimeOut() const { return code() == kTimeOut; } + + bool IsIOPermissionDenied() const { return code() == kIOPermissionDenied; } // Return a string representation of this status suitable for printing. // Returns the string "OK" for success. 
std::string ToString() const; @@ -88,7 +94,8 @@ class Status { kNotSupported = 3, kInvalidArgument = 4, kIOError = 5, - kTimeOut = 6 + kTimeOut = 6, + kIOPermissionDenied = 13 }; Code code() const { diff --git a/src/leveldb/port/port_posix.h b/src/leveldb/port/port_posix.h index ed19e222f..65f4274a1 100644 --- a/src/leveldb/port/port_posix.h +++ b/src/leveldb/port/port_posix.h @@ -46,9 +46,7 @@ #endif #include -#ifdef SNAPPY #include -#endif #include #include #include "port/atomic_pointer.h" @@ -124,33 +122,21 @@ extern void InitOnce(OnceType* once, void (*initializer)()); inline bool Snappy_Compress(const char* input, size_t length, ::std::string* output) { -#ifdef SNAPPY output->resize(snappy::MaxCompressedLength(length)); size_t outlen; snappy::RawCompress(input, length, &(*output)[0], &outlen); output->resize(outlen); return true; -#endif - - return false; } inline bool Snappy_GetUncompressedLength(const char* input, size_t length, size_t* result) { -#ifdef SNAPPY return snappy::GetUncompressedLength(input, length, result); -#else - return false; -#endif } inline bool Snappy_Uncompress(const char* input, size_t length, char* output) { -#ifdef SNAPPY return snappy::RawUncompress(input, length, output); -#else - return false; -#endif } /////////// Compression Ext /////////// diff --git a/src/leveldb/table/table_builder.cc b/src/leveldb/table/table_builder.cc index 9d6a7983b..63b70bb63 100644 --- a/src/leveldb/table/table_builder.cc +++ b/src/leveldb/table/table_builder.cc @@ -18,7 +18,7 @@ #include "table/format.h" #include "util/coding.h" #include "util/crc32c.h" -#include "../utils/counter.h" +#include "../common/counter.h" namespace leveldb { diff --git a/src/leveldb/util/env_cache.cc b/src/leveldb/util/env_cache.cc index 9d99fd168..51db78a27 100644 --- a/src/leveldb/util/env_cache.cc +++ b/src/leveldb/util/env_cache.cc @@ -33,6 +33,9 @@ const char* paths[] = {"./cache_dir_1/", "./cache_dir_2/"}; std::vector ThreeLevelCacheEnv::cache_paths_(paths, paths + 2); 
static Status IOError(const std::string& context, int err_number) { + if (err_number == EACCES) { + return Status::IOPermissionDenied(context, strerror(err_number)); + } return Status::IOError(context, strerror(err_number)); } diff --git a/src/leveldb/util/env_dfs.cc b/src/leveldb/util/env_dfs.cc index 53fde1804..f9f260b13 100644 --- a/src/leveldb/util/env_dfs.cc +++ b/src/leveldb/util/env_dfs.cc @@ -22,7 +22,7 @@ #include "leveldb/table_utils.h" #include "nfs.h" #include "util/mutexlock.h" -#include "../utils/counter.h" +#include "../common/counter.h" namespace leveldb { @@ -95,6 +95,9 @@ char* get_time_str(char* p, size_t len) // Log error message static Status IOError(const std::string& context, int err_number) { + if (err_number == EACCES) { + return Status::IOPermissionDenied(context, strerror(err_number)); + } return Status::IOError(context, strerror(err_number)); } diff --git a/src/leveldb/util/env_flash.cc b/src/leveldb/util/env_flash.cc index fd0702388..c6c42a9cc 100644 --- a/src/leveldb/util/env_flash.cc +++ b/src/leveldb/util/env_flash.cc @@ -21,7 +21,7 @@ #include "util/hash.h" #include "util/mutexlock.h" #include "helpers/memenv/memenv.h" -#include "../utils/counter.h" +#include "../common/counter.h" #include "leveldb/env_flash.h" @@ -38,6 +38,9 @@ const int64_t kUpdateFlashRetryIntervalMillis = 60 * 1000; // Log error message static Status IOError(const std::string& context, int err_number) { + if (err_number == EACCES) { + return Status::IOPermissionDenied(context, strerror(err_number)); + } return Status::IOError(context, strerror(err_number)); } @@ -68,7 +71,7 @@ Status CopyToLocal(const std::string& local_fname, Env* env, if (!s.ok()) { Log("[env_flash] create dir: %s failed: %s, exit", local_fname.substr(0, dir_pos).c_str(), s.ToString().c_str()); - exit(-1); + _exit(-1); } } @@ -79,7 +82,7 @@ Status CopyToLocal(const std::string& local_fname, Env* env, if (!vanish_allowed) { Log("[env_flash] create file: %s failed: %s, exit", 
local_fname.c_str(), s.ToString().c_str()); - exit(-1); + _exit(-1); } delete dfs_file; return s; @@ -501,7 +504,7 @@ void FlashEnv::SetFlashPath(const std::string& path, bool vanish_allowed) { && !Env::Default()->CreateDir(flash_paths_.back()).ok()) { Log("[env_flash] cannot access cache dir: %s\n", flash_paths_.back().c_str()); - exit(-1); + _exit(-1); } } } diff --git a/src/leveldb/util/env_inmem.cc b/src/leveldb/util/env_inmem.cc index 4e9855269..a587eacac 100644 --- a/src/leveldb/util/env_inmem.cc +++ b/src/leveldb/util/env_inmem.cc @@ -20,7 +20,7 @@ #include "leveldb/table_utils.h" #include "util/mutexlock.h" #include "helpers/memenv/memenv.h" -#include "../utils/counter.h" +#include "../common/counter.h" #include "leveldb/env_inmem.h" diff --git a/src/leveldb/util/env_mock.cc b/src/leveldb/util/env_mock.cc index 5265e58ea..abf13089e 100644 --- a/src/leveldb/util/env_mock.cc +++ b/src/leveldb/util/env_mock.cc @@ -51,6 +51,9 @@ void MockEnv::SetPrefix(const std::string& p) // Log error message static Status IOError(const std::string& context, int err_number) { + if (err_number == EACCES) { + return Status::IOPermissionDenied(context, strerror(err_number)); + } return Status::IOError(context, strerror(err_number)); } diff --git a/src/leveldb/util/env_posix.cc b/src/leveldb/util/env_posix.cc index fdc1d2ce4..6d495768e 100644 --- a/src/leveldb/util/env_posix.cc +++ b/src/leveldb/util/env_posix.cc @@ -36,7 +36,7 @@ #include "util/posix_logger.h" #include "util/string_ext.h" #include "util/thread_pool.h" -#include "../utils/counter.h" +#include "../common/counter.h" namespace leveldb { @@ -59,6 +59,9 @@ tera::Counter posix_other_counter; namespace { static Status IOError(const std::string& context, int err_number) { + if (err_number == EACCES) { + return Status::IOPermissionDenied(context, strerror(err_number)); + } return Status::IOError(context, strerror(err_number)); } @@ -132,9 +135,13 @@ class PosixRandomAccessFile: public RandomAccessFile { // problems for 
very large databases. class MmapLimiter { public: - // Up to 1000 mmaps for 64-bit binaries; none for smaller pointer sizes. MmapLimiter() { - SetAllowed(sizeof(void*) >= 8 ? 1000 : 0); + //Disable mmap in tera for reducing memory use. + SetAllowed(0); + + // Up to 1000 mmaps for 64-bit binaries; none for smaller pointer sizes. + //SetAllowed(sizeof(void*) >= 8 ? 1000 : 0); + //If you want to enable mmap, uncomment the line above. } // If another mmap slot is available, acquire it and return true. diff --git a/src/leveldb/util/hdfs.cc b/src/leveldb/util/hdfs.cc index b90fea36e..4a9721bc2 100644 --- a/src/leveldb/util/hdfs.cc +++ b/src/leveldb/util/hdfs.cc @@ -6,10 +6,10 @@ #include #include - #include "hdfs.h" #include "include/hdfs.h" -#include "../utils/counter.h" +#include "hdfs_util.h" +#include "../common/counter.h" namespace leveldb { @@ -233,6 +233,21 @@ int32_t Hdfs::UnlockDirectory(const std::string& path) { return -1; } + +int32_t Hdfs::Stat(const std::string& filename, struct stat* fstat) { + hdfsFileInfo* pFileInfo = (*hdfsGetPathInfo)((hdfsFS)fs_, filename.c_str()); + if (pFileInfo != NULL) { + HdfsFileInfo2PosixFileStat(pFileInfo, fstat); + (*hdfsFreeFileInfo)(pFileInfo, 1); + return 0; + } + return -1; +} + +int32_t Hdfs::ClearDirOwner(const std::string& path) { + // hdfs has no dir owner, so we return succ directly + return 0; } +} /* vim: set expandtab ts=2 sw=2 sts=2 tw=100: */ diff --git a/src/leveldb/util/hdfs.h b/src/leveldb/util/hdfs.h index 81ed269ac..ebf464f6b 100644 --- a/src/leveldb/util/hdfs.h +++ b/src/leveldb/util/hdfs.h @@ -48,8 +48,9 @@ class Hdfs : public Dfs { int32_t ListDirectory(const std::string& path, std::vector* result); int32_t LockDirectory(const std::string& path); int32_t UnlockDirectory(const std::string& path); + int32_t ClearDirOwner(const std::string& path); DfsFile* OpenFile(const std::string& filename, int32_t flags); - + int32_t Stat(const std::string& filename, struct stat* fstat); private: void* fs_; @@ -92,8 
+93,10 @@ class Hdfs2 : public Dfs { int32_t ListDirectory(const std::string& path, std::vector* result); int32_t LockDirectory(const std::string& path); int32_t UnlockDirectory(const std::string& path); + int32_t ClearDirOwner(const std::string& path); DfsFile* OpenFile(const std::string& filename, int32_t flags); + int32_t Stat(const std::string& filename, struct stat* fstat); private: void* GetFSHandle(const std::string& path); std::vector fs_list_; diff --git a/src/leveldb/util/hdfs2.cc b/src/leveldb/util/hdfs2.cc index fa3a8902c..0eac0ecea 100644 --- a/src/leveldb/util/hdfs2.cc +++ b/src/leveldb/util/hdfs2.cc @@ -7,8 +7,9 @@ #include "hdfs.h" #include "include/hdfs2.h" +#include "hdfs_util.h" #include "util/hash.h" -#include "../utils/counter.h" +#include "../common/counter.h" namespace leveldb { @@ -257,6 +258,21 @@ int32_t Hdfs2::UnlockDirectory(const std::string& path) { return -1; } +int32_t Hdfs2::ClearDirOwner(const std::string& path) { + // hdfs has no dir owner, so return succ directly + return 0; +} + +int32_t Hdfs2::Stat(const std::string& filepath, struct stat* st) { + hdfsFileInfo* pFileInfo = (*hdfsGetPathInfo)((hdfsFS)GetFSHandle(filepath), filepath.c_str()); + if (pFileInfo != NULL) { + HdfsFileInfo2PosixFileStat(pFileInfo, st); + return 0; + } + return -1; + +} + } // namespace leveldb diff --git a/src/leveldb/util/hdfs_util.h b/src/leveldb/util/hdfs_util.h new file mode 100644 index 000000000..ba2eb720b --- /dev/null +++ b/src/leveldb/util/hdfs_util.h @@ -0,0 +1,64 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// + +#ifndef TERA_LEVELDB_HDFS_UTIL_H +#define TERA_LEVELDB_HDFS_UTIL_H +#include +#include +#include +#include +#include +namespace leveldb { + +static void HdfsFileInfo2PosixFileStat(hdfsFileInfo* info, struct stat* st) { + memset(st, 0, sizeof(struct stat)); + //by default: set to 0 to indicate not support for directory because we can not get this info + st->st_nlink = (info->mKind == kObjectKindDirectory) ? 0 : 1; + uid_t owner_id = 99; // no body, magic number in linux + if (info->mOwner != NULL) { + struct passwd passwd_info; + struct passwd* result = NULL; + ssize_t buf_size = sysconf(_SC_GETPW_R_SIZE_MAX); + buf_size = buf_size == -1 ? 16384 : buf_size; + char* pwbuf = new char[buf_size]; + if (0 == getpwnam_r(info->mOwner, &passwd_info, pwbuf, buf_size, &result)) { + if (result != NULL) { + owner_id = passwd_info.pw_uid; + } + } + delete [] pwbuf; + } + gid_t group_id = 99; // no body, magic number in posix + if (info->mGroup != NULL) { + struct group result; + struct group* resultp; + ssize_t len = sysconf(_SC_GETGR_R_SIZE_MAX); + len = len == -1 ? 16384 : len; + char* group_buf = new char[len]; + if (0 == getgrnam_r(info->mGroup, &result, group_buf, len, &resultp)) { + if (resultp != NULL) { + group_id = result.gr_gid; + } + } + delete [] group_buf; + } + short file_mode = (info->mKind == kObjectKindDirectory) ? (S_IFDIR | 0777) : (S_IFREG | 0666); + if (info->mPermissions > 0) { + file_mode = (info->mKind == kObjectKindDirectory) ? S_IFDIR: S_IFREG; + file_mode |= info->mPermissions; + } + st->st_size = (info->mKind == kObjectKindDirectory) ? 
4096 : info->mSize; + st->st_blksize = 512; // posix default block size + st->st_blocks = (st->st_size + st->st_blksize - 1)/st->st_blksize; + st->st_mode = file_mode; + st->st_uid = owner_id; + st->st_gid = group_id; + st->st_atime = info->mLastAccess; + st->st_ctime = info->mLastMod; + st->st_mtime = info->mLastMod; + return; +} +} +#endif diff --git a/src/leveldb/util/nfs.cc b/src/leveldb/util/nfs.cc index cb07a1797..37f0f0666 100644 --- a/src/leveldb/util/nfs.cc +++ b/src/leveldb/util/nfs.cc @@ -13,7 +13,7 @@ #include "util/mutexlock.h" #include "util/string_ext.h" #include "../common/timer.h" -#include "../utils/counter.h" +#include "../common/counter.h" namespace leveldb { @@ -29,6 +29,7 @@ static struct ::dirent* (*nfsReaddir)(nfs::NFSDIR* dir); static int (*nfsClosedir)(nfs::NFSDIR* dir); static int (*nfsSetDirOwner)(const char* path); static int (*nfsClearDirOwner)(const char* path); +static int (*nfsForceClearDirOwner)(const char* path); static int (*nfsStat)(const char* path, struct ::stat* stat); static int (*nfsUnlink)(const char* path); @@ -90,7 +91,7 @@ void Nfs::LoadSymbol() { } *(void**)(&printVersion) = ResolveSymbol(dl, "PrintNfsVersion"); - fprintf(stderr, "libnfs.so version: \n%s\n\n", (*printVersion)()); + //fprintf(stderr, "libnfs.so version: \n%s\n\n", (*printVersion)()); *(void**)(&nfsInit) = ResolveSymbol(dl, "Init"); *(void**)(&nfsSetComlogLevel) = ResolveSymbol(dl, "SetComlogLevel"); @@ -102,6 +103,7 @@ void Nfs::LoadSymbol() { *(void**)(&nfsClosedir) = ResolveSymbol(dl, "Closedir"); *(void**)(&nfsSetDirOwner) = ResolveSymbol(dl, "SetDirOwner"); *(void**)(&nfsClearDirOwner) = ResolveSymbol(dl, "ClearDirOwner"); + *(void**)(&nfsForceClearDirOwner) = ResolveSymbol(dl, "ForceClearDirOwner"); *(void**)(&nfsStat) = ResolveSymbol(dl, "Stat"); *(void**)(&nfsUnlink) = ResolveSymbol(dl, "Unlink"); *(void**)(&nfsAccess) = ResolveSymbol(dl, "Access"); @@ -256,7 +258,7 @@ int32_t Nfs::CreateDirectory(const std::string& name) { if (0 != 
(*nfsAccess)(path.c_str(), F_OK) && (*nfsGetErrno)() == ENOENT) { if (0 != (*nfsMkdir)(path.c_str()) && (*nfsGetErrno)() != EEXIST) { errno = (*nfsGetErrno)(); - fprintf(stderr, "[%s] Createdir %s fail: %d\n", common::timer::get_curtime_str().c_str(), name.c_str(), errno); + fprintf(stderr, "[%s] Createdir %s fail: %d\n", tera::get_curtime_str().c_str(), name.c_str(), errno); return -1; } } @@ -268,7 +270,7 @@ int32_t Nfs::DeleteDirectory(const std::string& name) { int32_t retval = (*nfsRmdir)(name.c_str()); if (retval != 0) { errno = (*nfsGetErrno)(); - fprintf(stderr, "[%s] DeleteDirectory %s fail: %d\n", common::timer::get_curtime_str().c_str(), name.c_str(), errno); + fprintf(stderr, "[%s] DeleteDirectory %s fail: %d\n", tera::get_curtime_str().c_str(), name.c_str(), errno); } return retval; } @@ -277,7 +279,7 @@ int32_t Nfs::Exists(const std::string& filename) { if (retval != 0) { errno = (*nfsGetErrno)(); int errno_saved = errno; - fprintf(stderr, "[%s] Exists %s fail: %d\n", common::timer::get_curtime_str().c_str(), filename.c_str(), errno); + fprintf(stderr, "[%s] Exists %s fail: %d\n", tera::get_curtime_str().c_str(), filename.c_str(), errno); errno = errno_saved; } return retval; @@ -286,7 +288,7 @@ int32_t Nfs::Delete(const std::string& filename) { int32_t retval = (*nfsUnlink)(filename.c_str()); if (retval != 0) { errno = (*nfsGetErrno)(); - fprintf(stderr, "[%s] Delete %s fail: %d\n", common::timer::get_curtime_str().c_str(), filename.c_str(), errno); + fprintf(stderr, "[%s] Delete %s fail: %d\n", tera::get_curtime_str().c_str(), filename.c_str(), errno); } return retval; } @@ -297,7 +299,7 @@ int32_t Nfs::GetFileSize(const std::string& filename, uint64_t* size) { *size = fileinfo.st_size; } else { errno = (*nfsGetErrno)(); - fprintf(stderr, "[%s] Getfilesize %s fail: %d\n", common::timer::get_curtime_str().c_str(), filename.c_str(), errno); + fprintf(stderr, "[%s] Getfilesize %s fail: %d\n", tera::get_curtime_str().c_str(), filename.c_str(), errno); } 
return retval; } @@ -305,7 +307,7 @@ int32_t Nfs::Rename(const std::string& from, const std::string& to) { int32_t retval = (*nfsRename)(from.c_str(), to.c_str()); if (retval != 0) { errno = (*nfsGetErrno)(); - fprintf(stderr, "[%s] Rename %s to %s fail: %d\n", common::timer::get_curtime_str().c_str(), from.c_str(), to.c_str(), errno); + fprintf(stderr, "[%s] Rename %s to %s fail: %d\n", tera::get_curtime_str().c_str(), from.c_str(), to.c_str(), errno); } return retval; } @@ -322,10 +324,19 @@ DfsFile* Nfs::OpenFile(const std::string& filename, int32_t flags) { return new NFile(file, filename); } errno = (*nfsGetErrno)(); - fprintf(stderr, "[%s] Openfile %s fail: %d\n", common::timer::get_curtime_str().c_str(), filename.c_str(), errno); + fprintf(stderr, "[%s] Openfile %s fail: %d\n", tera::get_curtime_str().c_str(), filename.c_str(), errno); return NULL; } +int32_t Nfs::Stat(const std::string& filename, struct stat* fstat) { + int32_t retval = (*nfsStat)(filename.c_str(), fstat); + if (retval != 0) { + errno = (*nfsGetErrno)(); + //fprintf(stderr, "[%s] Stat %s fail: %d\n", tera::get_curtime_str().c_str(), filename.c_str(), errno); + } + return retval; +} + int32_t Nfs::Copy(const std::string& from, const std::string& to) { // not support return -1; @@ -336,7 +347,7 @@ int32_t Nfs::ListDirectory(const std::string& path, if (NULL == dir) { errno = (*nfsGetErrno)(); int errno_saved = errno; - fprintf(stderr, "[%s] Opendir %s fail: %d\n", common::timer::get_curtime_str().c_str(), path.c_str(), errno); + fprintf(stderr, "[%s] Opendir %s fail: %d\n", tera::get_curtime_str().c_str(), path.c_str(), errno); errno = errno_saved; return -1; } @@ -350,7 +361,7 @@ int32_t Nfs::ListDirectory(const std::string& path, errno = (*nfsGetErrno)(); int errno_saved = errno; if (0 != errno) { - fprintf(stderr, "[%s] List %s error: %d\n", common::timer::get_curtime_str().c_str(), path.c_str(), errno); + fprintf(stderr, "[%s] List %s error: %d\n", tera::get_curtime_str().c_str(), 
path.c_str(), errno); (*nfsClosedir)(dir); errno = errno_saved; return -1; @@ -394,5 +405,9 @@ int32_t Nfs::UnlockDirectory(const std::string& path) { return (*nfsClearDirOwner)(path.c_str()); } +int32_t Nfs::ClearDirOwner(const std::string& path) { + return (*nfsForceClearDirOwner)(path.c_str()); +} + } /* vim: set expandtab ts=2 sw=2 sts=2 tw=100: */ diff --git a/src/leveldb/util/nfs.h b/src/leveldb/util/nfs.h index b80dd0316..ab286d82b 100644 --- a/src/leveldb/util/nfs.h +++ b/src/leveldb/util/nfs.h @@ -50,7 +50,10 @@ class Nfs : public Dfs { int32_t ListDirectory(const std::string& path, std::vector* result); int32_t LockDirectory(const std::string& path); int32_t UnlockDirectory(const std::string& path); + int32_t ClearDirOwner(const std::string& path); + DfsFile* OpenFile(const std::string& filename, int32_t flags); + int32_t Stat(const std::string& filename, struct stat* fstat); private: Nfs(); static port::Mutex mu_; diff --git a/src/leveldb/util/options.cc b/src/leveldb/util/options.cc index ecd11b57e..e64512908 100644 --- a/src/leveldb/util/options.cc +++ b/src/leveldb/util/options.cc @@ -53,7 +53,10 @@ Options::Options() disable_wal(false), ignore_corruption_in_open(false), ttl_percentage(99), - del_percentage(20) { + del_percentage(20), + max_background_compactions(5), + slow_down_level0_score_limit(30), + max_sub_parallel_compaction(10) { } } // namespace leveldb diff --git a/src/leveldb/util/raw_key_operator.cc b/src/leveldb/util/raw_key_operator.cc index 9d5b5d3dc..8ce699c5b 100644 --- a/src/leveldb/util/raw_key_operator.cc +++ b/src/leveldb/util/raw_key_operator.cc @@ -7,7 +7,7 @@ #include #include "coding.h" -#include "../utils/counter.h" +#include "../common/counter.h" namespace leveldb { diff --git a/src/leveldb/util/status.cc b/src/leveldb/util/status.cc index 871a34872..14b22f82e 100644 --- a/src/leveldb/util/status.cc +++ b/src/leveldb/util/status.cc @@ -65,6 +65,9 @@ std::string Status::ToString() const { case kTimeOut: type = "Timeout error: 
"; break; + case kIOPermissionDenied: + type = "IO Permission Denied: "; + break; default: snprintf(tmp, sizeof(tmp), "Unknown code(%d): ", static_cast(code())); diff --git a/src/load_balancer/action.h b/src/load_balancer/action.h new file mode 100644 index 000000000..754382916 --- /dev/null +++ b/src/load_balancer/action.h @@ -0,0 +1,45 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_LOAD_BALANCER_ACTION_H_ +#define TERA_LOAD_BALANCER_ACTION_H_ + +#include +#include + +namespace tera { +namespace load_balancer { + +class Action { +public: + enum class Type { + ASSIGN, + MOVE, + SWAP, + EMPTY, + }; + + Type GetType() const { + return type_; + } + +public: + Action(Type t) { + type_ = t; + } + + virtual ~Action() {} + + virtual Action* UndoAction() = 0; + + virtual std::string ToString() const = 0; + +private: + Type type_; +}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_ACTION_H_ diff --git a/src/load_balancer/action_generator.h b/src/load_balancer/action_generator.h new file mode 100644 index 000000000..77403bfe1 --- /dev/null +++ b/src/load_balancer/action_generator.h @@ -0,0 +1,67 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_LOAD_BALANCER_ACTION_GENERATOR_H_ +#define TERA_LOAD_BALANCER_ACTION_GENERATOR_H_ + +#include +#include +#include + +#include "load_balancer/action.h" +#include "load_balancer/cluster.h" +#include "load_balancer/random.h" + +namespace tera { +namespace load_balancer { + +const uint32_t kInvalidNodeIndex = std::numeric_limits::max(); +const uint32_t kInvalidTabletIndex = std::numeric_limits::max(); + +class ActionGenerator { +public: + virtual ~ActionGenerator() {} + + virtual Action* Generate(const std::shared_ptr& cluster) = 0; + + virtual std::string Name() = 0; + + virtual uint32_t PickRandomNode(const std::shared_ptr& cluster) { + if (cluster->tablet_node_num_ > 0) { + return Random::Rand(0, cluster->tablet_node_num_); + } else { + return kInvalidNodeIndex; + } + } + + // pick a different node with the picked_index + virtual uint32_t PickOtherRandomNode(const std::shared_ptr& cluster, + const uint32_t picked_index) { + assert(cluster->tablet_node_num_ >= 2); + + while (true) { + uint32_t node_index = PickRandomNode(cluster); + if (node_index != picked_index) { + return node_index; + } + } + } + + virtual uint32_t PickRandomTabletOfNode(const std::shared_ptr& cluster, + const uint32_t node_index) { + uint32_t tablet_num = cluster->tablets_per_node_[node_index].size(); + + if (tablet_num > 0) { + uint32_t rand = Random::Rand(0, tablet_num); + return cluster->tablets_per_node_[node_index][rand]; + } else { + return kInvalidTabletIndex; + } + } +}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_ACTION_GENERATOR_H_ diff --git a/src/load_balancer/action_generators.cc b/src/load_balancer/action_generators.cc new file mode 100644 index 000000000..f0cfe53d1 --- /dev/null +++ b/src/load_balancer/action_generators.cc @@ -0,0 +1,344 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include + +#include + +#include "glog/logging.h" +#include "load_balancer/action_generators.h" +#include "load_balancer/actions.h" +#include "load_balancer/random.h" + +namespace tera { +namespace load_balancer { + +RandomActionGenerator::RandomActionGenerator() : + name_("RandomActionGenerator") { +} + +RandomActionGenerator::~RandomActionGenerator() { +} + +Action* RandomActionGenerator::Generate(const std::shared_ptr& cluster) { + VLOG(20) << "[lb] RandomActionGenerator worked"; + + if (cluster->tablet_node_num_ < 2) { + return new EmptyAction(); + } + + uint32_t source_node_index = PickRandomNode(cluster); + uint32_t dest_node_index = PickOtherRandomNode(cluster, source_node_index); + uint32_t tablet_index = PickRandomTabletOfNode(cluster, source_node_index); + + if (tablet_index == kInvalidTabletIndex || + source_node_index == kInvalidNodeIndex || + dest_node_index == kInvalidNodeIndex) { + return new EmptyAction(); + } + + return new MoveAction(tablet_index, source_node_index, dest_node_index); +} + +std::string RandomActionGenerator::Name() { + return name_; +} + +TabletCountActionGenerator::TabletCountActionGenerator() : + name_("TabletCountActionGenerator") { +} + +TabletCountActionGenerator::~TabletCountActionGenerator() { +} + +Action* TabletCountActionGenerator::Generate(const std::shared_ptr& cluster) { + VLOG(20) << "[lb] TabletCountActionGenerator worked"; + + if (cluster->tablet_node_num_ < 2) { + return new EmptyAction(); + } + + cluster->SortNodesByTabletCount(); + + uint32_t source_node_index = PickMostTabletsNode(cluster); + uint32_t dest_node_index = PickLeastTabletsNode(cluster); + uint32_t tablet_index = PickRandomTabletOfNode(cluster, source_node_index); + + if (tablet_index == kInvalidTabletIndex || + source_node_index == kInvalidNodeIndex || + dest_node_index == kInvalidNodeIndex || + source_node_index == dest_node_index) { + return new EmptyAction(); + } + + return new MoveAction(tablet_index, source_node_index, dest_node_index); +} + 
+uint32_t TabletCountActionGenerator::PickMostTabletsNode(const std::shared_ptr& cluster) { + if (cluster->node_index_sorted_by_tablet_count_.size() >= 1) { + return cluster->node_index_sorted_by_tablet_count_[cluster->node_index_sorted_by_tablet_count_.size() - 1]; + } else { + return kInvalidTabletIndex; + } +} + +uint32_t TabletCountActionGenerator::PickLeastTabletsNode(const std::shared_ptr& cluster) { + if (cluster->node_index_sorted_by_tablet_count_.size() >= 1) { + uint32_t index = 0; + if (cluster->lb_options_.meta_table_isolate_enabled) { + while (cluster->node_index_sorted_by_tablet_count_[index] == cluster->meta_table_node_index_) { + ++index; + if (index == cluster->node_index_sorted_by_tablet_count_.size()) { + return kInvalidNodeIndex; + } + } + } + return cluster->node_index_sorted_by_tablet_count_[index]; + } else { + return kInvalidTabletIndex; + } +} + +std::string TabletCountActionGenerator::Name() { + return name_; +} + +SizeActionGenerator::SizeActionGenerator() : + name_("SizeActionGenerator") { +} + +SizeActionGenerator::~SizeActionGenerator() { +} + +Action* SizeActionGenerator::Generate(const std::shared_ptr& cluster) { + VLOG(20) << "[lb] SizeActionGenerator worked"; + + if (cluster->tablet_node_num_ < 2) { + return new EmptyAction(); + } + + cluster->SortNodesBySize(); + + uint32_t source_node_index = PickLargestSizeNode(cluster); + uint32_t dest_node_index = PickSmallestSizeNode(cluster); + uint32_t tablet_index = PickRandomTabletOfNode(cluster, source_node_index); + + if (tablet_index == kInvalidTabletIndex || + source_node_index == kInvalidNodeIndex || + dest_node_index == kInvalidNodeIndex || + source_node_index == dest_node_index) { + return new EmptyAction(); + } + + return new MoveAction(tablet_index, source_node_index, dest_node_index); +} + +uint32_t SizeActionGenerator::PickLargestSizeNode(const std::shared_ptr& cluster) { + if (cluster->node_index_sorted_by_size_.size() >= 1) { + return 
cluster->node_index_sorted_by_size_[cluster->node_index_sorted_by_size_.size() - 1]; + } else { + return kInvalidTabletIndex; + } +} + +uint32_t SizeActionGenerator::PickSmallestSizeNode(const std::shared_ptr& cluster) { + if (cluster->node_index_sorted_by_size_.size() >= 1) { + uint32_t index = 0; + if (cluster->lb_options_.meta_table_isolate_enabled) { + while (cluster->node_index_sorted_by_size_[index] == cluster->meta_table_node_index_) { + ++index; + if (index == cluster->node_index_sorted_by_size_.size()) { + return kInvalidNodeIndex; + } + } + } + return cluster->node_index_sorted_by_size_[index]; + } else { + return kInvalidTabletIndex; + } +} + +std::string SizeActionGenerator::Name() { + return name_; +} + +ReadLoadActionGenerator::ReadLoadActionGenerator() : + name_("ReadLoadActionGenerator") { +} + +ReadLoadActionGenerator::~ReadLoadActionGenerator() { +} + +Action* ReadLoadActionGenerator::Generate(const std::shared_ptr& cluster) { + VLOG(20) << "[lb] ReadLoadActionGenerator worked"; + + if (cluster->tablet_node_num_ < 2) { + return new EmptyAction(); + } + + cluster->SortNodesByReadLoad(); + + uint32_t source_node_index = PickMostReadNode(cluster); + uint32_t dest_node_index = PickLeastReadNode(cluster); + uint32_t tablet_index = PickRandomTabletOfNode(cluster, source_node_index); + + if (tablet_index == kInvalidTabletIndex || + source_node_index == kInvalidNodeIndex || + dest_node_index == kInvalidNodeIndex || + source_node_index == dest_node_index) { + return new EmptyAction(); + } + + return new MoveAction(tablet_index, source_node_index, dest_node_index); +} + +uint32_t ReadLoadActionGenerator::PickMostReadNode(const std::shared_ptr& cluster) { + if (cluster->node_index_sorted_by_read_load_.size() >= 1) { + return cluster->node_index_sorted_by_read_load_[cluster->node_index_sorted_by_read_load_.size() - 1]; + } else { + return kInvalidTabletIndex; + } +} + +uint32_t ReadLoadActionGenerator::PickLeastReadNode(const std::shared_ptr& cluster) { + if 
(cluster->node_index_sorted_by_read_load_.size() >= 1) { + uint32_t index = 0; + if (cluster->lb_options_.meta_table_isolate_enabled) { + while (cluster->node_index_sorted_by_read_load_[index] == cluster->meta_table_node_index_) { + ++index; + if (index == cluster->node_index_sorted_by_read_load_.size()) { + return kInvalidNodeIndex; + } + } + } + return cluster->node_index_sorted_by_read_load_[index]; + } else { + return kInvalidTabletIndex; + } +} + +std::string ReadLoadActionGenerator::Name() { + return name_; +} + +WriteLoadActionGenerator::WriteLoadActionGenerator() : + name_("WriteLoadActionGenerator") { +} + +WriteLoadActionGenerator::~WriteLoadActionGenerator() { +} + +Action* WriteLoadActionGenerator::Generate(const std::shared_ptr& cluster) { + VLOG(20) << "[lb] WriteLoadActionGenerator worked"; + + if (cluster->tablet_node_num_ < 2) { + return new EmptyAction(); + } + + cluster->SortNodesByWriteLoad(); + + uint32_t source_node_index = PickMostWriteNode(cluster); + uint32_t dest_node_index = PickLeastWriteNode(cluster); + uint32_t tablet_index = PickRandomTabletOfNode(cluster, source_node_index); + + if (tablet_index == kInvalidTabletIndex || + source_node_index == kInvalidNodeIndex || + dest_node_index == kInvalidNodeIndex || + source_node_index == dest_node_index) { + return new EmptyAction(); + } + + return new MoveAction(tablet_index, source_node_index, dest_node_index); +} + +uint32_t WriteLoadActionGenerator::PickMostWriteNode(const std::shared_ptr& cluster) { + if (cluster->node_index_sorted_by_write_load_.size() >= 1) { + return cluster->node_index_sorted_by_write_load_[cluster->node_index_sorted_by_write_load_.size() - 1]; + } else { + return kInvalidTabletIndex; + } +} + +uint32_t WriteLoadActionGenerator::PickLeastWriteNode(const std::shared_ptr& cluster) { + if (cluster->node_index_sorted_by_write_load_.size() >= 1) { + uint32_t index = 0; + if (cluster->lb_options_.meta_table_isolate_enabled) { + while 
(cluster->node_index_sorted_by_write_load_[index] == cluster->meta_table_node_index_) { + ++index; + if (index == cluster->node_index_sorted_by_write_load_.size()) { + return kInvalidNodeIndex; + } + } + } + return cluster->node_index_sorted_by_write_load_[index]; + } else { + return kInvalidTabletIndex; + } +} + +std::string WriteLoadActionGenerator::Name() { + return name_; +} + +ScanLoadActionGenerator::ScanLoadActionGenerator() : + name_("ScanLoadActionGenerator") { +} + +ScanLoadActionGenerator::~ScanLoadActionGenerator() { +} + +Action* ScanLoadActionGenerator::Generate(const std::shared_ptr& cluster) { + VLOG(20) << "[lb] ScanLoadActionGenerator worked"; + + if (cluster->tablet_node_num_ < 2) { + return new EmptyAction(); + } + + cluster->SortNodesByScanLoad(); + + uint32_t source_node_index = PickMostScanNode(cluster); + uint32_t dest_node_index = PickLeastScanNode(cluster); + uint32_t tablet_index = PickRandomTabletOfNode(cluster, source_node_index); + + if (tablet_index == kInvalidTabletIndex || + source_node_index == kInvalidNodeIndex || + dest_node_index == kInvalidNodeIndex || + source_node_index == dest_node_index) { + return new EmptyAction(); + } + + return new MoveAction(tablet_index, source_node_index, dest_node_index); +} + +uint32_t ScanLoadActionGenerator::PickMostScanNode(const std::shared_ptr& cluster) { + if (cluster->node_index_sorted_by_scan_load_.size() >= 1) { + return cluster->node_index_sorted_by_scan_load_[cluster->node_index_sorted_by_scan_load_.size() - 1]; + } else { + return kInvalidTabletIndex; + } +} + +uint32_t ScanLoadActionGenerator::PickLeastScanNode(const std::shared_ptr& cluster) { + if (cluster->node_index_sorted_by_scan_load_.size() >= 1) { + uint32_t index = 0; + if (cluster->lb_options_.meta_table_isolate_enabled) { + while (cluster->node_index_sorted_by_scan_load_[index] == cluster->meta_table_node_index_) { + ++index; + if (index == cluster->node_index_sorted_by_scan_load_.size()) { + return kInvalidNodeIndex; + } + 
} + } + return cluster->node_index_sorted_by_scan_load_[index]; + } else { + return kInvalidTabletIndex; + } +} + +std::string ScanLoadActionGenerator::Name() { + return name_; +} + +} // namespace load_balancer +} // namespace tera diff --git a/src/load_balancer/action_generators.h b/src/load_balancer/action_generators.h new file mode 100644 index 000000000..16c663ae7 --- /dev/null +++ b/src/load_balancer/action_generators.h @@ -0,0 +1,134 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_LOAD_BALANCER_ACTION_GENERATORS_H_ +#define TERA_LOAD_BALANCER_ACTION_GENERATORS_H_ + +#include + +#include "load_balancer/action_generator.h" +#include "load_balancer/actions.h" + +namespace tera { +namespace load_balancer { + +// move a random tablet of a random node to another random node +class RandomActionGenerator : public ActionGenerator { +public: + RandomActionGenerator(); + virtual ~RandomActionGenerator(); + + // generate a random move action + virtual Action* Generate(const std::shared_ptr& cluster) override; + + virtual std::string Name() override; + +private: + std::string name_; +}; + +// move a tablet +// from the node holding most tablets +// to the node holding least tablets +class TabletCountActionGenerator : public ActionGenerator { +public: + TabletCountActionGenerator(); + virtual ~TabletCountActionGenerator(); + + virtual Action* Generate(const std::shared_ptr& cluster) override; + + virtual std::string Name() override; + +private: + uint32_t PickMostTabletsNode(const std::shared_ptr& cluster); + uint32_t PickLeastTabletsNode(const std::shared_ptr& cluster); + +private: + std::string name_; +}; + +// move a tablet +// from the node holding largest data size +// to the node holding smallest data size +class SizeActionGenerator : public ActionGenerator { +public: + SizeActionGenerator(); + virtual ~SizeActionGenerator(); + + 
virtual Action* Generate(const std::shared_ptr& cluster) override; + + virtual std::string Name() override; + +private: + uint32_t PickLargestSizeNode(const std::shared_ptr& cluster); + uint32_t PickSmallestSizeNode(const std::shared_ptr& cluster); + +private: + std::string name_; +}; + +// move a tablet +// from the node has most read load +// to the node has least read load +class ReadLoadActionGenerator : public ActionGenerator { +public: + ReadLoadActionGenerator(); + virtual ~ReadLoadActionGenerator(); + + virtual Action* Generate(const std::shared_ptr& cluster) override; + + virtual std::string Name() override; + +private: + uint32_t PickMostReadNode(const std::shared_ptr& cluster); + uint32_t PickLeastReadNode(const std::shared_ptr& cluster); + +private: + std::string name_; +}; + +// move a tablet +// from the node has most write load +// to the node has least write load +class WriteLoadActionGenerator : public ActionGenerator { +public: + WriteLoadActionGenerator(); + virtual ~WriteLoadActionGenerator(); + + virtual Action* Generate(const std::shared_ptr& cluster) override; + + virtual std::string Name() override; + +private: + uint32_t PickMostWriteNode(const std::shared_ptr& cluster); + uint32_t PickLeastWriteNode(const std::shared_ptr& cluster); + +private: + std::string name_; +}; + +// move a tablet +// from the node has most scan load +// to the node has least scan load +class ScanLoadActionGenerator : public ActionGenerator { +public: + ScanLoadActionGenerator(); + virtual ~ScanLoadActionGenerator(); + + virtual Action* Generate(const std::shared_ptr& cluster) override; + + virtual std::string Name() override; + +private: + uint32_t PickMostScanNode(const std::shared_ptr& cluster); + uint32_t PickLeastScanNode(const std::shared_ptr& cluster); + +private: + std::string name_; +}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_ACTION_GENERATORS_H_ diff --git a/src/load_balancer/actions.cc 
b/src/load_balancer/actions.cc new file mode 100644 index 000000000..0be2d9d5e --- /dev/null +++ b/src/load_balancer/actions.cc @@ -0,0 +1,47 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include + +#include "load_balancer/actions.h" + +namespace tera { +namespace load_balancer { + +EmptyAction::EmptyAction() : + Action(Action::Type::EMPTY) { +} + +EmptyAction::~EmptyAction() { +} + +Action* EmptyAction::UndoAction() { + return new EmptyAction(); +} + +std::string EmptyAction::ToString() const { + return "EmptyAction"; +} + +MoveAction::MoveAction(uint32_t tablet_index, uint32_t source_node_index, uint32_t dest_node_index) : + Action(Action::Type::MOVE), + tablet_index_(tablet_index), + source_node_index_(source_node_index), + dest_node_index_(dest_node_index) { +} + +MoveAction::~MoveAction() { +} + +Action* MoveAction::UndoAction() { + return new MoveAction(tablet_index_, dest_node_index_, source_node_index_); +} + +std::string MoveAction::ToString() const { + return "move " + std::to_string(tablet_index_) + " from " + + std::to_string(source_node_index_) + " to " + std::to_string(dest_node_index_); +} + +} // namespace load_balancer +} // namespace tera diff --git a/src/load_balancer/actions.h b/src/load_balancer/actions.h new file mode 100644 index 000000000..f4751ea9c --- /dev/null +++ b/src/load_balancer/actions.h @@ -0,0 +1,43 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_LOAD_BALANCER_ACTIONS_H_ +#define TERA_LOAD_BALANCER_ACTIONS_H_ + +#include + +#include "load_balancer/action.h" + +namespace tera { +namespace load_balancer { + +class EmptyAction : public Action { +public: + EmptyAction(); + virtual ~EmptyAction(); + + virtual Action* UndoAction() override; + + virtual std::string ToString() const override; +}; + +class MoveAction : public Action { +public: + MoveAction(uint32_t tablet_index, uint32_t source_node_index, uint32_t dest_node_index); + virtual ~MoveAction(); + + virtual Action* UndoAction() override; + + virtual std::string ToString() const override; + +public: + uint32_t tablet_index_; + uint32_t source_node_index_; + uint32_t dest_node_index_; +}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_ACTIONS_H_ diff --git a/src/load_balancer/balancer.h b/src/load_balancer/balancer.h new file mode 100644 index 000000000..2ad1727ea --- /dev/null +++ b/src/load_balancer/balancer.h @@ -0,0 +1,39 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_LOAD_BALANCER_BALANCER_H_ +#define TERA_LOAD_BALANCER_BALANCER_H_ + +#include +#include + +#include "load_balancer/lb_node.h" +#include "load_balancer/options.h" +#include "load_balancer/plan.h" +#include "master/tablet_manager.h" +#include "master/tabletnode_manager.h" + +namespace tera { +namespace load_balancer { + +class Balancer { +public: + virtual ~Balancer() {} + + // balance the whole cluster + virtual bool BalanceCluster( + const std::vector>& lb_nodes, + std::vector* plans) = 0; + + // balance for the specified table + virtual bool BalanceCluster( + const std::string& table_name, + const std::vector>& lb_nodes, + std::vector* plans) = 0; +}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_BALANCER_H_ diff --git a/src/load_balancer/cluster.cc b/src/load_balancer/cluster.cc new file mode 100644 index 000000000..72a3f740e --- /dev/null +++ b/src/load_balancer/cluster.cc @@ -0,0 +1,537 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include + +#include +#include + +#include "glog/logging.h" +#include "load_balancer/actions.h" +#include "load_balancer/cluster.h" +#include "common/timer.h" + +namespace tera { +namespace load_balancer { + +Cluster::Cluster(const std::vector>& lb_nodes, + const LBOptions& options) : + meta_table_node_index_(std::numeric_limits::max()), + lb_options_(options) { + int64_t start_time_ns = get_micros(); + + for (const auto& node : lb_nodes) { + if (lb_options_.meta_table_isolate_enabled && + node->tablet_node_ptr->GetAddr() == lb_options_.meta_table_node_addr) { + VLOG(5) << "skip meta table node:" << lb_options_.meta_table_node_addr; + } else { + lb_nodes_.emplace_back(node); + } + } + + table_num_ = 0; + tablet_node_num_ = 0; + tablet_num_ = 0; + tablet_moved_num_ = 0; + + for (const auto& node : lb_nodes_) { + uint32_t node_index = nodes_.size(); + nodes_[node_index] = node; + + std::string addr = node->tablet_node_ptr->GetAddr(); + assert(nodes_to_index_.find(addr) == nodes_to_index_.end()); + nodes_to_index_[addr] = node_index; + + tablets_per_node_[node_index].clear(); + initial_tablets_not_ready_per_node_[node_index].clear(); + size_per_node_[node_index] = 0; + read_load_per_node_[node_index] = 0; + write_load_per_node_[node_index] = 0; + scan_load_per_node_[node_index] = 0; + + node_index_sorted_by_tablet_count_.emplace_back(node_index); + node_index_sorted_by_size_.emplace_back(node_index); + node_index_sorted_by_read_load_.emplace_back(node_index); + node_index_sorted_by_write_load_.emplace_back(node_index); + node_index_sorted_by_scan_load_.emplace_back(node_index); + + if (node->tablet_node_ptr->GetReadPending() > 0) { + read_pending_nodes_index_.insert(node_index); + } + if (node->tablet_node_ptr->GetWritePending() > 0) { + write_pending_nodes_index_.insert(node_index); + } + if (node->tablet_node_ptr->GetScanPending() > 0) { + scan_pending_nodes_index_.insert(node_index); + } + + for (const auto& tablet : node->tablets) { + uint32_t tablet_index = 
tablets_.size(); + + RegisterTablet(tablet, tablet_index, node_index); + + tablets_per_node_[node_index].emplace_back(tablet_index); + if (tablets_[tablet_index]->tablet_ptr->GetStatus() != kTableReady) { + initial_tablets_not_ready_per_node_[node_index].emplace_back(tablet_index); + } + size_per_node_[node_index] += static_cast(tablet->tablet_ptr->GetDataSize()); + read_load_per_node_[node_index] += static_cast(tablet->tablet_ptr->GetReadQps()); + write_load_per_node_[node_index] += static_cast(tablet->tablet_ptr->GetWriteQps()); + scan_load_per_node_[node_index] += static_cast(tablet->tablet_ptr->GetScanQps()); + + ++tablet_num_; + } + + ++ tablet_node_num_; + } + + // if not ready tablets' ratio is higher than option, the node is considered abnormal + for (uint32_t i = 0; i < tablets_per_node_.size(); ++i) { + if (tablets_per_node_[i].size() != 0) { + double note_ready_num = static_cast(initial_tablets_not_ready_per_node_[i].size()); + double total_num = static_cast(tablets_per_node_[i].size()); + if (note_ready_num / total_num >= lb_options_.abnormal_node_ratio) { + abnormal_nodes_index_.insert(i); + } + } + } + + assert(table_num_ == tables_.size()); + assert(tablet_node_num_ == nodes_.size()); + assert(tablet_num_ == tablets_.size()); + + assert(table_num_ == tables_to_index_.size()); + assert(tablet_node_num_ == nodes_to_index_.size()); + assert(tablet_num_ == tablets_to_index_.size()); + + assert(tablet_num_ == tablet_index_to_node_index_.size()); + assert(tablet_num_ == initial_tablet_index_to_node_index_.size()); + assert(tablet_num_ == tablet_index_to_table_index_.size()); + + assert(tablet_node_num_ == tablets_per_node_.size()); + assert(tablet_node_num_ == initial_tablets_not_ready_per_node_.size()); + assert(tablet_node_num_ == size_per_node_.size()); + assert(tablet_node_num_ == read_load_per_node_.size()); + assert(tablet_node_num_ == write_load_per_node_.size()); + assert(tablet_node_num_ == scan_load_per_node_.size()); + 
assert(abnormal_nodes_index_.size() <= tablet_node_num_); + assert(read_pending_nodes_index_.size() <= tablet_node_num_); + assert(write_pending_nodes_index_.size() <= tablet_node_num_); + assert(scan_pending_nodes_index_.size() <= tablet_node_num_); + + assert(tablet_node_num_ == node_index_sorted_by_tablet_count_.size()); + assert(tablet_node_num_ == node_index_sorted_by_size_.size()); + assert(tablet_node_num_ == node_index_sorted_by_read_load_.size()); + assert(tablet_node_num_ == node_index_sorted_by_write_load_.size()); + assert(tablet_node_num_ == node_index_sorted_by_scan_load_.size()); + + VLOG(20) << "[lb] construct Cluster cost time(ms):" << (get_micros() - start_time_ns) / 1000; +} + +Cluster::~Cluster() { +} + +void Cluster::DebugCluster() { + LOG(INFO) << ""; + LOG(INFO) << "DebugCluster begin -----"; + + LOG(INFO) << "table_num_:" << table_num_; + LOG(INFO) << "tablet_node_num_:" << tablet_node_num_; + LOG(INFO) << "tablet_num_:" << tablet_num_; + LOG(INFO) << "tablet_moved_num_:" << tablet_moved_num_; + + LOG(INFO) << "[table_index -> table]:"; + for (const auto& table : tables_) { + LOG(INFO) << table.first << " -> " << table.second; + } + + LOG(INFO) << "[node_index -> node]:"; + for (const auto& node : nodes_) { + LOG(INFO) << node.first << " -> " << node.second->tablet_node_ptr->GetAddr(); + } + LOG(INFO) << "meta_table_node_index_:" << meta_table_node_index_; + + LOG(INFO) << "[tablet_index -> tablet]:"; + for (const auto& tablet : tablets_) { + LOG(INFO) << tablet.first << " -> " << tablet.second->tablet_ptr->GetPath(); + } + + LOG(INFO) << "[table -> table_index]:"; + for (const auto& table : tables_to_index_) { + LOG(INFO) << table.first << " -> " << table.second; + } + + LOG(INFO) << "[node -> node_index]:"; + for (const auto& node : nodes_to_index_) { + LOG(INFO) << node.first << " -> " << node.second; + } + + LOG(INFO) << "[tablet -> tablet_index]:"; + for (const auto& tablet : tablets_to_index_) { + LOG(INFO) << tablet.first << " -> " << 
tablet.second; + } + + LOG(INFO) << "[tablet_index -> node_index]:"; + for (const auto& it : tablet_index_to_node_index_) { + LOG(INFO) << it.first << " -> " << it.second; + } + + LOG(INFO) << "[initial tablet_index -> node_index]:"; + for (const auto& it : initial_tablet_index_to_node_index_) { + LOG(INFO) << it.first << " -> " << it.second; + } + + LOG(INFO) << "[tablet_index -> table_index]:"; + for (const auto& it : tablet_index_to_table_index_) { + LOG(INFO) << it.first << " -> " << it.second; + } + + LOG(INFO) << "[node_index -> tablets index]:"; + for (const auto& it : tablets_per_node_) { + std::string line = std::to_string(it.first) + " ->"; + for (const auto tablet : it.second) { + line += " "; + line += std::to_string(tablet); + } + LOG(INFO) << line; + } + + LOG(INFO) << "[node_index -> data size]:"; + for (const auto& it : size_per_node_) { + LOG(INFO) << it.first << " -> " << it.second << "B"; + } + + LOG(INFO) << "[node_index -> read load]:"; + for (const auto& it : read_load_per_node_) { + LOG(INFO) << it.first << " -> " << it.second; + } + + LOG(INFO) << "[node_index -> write load]:"; + for (const auto& it : write_load_per_node_) { + LOG(INFO) << it.first << " -> " << it.second; + } + + LOG(INFO) << "[node_index -> scan load]:"; + for (const auto& it : scan_load_per_node_) { + LOG(INFO) << it.first << " -> " << it.second; + } + + LOG(INFO) << "[tablets index of moved too frequently]:"; + for (const auto& tablet : tablets_moved_too_frequently_) { + LOG(INFO) << tablet; + } + + LOG(INFO) << "[node_index -> not ready tablets index]:"; + for (const auto& it : initial_tablets_not_ready_per_node_) { + std::string line = std::to_string(it.first) + " ->"; + for (const auto tablet : it.second) { + line += " "; + line += std::to_string(tablet); + } + LOG(INFO) << line; + } + + LOG(INFO) << "[abnormal nodes index]:"; + for (const auto& node: abnormal_nodes_index_) { + LOG(INFO) << node; + } + + LOG(INFO) << "[tablets index of moved to abnormal nodes]:"; + for 
(const auto& tablet : tablets_moved_to_abnormal_nodes_) { + LOG(INFO) << tablet; + } + + LOG(INFO) << "[read pending nodes index]:"; + for (const auto& node: read_pending_nodes_index_) { + LOG(INFO) << node; + } + + LOG(INFO) << "[tablets index of moved to read pending nodes]:"; + for (const auto& tablet : tablets_moved_to_read_pending_nodes_) { + LOG(INFO) << tablet; + } + + LOG(INFO) << "[write pending nodes index]:"; + for (const auto& node: write_pending_nodes_index_) { + LOG(INFO) << node; + } + + LOG(INFO) << "[tablets index of moved to write pending nodes]:"; + for (const auto& tablet : tablets_moved_to_write_pending_nodes_) { + LOG(INFO) << tablet; + } + + LOG(INFO) << "[scan pending nodes index]:"; + for (const auto& node: scan_pending_nodes_index_) { + LOG(INFO) << node; + } + + LOG(INFO) << "[tablets index of moved to scan pending nodes]:"; + for (const auto& tablet : tablets_moved_to_scan_pending_nodes_) { + LOG(INFO) << tablet; + } + + LOG(INFO) << "DebugCluster end -----"; + LOG(INFO) << ""; +} + +bool Cluster::ValidAction(const std::shared_ptr& action) { + switch (action->GetType()) { + case Action::Type::EMPTY: + return false; + case Action::Type::ASSIGN: + return true; + case Action::Type::MOVE: { + MoveAction* move_action = dynamic_cast(action.get()); + if (tablets_[move_action->tablet_index_]->tablet_ptr->GetStatus() != kTableReady) { + VLOG(20) << "[lb] invalid action, reason:tablet not ready, tablet status:" + << StatusCodeToString(tablets_[move_action->tablet_index_]->tablet_ptr->GetStatus()); + return false; + } + + if (tables_[tablet_index_to_table_index_[move_action->tablet_index_]] == + lb_options_.meta_table_name) { + VLOG(20) << "[lb] invalid action, reason:move meta table"; + return false; + } + + if (lb_options_.meta_table_isolate_enabled && + move_action->dest_node_index_ == meta_table_node_index_) { + VLOG(20) << "[lb] invalid action, reason:move tablet to meta table node"; + return false; + } + + return true; + } + case 
Action::Type::SWAP: + return true;; + default: + return false; + } +} + +void Cluster::DoAction(const std::shared_ptr& action) { + switch (action->GetType()) { + case Action::Type::EMPTY: + break; + case Action::Type::ASSIGN: + break; + case Action::Type::MOVE: { + MoveAction* move_action = dynamic_cast(action.get()); + VLOG(20) << "[lb] DoAction: " << move_action->ToString(); + assert(move_action->source_node_index_ != move_action->dest_node_index_); + + RemoveTablet(move_action->tablet_index_, move_action->source_node_index_); + AddTablet(move_action->tablet_index_, move_action->dest_node_index_); + MoveTablet(move_action->tablet_index_, move_action->source_node_index_, move_action->dest_node_index_); + + break; + } + case Action::Type::SWAP: + break; + default: + break; + } +} + +void Cluster::SortNodesByTabletCount() { + std::sort( + node_index_sorted_by_tablet_count_.begin(), + node_index_sorted_by_tablet_count_.end(), + [this](int a, int b) { + return tablets_per_node_[a].size() < tablets_per_node_[b].size(); + }); +} + +void Cluster::SortNodesBySize() { + std::sort( + node_index_sorted_by_size_.begin(), + node_index_sorted_by_size_.end(), + [this](int a, int b) { + return size_per_node_[a] < size_per_node_[b]; + }); +} + +void Cluster::SortNodesByReadLoad() { + std::sort( + node_index_sorted_by_read_load_.begin(), + node_index_sorted_by_read_load_.end(), + [this](int a, int b) { + return read_load_per_node_[a] < read_load_per_node_[b]; + }); +} + +void Cluster::SortNodesByWriteLoad() { + std::sort( + node_index_sorted_by_write_load_.begin(), + node_index_sorted_by_write_load_.end(), + [this](int a, int b) { + return write_load_per_node_[a] < write_load_per_node_[b]; + }); +} + +void Cluster::SortNodesByScanLoad() { + std::sort( + node_index_sorted_by_scan_load_.begin(), + node_index_sorted_by_scan_load_.end(), + [this](int a, int b) { + return scan_load_per_node_[a] < scan_load_per_node_[b]; + }); +} + +void Cluster::RegisterTablet(const std::shared_ptr& 
tablet, uint32_t tablet_index, uint32_t node_index) { + std::string table_name = tablet->tablet_ptr->GetTableName(); + if (tables_to_index_.find(table_name) == tables_to_index_.end()) { + uint32_t table_index = tables_.size(); + tables_[table_index] = table_name; + tables_to_index_[table_name] = table_index; + ++table_num_; + + if (table_name == lb_options_.meta_table_name) { + meta_table_node_index_ = node_index; + } + } + + std::string path = tablet->tablet_ptr->GetPath(); + tablets_to_index_[path] = tablet_index; + tablets_[tablet_index] = tablet; + + tablet_index_to_node_index_[tablet_index] = node_index; + initial_tablet_index_to_node_index_[tablet_index] = node_index; + tablet_index_to_table_index_[tablet_index] = tables_to_index_[table_name]; +} + +void Cluster::AddTablet(uint32_t tablet_index, uint32_t to_node_index) { + tablets_per_node_[to_node_index].emplace_back(tablet_index); + + size_per_node_[to_node_index] += static_cast( + tablets_[tablet_index]->tablet_ptr->GetDataSize()); + read_load_per_node_[to_node_index] += static_cast( + tablets_[tablet_index]->tablet_ptr->GetReadQps()); + write_load_per_node_[to_node_index] += static_cast( + tablets_[tablet_index]->tablet_ptr->GetWriteQps()); + scan_load_per_node_[to_node_index] += static_cast( + tablets_[tablet_index]->tablet_ptr->GetScanQps()); +} + +void Cluster::RemoveTablet(uint32_t tablet_index, uint32_t from_node_index) { + if (tablets_per_node_.find(from_node_index) == tablets_per_node_.end()) { + return; + } + auto& tablets = tablets_per_node_[from_node_index]; + for (auto it = tablets.begin(); it != tablets.end();) { + if (*it == tablet_index) { + it = tablets.erase(it); + break; + } else { + ++it; + } + } + + size_per_node_[from_node_index] -= static_cast( + tablets_[tablet_index]->tablet_ptr->GetDataSize()); + read_load_per_node_[from_node_index] -= static_cast( + tablets_[tablet_index]->tablet_ptr->GetReadQps()); + write_load_per_node_[from_node_index] -= static_cast( + 
tablets_[tablet_index]->tablet_ptr->GetWriteQps()); + scan_load_per_node_[from_node_index] -= static_cast( + tablets_[tablet_index]->tablet_ptr->GetScanQps()); + + assert(size_per_node_[from_node_index] >= 0); + assert(read_load_per_node_[from_node_index] >= 0); + assert(write_load_per_node_[from_node_index] >= 0); + assert(scan_load_per_node_[from_node_index] >= 0); +} + +void Cluster::MoveTablet(uint32_t tablet_index, uint32_t source_node_index, uint32_t dest_node_index) { + tablet_index_to_node_index_[tablet_index] = dest_node_index; + + if (initial_tablet_index_to_node_index_[tablet_index] == source_node_index) { + ++tablet_moved_num_; + + int64_t last_move_time_us = tablets_[tablet_index]->tablet_ptr->LastMoveTime(); + int64_t current_time_us = get_micros(); + if (current_time_us - last_move_time_us < + 1000000 * static_cast(lb_options_.tablet_move_too_frequently_threshold_s)) { + tablets_moved_too_frequently_.insert(tablet_index); + VLOG(20) << "[lb] add tablet moved too frequently, tablet index: " << tablet_index + << ", last_move_time: " << last_move_time_us << ", current time: " << current_time_us + << ", tablets_moved_too_frequently_ size: " << tablets_moved_too_frequently_.size(); + } + } else if (initial_tablet_index_to_node_index_[tablet_index] == dest_node_index) { + // tablet moved back + --tablet_moved_num_; + assert(tablet_moved_num_ >= 0); + + if (tablets_moved_too_frequently_.find(tablet_index) != tablets_moved_too_frequently_.end()) { + tablets_moved_too_frequently_.erase(tablet_index); + VLOG(20) << "[lb] remove tablet moved too frequently, tablet index: " << tablet_index + << ", tablets_moved_too_frequently_ size: " << tablets_moved_too_frequently_.size(); + } + } else { + } + + if (abnormal_nodes_index_.find(dest_node_index) != abnormal_nodes_index_.end() && + dest_node_index != initial_tablet_index_to_node_index_[tablet_index]) { + tablets_moved_to_abnormal_nodes_.insert(tablet_index); + VLOG(20) << "[lb] add tablet moved to abnormal node, 
tablet index: " << tablet_index + << ", node index: " << dest_node_index + << ", tablets_moved_to_abnormal_nodes_ size: " << tablets_moved_to_abnormal_nodes_.size(); + } else if (abnormal_nodes_index_.find(source_node_index) != abnormal_nodes_index_.end()) { + if (tablets_moved_to_abnormal_nodes_.find(tablet_index) != tablets_moved_to_abnormal_nodes_.end()) { + tablets_moved_to_abnormal_nodes_.erase(tablet_index); + VLOG(20) << "[lb] remove tablet moved to abnormal nodes, tablet index: " << tablet_index + << ", tablets_moved_to_abnormal_nodes_ size: " << tablets_moved_to_abnormal_nodes_.size(); + } + } else { + } + + if (read_pending_nodes_index_.find(dest_node_index) != read_pending_nodes_index_.end() && + dest_node_index != initial_tablet_index_to_node_index_[tablet_index]) { + tablets_moved_to_read_pending_nodes_.insert(tablet_index); + VLOG(20) << "[lb] add tablet moved to read pending node, tablet index: " << tablet_index + << ", node index: " << dest_node_index + << ", tablets_moved_to_read_pending_nodes_ size: " << tablets_moved_to_read_pending_nodes_.size(); + } else if (read_pending_nodes_index_.find(source_node_index) != read_pending_nodes_index_.end()) { + if (tablets_moved_to_read_pending_nodes_.find(tablet_index) != tablets_moved_to_read_pending_nodes_.end()) { + tablets_moved_to_read_pending_nodes_.erase(tablet_index); + VLOG(20) << "[lb] remove tablet moved to read pending nodes, tablet index: " << tablet_index + << ", tablets_moved_to_read_pending_nodes_ size: " << tablets_moved_to_read_pending_nodes_.size(); + } + } else { + } + + if (write_pending_nodes_index_.find(dest_node_index) != write_pending_nodes_index_.end() && + dest_node_index != initial_tablet_index_to_node_index_[tablet_index]) { + tablets_moved_to_write_pending_nodes_.insert(tablet_index); + VLOG(20) << "[lb] add tablet moved to write pending node, tablet index: " << tablet_index + << ", node index: " << dest_node_index + << ", tablets_moved_to_write_pending_nodes_ size: " << 
tablets_moved_to_write_pending_nodes_.size(); + } else if (write_pending_nodes_index_.find(source_node_index) != write_pending_nodes_index_.end()) { + if (tablets_moved_to_write_pending_nodes_.find(tablet_index) != tablets_moved_to_write_pending_nodes_.end()) { + tablets_moved_to_write_pending_nodes_.erase(tablet_index); + VLOG(20) << "[lb] remove tablet moved to write pending nodes, tablet index: " << tablet_index + << ", tablets_moved_to_write_pending_nodes_ size: " << tablets_moved_to_write_pending_nodes_.size(); + } + } else { + } + + if (scan_pending_nodes_index_.find(dest_node_index) != scan_pending_nodes_index_.end() && + dest_node_index != initial_tablet_index_to_node_index_[tablet_index]) { + tablets_moved_to_scan_pending_nodes_.insert(tablet_index); + VLOG(20) << "[lb] add tablet moved to scan pending node, tablet index: " << tablet_index + << ", node index: " << dest_node_index + << ", tablets_moved_to_scan_pending_nodes_ size: " << tablets_moved_to_scan_pending_nodes_.size(); + } else if (scan_pending_nodes_index_.find(source_node_index) != scan_pending_nodes_index_.end()) { + if (tablets_moved_to_scan_pending_nodes_.find(tablet_index) != tablets_moved_to_scan_pending_nodes_.end()) { + tablets_moved_to_scan_pending_nodes_.erase(tablet_index); + VLOG(20) << "[lb] remove tablet moved to scan pending nodes, tablet index: " << tablet_index + << ", tablets_moved_to_scan_pending_nodes_ size: " << tablets_moved_to_scan_pending_nodes_.size(); + } + } else { + } +} + +} // namespace load_balancer +} // namespace tera diff --git a/src/load_balancer/cluster.h b/src/load_balancer/cluster.h new file mode 100644 index 000000000..8a22acd7c --- /dev/null +++ b/src/load_balancer/cluster.h @@ -0,0 +1,130 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_LOAD_BALANCER_CLUSTER_H_ +#define TERA_LOAD_BALANCER_CLUSTER_H_ + +#include +#include +#include +#include + +#include "load_balancer/action.h" +#include "load_balancer/lb_node.h" +#include "load_balancer/options.h" +#include "master/tablet_manager.h" +#include "master/tabletnode_manager.h" + +namespace tera { +namespace load_balancer { + +class Cluster { +public: + Cluster(const std::vector>& tablet_nodes, + const LBOptions& options); + + virtual ~Cluster(); + + void DebugCluster(); + + bool ValidAction(const std::shared_ptr& action); + + void DoAction(const std::shared_ptr& action); + + void SortNodesByTabletCount(); + + void SortNodesBySize(); + + void SortNodesByReadLoad(); + + void SortNodesByWriteLoad(); + + void SortNodesByScanLoad(); + +private: + void RegisterTablet(const std::shared_ptr& tablet, uint32_t tablet_index, uint32_t node_index); + void AddTablet(uint32_t tablet_index, uint32_t to_node_index); + void RemoveTablet(uint32_t tablet_index, uint32_t from_node_index); + void MoveTablet(uint32_t tablet_index, uint32_t source_node_index, uint32_t dest_node_index); + +// cluster info, use index to speed up the calculation +// make these info public also for speeding up +public: + uint32_t table_num_; + uint32_t tablet_node_num_; + uint32_t tablet_num_; + uint32_t tablet_moved_num_; + + // table_index -> table + std::map tables_; + // node_index -> node + std::map> nodes_; + // tablet_index -> tablet + std::map> tablets_; + + // table -> table_index + std::map tables_to_index_; + // node -> node_index + std::map nodes_to_index_; + // tablet -> tablet_index + std::map tablets_to_index_; + + // tablet_index -> node_index + std::map tablet_index_to_node_index_; + // initial tablet_index -> node_index, it's the initial cluster state + std::map initial_tablet_index_to_node_index_; + // tablet_index -> table_index + std::map tablet_index_to_table_index_; + + // node_index -> tablets index on the node + std::map> tablets_per_node_; + // node_index 
-> tablets index of not ready on the node + std::map> initial_tablets_not_ready_per_node_; + // abnormal nodes index + std::unordered_set abnormal_nodes_index_; + // index of tablets moved to abnormal nodes + std::unordered_set tablets_moved_to_abnormal_nodes_; + // read pending nodes index + std::unordered_set read_pending_nodes_index_; + // index of tablets moved to read pending nodes + std::unordered_set tablets_moved_to_read_pending_nodes_; + // write pending nodes index + std::unordered_set write_pending_nodes_index_; + // index of tablets moved to write pending nodes + std::unordered_set tablets_moved_to_write_pending_nodes_; + // scan pending nodes index + std::unordered_set scan_pending_nodes_index_; + // index of tablets moved to scan pending nodes + std::unordered_set tablets_moved_to_scan_pending_nodes_; + // node_index -> data size on the node + std::map size_per_node_; + // node_index -> read load on the node + std::map read_load_per_node_; + // node_index -> write load on the node + std::map write_load_per_node_; + // node_index -> scan load on the node + std::map scan_load_per_node_; + // tablets index of moved too frequently + std::unordered_set tablets_moved_too_frequently_; + + // meta table node index + uint32_t meta_table_node_index_; + + // for ActionGenerator + std::vector node_index_sorted_by_tablet_count_; + std::vector node_index_sorted_by_size_; + std::vector node_index_sorted_by_read_load_; + std::vector node_index_sorted_by_write_load_; + std::vector node_index_sorted_by_scan_load_; + + LBOptions lb_options_; + +private: + std::vector> lb_nodes_; +}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_CLUSTER_H_ diff --git a/src/load_balancer/cost_function.h b/src/load_balancer/cost_function.h new file mode 100644 index 000000000..862b09285 --- /dev/null +++ b/src/load_balancer/cost_function.h @@ -0,0 +1,125 @@ +// Copyright (c) 2015, Baidu.com, Inc. 
All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_LOAD_BALANCER_COST_FUNCTION_H_ +#define TERA_LOAD_BALANCER_COST_FUNCTION_H_ + +#include + +#include +#include +#include +#include +#include + +#include "glog/logging.h" +#include "load_balancer/cluster.h" +#include "load_balancer/options.h" + +namespace tera { +namespace load_balancer { + +class CostFunction { +public: + CostFunction(const LBOptions& options, const std::string& name) : + lb_options_(options), + name_(name) { + + } + + virtual ~CostFunction() {} + + virtual double Cost() = 0; + + virtual void Init(const std::shared_ptr& cluster) { + cluster_ = cluster; + } + + double GetWeight() const { + return weight_; + } + + void SetWeight(double w) { + weight_ = w; + } + + std::string Name() const { + return name_; + } + +protected: + double Scale(double min, double max, double value) { + VLOG(20) << "[lb] Scale begin, min:" << min << " max:" << max << " value:" << value; + if (max <= min || value <= min) { + return 0.0; + } + if (max - min == 0) { + return 0.0; + } + + double scaled = std::max(0.0, std::min(1.0, (value - min) / (max - min))); + VLOG(20) << "[lb] Scale end, scaled:" << scaled; + return scaled; + } + + double ScaleFromArray(const std::vector& stats) { + if (lb_options_.debug_mode_enabled) { + std::string line; + for (const auto& s : stats) { + line += std::to_string(s); + line += " "; + } + LOG(INFO) << "[lb] stats:" << line; + } + + double total_cost = 0; + double total = GetSum(stats); + + double count = stats.size(); + double mean = total/count; + + double max = ((count - 1) * mean) + (total - mean); + + double min; + if (count > total) { + min = ((count - total) * mean) + ((1 - mean) * total); + } else { + int num_high = (int) (total - (floor(mean) * count)); + int num_low = (int) (count - num_high); + + min = (num_high * (ceil(mean) - mean)) + (num_low * (mean - floor(mean))); + + } + min = 
std::max(0.0, min); + for (size_t i = 0; i < stats.size(); i++) { + double n = stats[i]; + double diff = std::abs(mean - n); + total_cost += diff; + } + + return Scale(min, max, total_cost); + } + +private: + double GetSum(const std::vector& stats) { + double total = 0; + for (const auto& s : stats) { + total += s; + } + return total; + } + +protected: + std::shared_ptr cluster_; + +private: + double weight_; + LBOptions lb_options_; + std::string name_; +}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_COST_FUNCTION_H_ diff --git a/src/load_balancer/cost_functions.cc b/src/load_balancer/cost_functions.cc new file mode 100644 index 000000000..e459b4337 --- /dev/null +++ b/src/load_balancer/cost_functions.cc @@ -0,0 +1,222 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "load_balancer/cost_functions.h" + +#include +#include + +namespace tera { +namespace load_balancer { + +MoveCountCostFunction::MoveCountCostFunction (const LBOptions& options) : + CostFunction(options, "MoveCountCostFunction"), + kExpensiveCost(1000000), + tablet_max_move_num_(options.tablet_max_move_num), + tablet_max_move_percent_(options.tablet_max_move_percent) { + SetWeight(options.move_count_cost_weight); +} + +MoveCountCostFunction::~MoveCountCostFunction() { +} + +double MoveCountCostFunction::Cost() { + uint32_t max_move_num = std::max(tablet_max_move_num_, static_cast(cluster_->tablet_num_ * tablet_max_move_percent_)); + double cost = cluster_->tablet_moved_num_; + if (cost > static_cast(max_move_num)) { + // return an expensive cost + VLOG(20) << "[lb] reach max move num limit, max_move_num:" << max_move_num; + return kExpensiveCost; + } + + return Scale(0, std::min(cluster_->tablet_num_, max_move_num), cost); +} + +MoveFrequencyCostFunction::MoveFrequencyCostFunction(const LBOptions& options) : + CostFunction(options, 
"MoveFrequencyCostFunction"), + kExpensiveCost(100000) { + SetWeight(options.move_frequency_cost_weight); +} + +MoveFrequencyCostFunction::~MoveFrequencyCostFunction() { +} + +double MoveFrequencyCostFunction::Cost() { + if (cluster_->tablets_moved_too_frequently_.size() > 0) { + // there are tablets moved too frequently, return an expensive cost + VLOG(20) << "[lb] there are " << cluster_->tablets_moved_too_frequently_.size() + << " tablets moved too frequently"; + return kExpensiveCost; + } else { + return 0; + } +} + +AbnormalNodeCostFunction::AbnormalNodeCostFunction(const LBOptions& options) : + CostFunction(options, "AbnormalNodeCostFunction"), + kExpensiveCost(100000) { + SetWeight(options.abnormal_node_cost_weight); +} + +AbnormalNodeCostFunction::~AbnormalNodeCostFunction() { +} + +double AbnormalNodeCostFunction::Cost() { + if (cluster_->tablets_moved_to_abnormal_nodes_.size() > 0) { + // there are tablets moved to abnormal nodes, return an expensive cost + VLOG(20) << "[lb] there are " << cluster_->tablets_moved_to_abnormal_nodes_.size() + << " tablets moved to abnormal nodes"; + return kExpensiveCost; + } else { + return 0; + } +} + +ReadPendingNodeCostFunction::ReadPendingNodeCostFunction(const LBOptions& options) : + CostFunction(options, "ReadPendingNodeCostFunction"), + kExpensiveCost(10000) { + SetWeight(options.read_pending_node_cost_weight); +} + +ReadPendingNodeCostFunction::~ReadPendingNodeCostFunction() { +} + +double ReadPendingNodeCostFunction::Cost() { + if (cluster_->tablets_moved_to_read_pending_nodes_.size() > 0) { + // there are tablets moved to read pending nodes, return an expensive cost + VLOG(20) << "[lb] there are " << cluster_->tablets_moved_to_read_pending_nodes_.size() + << " tablets moved to read pending nodes"; + return kExpensiveCost; + } else { + return 0; + } +} + +WritePendingNodeCostFunction::WritePendingNodeCostFunction(const LBOptions& options) : + CostFunction(options, "WritePendingNodeCostFunction"), + 
kExpensiveCost(10000) { + SetWeight(options.write_pending_node_cost_weight); +} + +WritePendingNodeCostFunction::~WritePendingNodeCostFunction() { +} + +double WritePendingNodeCostFunction::Cost() { + if (cluster_->tablets_moved_to_write_pending_nodes_.size() > 0) { + // there are tablets moved to write pending nodes, return an expensive cost + VLOG(20) << "[lb] there are " << cluster_->tablets_moved_to_write_pending_nodes_.size() + << " tablets moved to write pending nodes"; + return kExpensiveCost; + } else { + return 0; + } +} + +ScanPendingNodeCostFunction::ScanPendingNodeCostFunction(const LBOptions& options) : + CostFunction(options, "ScanPendingNodeCostFunction"), + kExpensiveCost(10000) { + SetWeight(options.scan_pending_node_cost_weight); +} + +ScanPendingNodeCostFunction::~ScanPendingNodeCostFunction() { +} + +double ScanPendingNodeCostFunction::Cost() { + if (cluster_->tablets_moved_to_scan_pending_nodes_.size() > 0) { + // there are tablets moved to scan pending nodes, return an expensive cost + VLOG(20) << "[lb] there are " << cluster_->tablets_moved_to_scan_pending_nodes_.size() + << " tablets moved to scan pending nodes"; + return kExpensiveCost; + } else { + return 0; + } +} + +TabletCountCostFunction::TabletCountCostFunction (const LBOptions& options) : + CostFunction(options, "TabletCountCostFunction") { + SetWeight(options.tablet_count_cost_weight); +} + +TabletCountCostFunction::~TabletCountCostFunction() { +} + +double TabletCountCostFunction::Cost() { + std::vector tablet_nums_per_node; + for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { + tablet_nums_per_node.emplace_back(cluster_->tablets_per_node_[i].size()); + } + + return ScaleFromArray(tablet_nums_per_node); +} + +SizeCostFunction::SizeCostFunction (const LBOptions& options) : + CostFunction(options, "SizeCostFunction") { + SetWeight(options.size_cost_weight); +} + +SizeCostFunction::~SizeCostFunction() { +} + +double SizeCostFunction::Cost() { + std::vector size_per_node; + 
for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { + size_per_node.emplace_back(cluster_->size_per_node_[i]); + } + + return ScaleFromArray(size_per_node); +} + +ReadLoadCostFunction::ReadLoadCostFunction (const LBOptions& options) : + CostFunction(options, "ReadLoadCostFunction") { + SetWeight(options.read_load_cost_weight); +} + +ReadLoadCostFunction::~ReadLoadCostFunction() { +} + +double ReadLoadCostFunction::Cost() { + std::vector read_load_per_node; + for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { + read_load_per_node.emplace_back(cluster_->read_load_per_node_[i]); + } + + return ScaleFromArray(read_load_per_node); +} + +WriteLoadCostFunction::WriteLoadCostFunction (const LBOptions& options) : + CostFunction(options, "WriteLoadCostFunction") { + SetWeight(options.write_load_cost_weight); +} + +WriteLoadCostFunction::~WriteLoadCostFunction() { +} + +double WriteLoadCostFunction::Cost() { + std::vector write_load_per_node; + for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { + write_load_per_node.emplace_back(cluster_->write_load_per_node_[i]); + } + + return ScaleFromArray(write_load_per_node); +} + +ScanLoadCostFunction::ScanLoadCostFunction (const LBOptions& options) : + CostFunction(options, "ScanLoadCostFunction") { + SetWeight(options.scan_load_cost_weight); +} + +ScanLoadCostFunction::~ScanLoadCostFunction() { +} + +double ScanLoadCostFunction::Cost() { + std::vector scan_load_per_node; + for (uint32_t i = 0; i < cluster_->tablet_node_num_; ++i) { + scan_load_per_node.emplace_back(cluster_->scan_load_per_node_[i]); + } + + return ScaleFromArray(scan_load_per_node); +} + +} // namespace load_balancer +} // namespace tera diff --git a/src/load_balancer/cost_functions.h b/src/load_balancer/cost_functions.h new file mode 100644 index 000000000..5f977275a --- /dev/null +++ b/src/load_balancer/cost_functions.h @@ -0,0 +1,135 @@ +// Copyright (c) 2015, Baidu.com, Inc. 
All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_LOAD_BALANCER_COST_FUNCTIONS_H_ +#define TERA_LOAD_BALANCER_COST_FUNCTIONS_H_ + +#include "load_balancer/cost_function.h" + +namespace tera { +namespace load_balancer { + +// moving too many tablets will cost high +class MoveCountCostFunction : public CostFunction { +public: + MoveCountCostFunction(const LBOptions& options); + virtual ~MoveCountCostFunction(); + + virtual double Cost() override; + +private: + const double kExpensiveCost; + uint32_t tablet_max_move_num_; + double tablet_max_move_percent_; +}; + +// moving a tablet too frequently will cost high +class MoveFrequencyCostFunction : public CostFunction { +public: + MoveFrequencyCostFunction(const LBOptions& options); + virtual ~MoveFrequencyCostFunction(); + + virtual double Cost() override; + +private: + const double kExpensiveCost; +}; + +// moving a tablet to an abnormal node will cost high +class AbnormalNodeCostFunction : public CostFunction { +public: + AbnormalNodeCostFunction(const LBOptions& options); + virtual ~AbnormalNodeCostFunction(); + + virtual double Cost() override; + +private: + const double kExpensiveCost; +}; + +// moving a tablet to a read pending node will cost high +class ReadPendingNodeCostFunction : public CostFunction { +public: + ReadPendingNodeCostFunction(const LBOptions& options); + virtual ~ReadPendingNodeCostFunction(); + + virtual double Cost() override; + +private: + const double kExpensiveCost; +}; + +// moving a tablet to a write pending node will cost high +class WritePendingNodeCostFunction : public CostFunction { +public: + WritePendingNodeCostFunction(const LBOptions& options); + virtual ~WritePendingNodeCostFunction(); + + virtual double Cost() override; + +private: + const double kExpensiveCost; +}; + +// moving a tablet to a scan pending node will cost high +class ScanPendingNodeCostFunction : public CostFunction { +public: 
+ ScanPendingNodeCostFunction(const LBOptions& options); + virtual ~ScanPendingNodeCostFunction(); + + virtual double Cost() override; + +private: + const double kExpensiveCost; +}; + +// balance the tablets num for each tablet node +class TabletCountCostFunction : public CostFunction { +public: + TabletCountCostFunction(const LBOptions& options); + virtual ~TabletCountCostFunction(); + + virtual double Cost() override; +}; + +// balance the data size for each tablet node +class SizeCostFunction : public CostFunction { +public: + SizeCostFunction(const LBOptions& options); + virtual ~SizeCostFunction(); + + virtual double Cost() override; +}; + +// balance the read load for each tablet node +class ReadLoadCostFunction : public CostFunction { +public: + ReadLoadCostFunction(const LBOptions& options); + virtual ~ReadLoadCostFunction(); + + virtual double Cost() override; +}; + +// balance the write load for each tablet node +class WriteLoadCostFunction : public CostFunction { +public: + WriteLoadCostFunction(const LBOptions& options); + virtual ~WriteLoadCostFunction(); + + virtual double Cost() override; +}; + +// balance the scan load for each tablet node +class ScanLoadCostFunction : public CostFunction { +public: + ScanLoadCostFunction(const LBOptions& options); + virtual ~ScanLoadCostFunction(); + + virtual double Cost() override; +}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_COST_FUNCTIONS_H_ diff --git a/src/load_balancer/lb_entry.cc b/src/load_balancer/lb_entry.cc new file mode 100644 index 000000000..abf0b3ad6 --- /dev/null +++ b/src/load_balancer/lb_entry.cc @@ -0,0 +1,74 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "load_balancer/lb_entry.h" + +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" + +#include "common/net/ip_address.h" +#include "common/this_thread.h" +#include "load_balancer/lb_impl.h" +#include "load_balancer/lb_service_impl.h" + +DECLARE_string(tera_lb_server_addr); +DECLARE_string(tera_lb_server_port); + +std::string GetTeraEntryName() { + return "load_balancer"; +} + +tera::TeraEntry* GetTeraEntry() { + return new tera::load_balancer::LBEntry(); +} + +namespace tera { +namespace load_balancer { + +LBEntry::LBEntry() : + rpc_server_(nullptr), + lb_service_impl_(nullptr), + lb_impl_(nullptr) { + sofa::pbrpc::RpcServerOptions rpc_options; + rpc_server_.reset(new sofa::pbrpc::RpcServer(rpc_options)); +} + +LBEntry::~LBEntry() { +} + +bool LBEntry::StartServer() { + IpAddress lb_addr(FLAGS_tera_lb_server_addr, FLAGS_tera_lb_server_port); + LOG(INFO) << "Start load balancer RPC server at: " << lb_addr.ToString(); + + lb_impl_.reset(new LBImpl()); + lb_service_impl_ = new LBServiceImpl(lb_impl_); + + if (!lb_impl_->Init()) { + return false; + } + + rpc_server_->RegisterService(lb_service_impl_); + if (!rpc_server_->Start(lb_addr.ToString())) { + LOG(ERROR) << "start RPC server error"; + return false; + } + + LOG(INFO) << "finish starting load balancer server"; + return true; +} + +bool LBEntry::Run() { + ThisThread::Sleep(1000); + return true; +} + +void LBEntry::ShutdownServer() { + rpc_server_->Stop(); +} + +} // namespace load_balancer +} // namespace tera + diff --git a/src/load_balancer/lb_entry.h b/src/load_balancer/lb_entry.h new file mode 100644 index 000000000..03399bc00 --- /dev/null +++ b/src/load_balancer/lb_entry.h @@ -0,0 +1,38 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_LOAD_BALANCER_LB_ENTRY_H_ +#define TERA_LOAD_BALANCER_LB_ENTRY_H_ + +#include + +#include "sofa/pbrpc/pbrpc.h" + +#include "tera_entry.h" + +namespace tera { +namespace load_balancer { + +class LBServiceImpl; +class LBImpl; + +class LBEntry : public TeraEntry { +public: + LBEntry(); + virtual ~LBEntry(); + + virtual bool StartServer(); + virtual bool Run(); + virtual void ShutdownServer(); + +private: + std::unique_ptr rpc_server_; + LBServiceImpl* lb_service_impl_; + std::shared_ptr lb_impl_; +}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_LB_ENTRY_H_ diff --git a/src/load_balancer/lb_impl.cc b/src/load_balancer/lb_impl.cc new file mode 100644 index 000000000..690528531 --- /dev/null +++ b/src/load_balancer/lb_impl.cc @@ -0,0 +1,531 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "load_balancer/lb_impl.h" + +#include +#include +#include +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" + +#include "load_balancer/unity_balancer.h" +#include "proto/tabletnode.pb.h" +#include "tera.h" +#include "common/timer.h" + +DECLARE_bool(tera_master_meta_isolate_enabled); +DECLARE_string(tera_master_meta_table_name); +DECLARE_int32(tera_lb_impl_thread_num); +DECLARE_int32(tera_lb_load_balance_period_s); +DECLARE_int32(tera_lb_max_compute_steps); +DECLARE_int32(tera_lb_max_compute_steps_per_tablet); +DECLARE_int32(tera_lb_max_compute_time_ms); +DECLARE_double(tera_lb_min_cost_need_balance); +DECLARE_double(tera_lb_move_count_cost_weight); +DECLARE_int32(tera_lb_tablet_max_move_num); +DECLARE_double(tera_lb_tablet_max_move_percent); +DECLARE_double(tera_lb_move_frequency_cost_weight); +DECLARE_int32(tera_lb_tablet_move_too_frequently_threshold_s); +DECLARE_double(tera_lb_abnormal_node_cost_weight); +DECLARE_double(tera_lb_abnormal_node_ratio); 
+DECLARE_double(tera_lb_read_pending_node_cost_weight); +DECLARE_double(tera_lb_write_pending_node_cost_weight); +DECLARE_double(tera_lb_scan_pending_node_cost_weight); +DECLARE_double(tera_lb_tablet_count_cost_weight); +DECLARE_double(tera_lb_size_cost_weight); +DECLARE_double(tera_lb_read_load_cost_weight); +DECLARE_double(tera_lb_write_load_cost_weight); +DECLARE_double(tera_lb_scan_load_cost_weight); +DECLARE_bool(tera_lb_debug_mode_enabled); + +using tera::master::NodeState; +using tera::master::Table; +using tera::master::TablePtr; +using tera::master::Tablet; +using tera::master::TabletPtr; +using tera::master::TabletNode; +using tera::master::TabletNodePtr; + +namespace tera { +namespace load_balancer { + +LBImpl::LBImpl() : + thread_pool_(new ThreadPool(FLAGS_tera_lb_impl_thread_num)), + sdk_client_(nullptr), + safemode_(true), + round_(0), + lb_debug_mode_(FLAGS_tera_lb_debug_mode_enabled) { +} + +LBImpl::~LBImpl() { +} + +bool LBImpl::Init() { + if (lb_debug_mode_) { + LOG(INFO) << "[lb] debug mode enabled"; + } + + // tera_entry has init glog + Client::SetGlogIsInitialized(); + + sdk_client_.reset(Client::NewClient()); + if (!sdk_client_) { + LOG(ERROR) << "[lb] open sdk client fail"; + return false; + } + + ScheduleLoadBalance(); + + return true; +} + +void LBImpl::ScheduleLoadBalance() { + int schedule_period = FLAGS_tera_lb_load_balance_period_s * 1000; + VLOG(5) << "[lb] LoadBalance will be scheduled in: " << FLAGS_tera_lb_load_balance_period_s << "s"; + thread_pool_->DelayTask(schedule_period, + [this](int64_t) { + DoLoadBalance(); + ScheduleLoadBalance(); + } + ); +} + +void LBImpl::DoLoadBalance() { + ++round_; + VLOG(5) << "[lb] LoadBalance begin round: " << round_; + int64_t start_time = get_micros(); + + std::vector tablet_nodes; + std::vector tables; + std::vector tablets; + if (!Collect(&tablet_nodes, &tables, &tablets)) { + return; + } + + if (lb_debug_mode_) { + DebugCollect(tablet_nodes, tables, tablets); + } + + std::vector> lb_nodes; + 
CreateLBInput(tables, tablet_nodes, tablets, &lb_nodes); + + if (lb_debug_mode_) { + DebugLBNode(lb_nodes); + } + + LBOptions options; + options.max_compute_steps = FLAGS_tera_lb_max_compute_steps; + options.max_compute_steps_per_tablet = FLAGS_tera_lb_max_compute_steps_per_tablet; + options.max_compute_time_ms = FLAGS_tera_lb_max_compute_time_ms; + options.min_cost_need_balance = FLAGS_tera_lb_min_cost_need_balance; + options.move_count_cost_weight = FLAGS_tera_lb_move_count_cost_weight; + options.tablet_max_move_num = FLAGS_tera_lb_tablet_max_move_num; + options.tablet_max_move_percent = FLAGS_tera_lb_tablet_max_move_percent; + options.move_frequency_cost_weight = FLAGS_tera_lb_move_frequency_cost_weight; + options.tablet_move_too_frequently_threshold_s = FLAGS_tera_lb_tablet_move_too_frequently_threshold_s; + options.abnormal_node_cost_weight = FLAGS_tera_lb_abnormal_node_cost_weight; + options.abnormal_node_ratio = FLAGS_tera_lb_abnormal_node_ratio; + options.read_pending_node_cost_weight = FLAGS_tera_lb_read_pending_node_cost_weight; + options.write_pending_node_cost_weight = FLAGS_tera_lb_write_pending_node_cost_weight; + options.scan_pending_node_cost_weight = FLAGS_tera_lb_scan_pending_node_cost_weight; + options.tablet_count_cost_weight = FLAGS_tera_lb_tablet_count_cost_weight; + options.size_cost_weight = FLAGS_tera_lb_size_cost_weight; + options.read_load_cost_weight = FLAGS_tera_lb_read_load_cost_weight; + options.write_load_cost_weight = FLAGS_tera_lb_write_load_cost_weight; + options.scan_load_cost_weight = FLAGS_tera_lb_scan_load_cost_weight; + options.meta_table_isolate_enabled = FLAGS_tera_master_meta_isolate_enabled; + options.meta_table_name = FLAGS_tera_master_meta_table_name; + options.meta_table_node_addr = GetMetaNodeAddr(); + options.debug_mode_enabled = lb_debug_mode_; + + std::unique_ptr balancer(new UnityBalancer(options)); + std::vector plans; + if (!balancer->BalanceCluster(lb_nodes, &plans)) { + LOG(WARNING) << "[lb] LoadBalance 
failed"; + return; + } + + DebugPlan(plans); + + if (!IsSafemode()) { + bool master_safe_mode = true; + bool get_success = GetMasterSafemode(&master_safe_mode); + + if (get_success && !master_safe_mode) { + ExecutePlan(plans); + } else if (!get_success) { + VLOG(5) << "[lb] skip execute plan due to fail to get master safe mode"; + } else if (master_safe_mode) { + VLOG(5) << "[lb] skip execute plan due to master is in safe mode"; + } else { + } + } else { + VLOG(5) << "[lb] skip execute plan in safe mode"; + } + + int64_t cost_time = get_micros() - start_time; + VLOG(5) << "[lb] LoadBalance end round: " << round_ + <<", cost: " << cost_time / 1000.0 << "ms"; +} + +bool LBImpl::CreateLBInput( + const std::vector& tables, + const std::vector& nodes, + const std::vector& tablets, + std::vector>* lb_nodes) { + lb_nodes->clear(); + + std::map> nodes_map; + for (const auto& node : nodes) { + LBTabletNode* p_lb_node = new LBTabletNode(); + p_lb_node->tablet_node_ptr = node; + nodes_map[node->GetAddr()].reset(p_lb_node); + } + + for (const auto& tablet : tablets) { + std::string addr = tablet->GetServerAddr(); + if (nodes_map.find(addr) != nodes_map.end()) { + LBTablet* p_lb_tablet = new LBTablet(); + p_lb_tablet->tablet_ptr = tablet; + std::shared_ptr lb_tablet(p_lb_tablet); + nodes_map[addr]->tablets.emplace_back(lb_tablet); + } else { + // TODO + // unassigned tablet, skip now + } + } + + for (const auto& pair : nodes_map) { + lb_nodes->emplace_back(pair.second); + } + + return true; +} + +bool LBImpl::Collect(std::vector* nodes, + std::vector* tables, + std::vector* tablets) { + if (nodes == nullptr || tables == nullptr || tablets == nullptr) { + return false; + } + nodes->clear(); + tables->clear(); + tablets->clear(); + + int64_t start_time = get_micros(); + + if (!CollectNodes(nodes)) { + LOG(ERROR) << "[lb] collect nodes fail"; + return false; + } + + if (!CollectTablets(tables, tablets)) { + LOG(ERROR) << "[lb] collect tablets fail"; + return false; + } + + int64_t 
cost_time = get_micros() - start_time; + VLOG(5) << "[lb] Collect cost: " << cost_time / 1000.0 << "ms"; + + return true; +} + +bool LBImpl::CollectNodes(std::vector* nodes) { + tera::ClientImpl* client_impl = static_cast(sdk_client_.get()); + std::vector infos; + ErrorCode err; + if (!client_impl->ShowTabletNodesInfo(&infos, &err)) { + LOG(ERROR) << "[lb] fail to get TabletNodeInfo, err: " << err.ToString(); + return false; + } + + for (const auto& info : infos) { + TabletNodePtr node(new TabletNode()); + NodeInfoToNode(info, node); + nodes->push_back(node); + } + + VLOG(5) << "[lb] collected node size: " << nodes->size(); + + return true; +} + +bool LBImpl::NodeInfoToNode(const TabletNodeInfo& info, TabletNodePtr node) { + node->info_ = info; + + node->addr_ = info.addr(); + node->state_ = StringToNodeState(info.status_m()); + node->data_size_ = info.load(); + node->average_counter_.read_pending_ = info.read_pending(); + node->average_counter_.write_pending_ = info.write_pending(); + node->average_counter_.scan_pending_ = info.scan_pending(); + + return true; +} + +NodeState LBImpl::StringToNodeState(const std::string& str) { + if (str == "kReady") { + return tera::master::kReady; + } else if (str == "kOffLine") { + return tera::master::kOffLine; + } else if (str == "kOnKick") { + return tera::master::kOnKick; + } else if (str == "kWaitKick") { + return tera::master::kWaitKick; + } else { + return tera::master::kOffLine; + } +} + +bool LBImpl::CollectTablets(std::vector* tables, + std::vector* tablets) { + tera::ClientImpl* client_impl = static_cast(sdk_client_.get()); + TableMetaList table_list; + TabletMetaList tablet_list; + bool is_brief = false; + ErrorCode err; + if (!client_impl->ShowTablesInfo(&table_list, &tablet_list, is_brief, &err)) { + LOG(ERROR) << "[lb] fail to get tablets, err: " << err.ToString(); + return false; + } + + std::map table_name_to_ptr; + + for (int i = 0; i < table_list.meta_size(); ++i) { + std::string table_name = 
table_list.meta(i).table_name(); + TablePtr table(new tera::master::Table(table_name)); + TableMetaToTable(table_list.meta(i), table); + tables->push_back(table); + + if (table_name_to_ptr.find(table_name) == table_name_to_ptr.end()) { + table_name_to_ptr[table_name] = table; + } + } + + if (tablet_list.meta_size() != tablet_list.counter_size()) { + LOG(ERROR) << "[lb] invalid TabletMetaList, meta size: " << tablet_list.meta_size() + << " counter size: " << tablet_list.counter_size(); + return false; + } + for (int i = 0; i < tablet_list.meta_size(); ++i) { + std::string table_name = tablet_list.meta(i).table_name(); + if (table_name_to_ptr.find(table_name) == table_name_to_ptr.end()) { + LOG(WARNING) << "[lb] tablet's table not exist " << "tablet path: " + << tablet_list.meta(i).path() << "table: " << table_name; + continue; + } + TabletPtr tablet(new tera::master::Tablet(tablet_list.meta(i), table_name_to_ptr[table_name])); + tablet->SetCounter(tablet_list.counter(i)); + if (tablet_list.meta(i).has_last_move_time_us()) { + tablet->SetLastMoveTime(tablet_list.meta(i).last_move_time_us()); + } else { + // !!! compatible with old master + // !!! 
set last move time to 0 will disable the MoveFrequencyCostFunction strategy + tablet->SetLastMoveTime(0); + } + tablets->push_back(tablet); + + if (table_name == FLAGS_tera_master_meta_table_name) { + SetMetaNodeAddr(tablet->GetServerAddr()); + VLOG(5) << "[lb] meta table node addr: " << GetMetaNodeAddr(); + } + } + + VLOG(5) << "[lb] collected table size: " << tables->size(); + VLOG(5) << "[lb] collected tablet size: " << tablets->size(); + + return true; +} + +bool LBImpl::TableMetaToTable(const TableMeta& meta, TablePtr table) { + table->SetStatus(meta.status()); + table->SetSchema(meta.schema()); + + return true; +} + +void LBImpl::DebugCollect(const std::vector& nodes, + const std::vector& tables, + const std::vector& tablets) { + LOG(INFO) << ""; + LOG(INFO) << "[lb] DebugCollect begin -----"; + + LOG(INFO) << "[lb] " << tables.size() << " table:" ; + for (const auto& table : tables) { + LOG(INFO) << "table:" + table->GetTableName() + << " status:" << StatusCodeToString(table->GetStatus()); + } + + LOG(INFO) << "[lb] " << nodes.size() << " node:"; + for (const auto& node : nodes) { + LOG(INFO) << "addr:" + node->GetAddr() + << " state:" << tera::master::NodeStateToString(node->GetState()) + << " size:" << node->GetSize() << "B" + << " r_pending:" << node->GetReadPending() + << " w_pending:" << node->GetWritePending() + << " s_pending:" << node->GetScanPending(); + } + + LOG(INFO) << "[lb] " << tablets.size() << " tablet:"; + for (const auto& tablet : tablets) { + LOG(INFO) << "path:" + tablet->GetPath() + << " status:" << StatusCodeToString(tablet->GetStatus()) + << " server:" << tablet->GetServerAddr() + << " table:" << tablet->GetTableName() + << " last_move_time_us:" << tablet->LastMoveTime(); + } + + LOG(INFO) << "[lb] DebugCollect end -----"; + LOG(INFO) << ""; +} + +void LBImpl::DebugLBNode(const std::vector>& lb_nodes) { + LOG(INFO) << ""; + LOG(INFO) << "[lb] DebugLBNode begin -----"; + LOG(INFO) << "[lb] " << lb_nodes.size() << " lb_nodes:" ; + + for 
(const auto& node : lb_nodes) { + LOG(INFO) << "[lb] " << node->tablet_node_ptr->GetAddr() << ":"; + for (const auto& lb_tablet : node->tablets) { + LOG(INFO) << "[lb] " << lb_tablet->tablet_ptr->GetPath(); + } + } + + LOG(INFO) << "[lb] DebugLBNode end -----"; + LOG(INFO) << ""; +} + +void LBImpl::DebugPlan(const std::vector& plans) { + VLOG(5) << ""; + VLOG(5) << "[lb] DebugPlan begin ----"; + VLOG(5) << plans.size() << " plans:"; + + for (const auto& plan : plans) { + VLOG(5) << "[lb] " + plan.ToString(); + } + + VLOG(5) << "[lb] DebugPlan end ----"; + VLOG(5) << ""; +} + +void LBImpl::ExecutePlan(const std::vector& plans) { + tera::ClientImpl* client_impl = static_cast(sdk_client_.get()); + for (const auto& plan : plans) { + std::string tablet_path = plan.TabletPath(); + std::string dest_addr = plan.DestAddr(); + + std::vector arg_list; + arg_list.emplace_back("move"); + arg_list.emplace_back(tablet_path); + arg_list.emplace_back(dest_addr); + + ErrorCode err; + if (!client_impl->CmdCtrl("tablet", arg_list, nullptr, nullptr, &err)) { + LOG(ERROR) << "[lb] fail to execute plan:" << plan.ToString() << err.ToString(); + } else { + VLOG(5) << "[lb] execute plan success:" << plan.ToString(); + } + } +} + +bool LBImpl::IsSafemode() const { + MutexLock lock(&mutex_); + return safemode_; +} + +bool LBImpl::SetSafemode(bool value) { + MutexLock lock(&mutex_); + safemode_ = value; + + if (value) { + LOG(INFO) << "[lb] LoadBanlacer enter safemode"; + } else { + LOG(INFO) << "[lb] LoadBanlacer leave safemode"; + } + + return true; +} + +bool LBImpl::GetMasterSafemode(bool* safe_mode) { + if (safe_mode == nullptr) { + return false; + } + + std::string op = "get"; + std::vector arg_list; + arg_list.push_back(op); + + tera::ClientImpl* client_impl = static_cast(sdk_client_.get()); + ErrorCode err; + if (!client_impl->CmdCtrl("safemode", arg_list, safe_mode, NULL, &err)) { + LOG(ERROR) << "[lb] fail to " << op << " master safemode" << err.ToString(); + return false; + } + + 
VLOG(20) << "[lb] master safemode: " << *safe_mode; + return true; +} + +std::string LBImpl::GetMetaNodeAddr() const { + MutexLock lock(&mutex_); + return meta_node_addr_; +} + +bool LBImpl::SetMetaNodeAddr(const std::string& addr) { + MutexLock lock(&mutex_); + meta_node_addr_ = addr; + return true; +} + +void LBImpl::CmdCtrl(const CmdCtrlRequest* request, + CmdCtrlResponse* response, + google::protobuf::Closure* done) { + std::string cmd_line; + for (int32_t i = 0; i < request->arg_list_size(); i++) { + cmd_line += request->arg_list(i); + if (i != request->arg_list_size() - 1) { + cmd_line += " "; + } + } + LOG(INFO) << "[lb] receive cmd: " << request->command() << " " << cmd_line; + + response->set_sequence_id(request->sequence_id()); + + if (request->command() == "safemode") { + SafeModeCmdCtrl(request, response); + } else { + response->set_status(kInvalidArgument); + } + + done->Run(); + return; +} + +void LBImpl::SafeModeCmdCtrl(const CmdCtrlRequest* request, + CmdCtrlResponse* response) { + if (request->arg_list_size() != 1) { + response->set_status(kInvalidArgument); + return; + } + + if (request->arg_list(0) == "enter") { + SetSafemode(true); + response->set_status(kLoadBalancerOk); + } else if (request->arg_list(0) == "leave") { + SetSafemode(false); + response->set_status(kLoadBalancerOk); + } else if (request->arg_list(0) == "get") { + response->set_bool_result(IsSafemode()); + response->set_status(kLoadBalancerOk); + } else { + response->set_status(kInvalidArgument); + } +} + +} // namespace load_balancer +} // namespace tera + diff --git a/src/load_balancer/lb_impl.h b/src/load_balancer/lb_impl.h new file mode 100644 index 000000000..2e2abe88d --- /dev/null +++ b/src/load_balancer/lb_impl.h @@ -0,0 +1,93 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_LOAD_BALANCER_LB_IMPL_H_ +#define TERA_LOAD_BALANCER_LB_IMPL_H_ + +#include +#include +#include +#include + +#include "common/mutex.h" +#include "common/thread_pool.h" +#include "load_balancer/lb_node.h" +#include "load_balancer/plan.h" +#include "master/tablet_manager.h" +#include "master/tabletnode_manager.h" +#include "proto/load_balancer_rpc.pb.h" +#include "sdk/client_impl.h" + +namespace tera { +namespace load_balancer { + +class LBImpl { +public: + LBImpl(); + virtual ~LBImpl(); + + bool Init(); + + void CmdCtrl(const CmdCtrlRequest* request, + CmdCtrlResponse* response, + google::protobuf::Closure* done); + +private: + void ScheduleLoadBalance(); + void DoLoadBalance(); + + bool CreateLBInput(const std::vector& tables, + const std::vector& nodes, + const std::vector& tablets, + std::vector>* lb_nodes); + + bool Collect(std::vector* nodes, + std::vector* tables, + std::vector* tablets); + + bool CollectNodes(std::vector* nodes); + bool NodeInfoToNode(const TabletNodeInfo& info, + tera::master::TabletNodePtr node); + tera::master::NodeState StringToNodeState(const std::string& str); + + bool CollectTablets(std::vector* tables, + std::vector* tablets); + bool TableMetaToTable(const TableMeta& meta, tera::master::TablePtr table); + + void ExecutePlan(const std::vector& plans); + + bool IsSafemode() const; + bool SetSafemode(bool value); + + bool GetMasterSafemode(bool* safe_mode); + + std::string GetMetaNodeAddr() const; + bool SetMetaNodeAddr(const std::string& addr); + + void DebugCollect(const std::vector& nodes, + const std::vector& tables, + const std::vector& tablets); + void DebugLBNode(const std::vector>& lb_nodes); + void DebugPlan(const std::vector& plans); + + void SafeModeCmdCtrl(const CmdCtrlRequest* request, + CmdCtrlResponse* response); + +private: + mutable Mutex mutex_; + + std::unique_ptr thread_pool_; + std::unique_ptr sdk_client_; + + bool safemode_; + uint64_t round_; + std::string meta_node_addr_; + + bool lb_debug_mode_; 
+}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_LB_IMPL_H_ diff --git a/src/load_balancer/lb_node.h b/src/load_balancer/lb_node.h new file mode 100644 index 000000000..b3b4430e2 --- /dev/null +++ b/src/load_balancer/lb_node.h @@ -0,0 +1,30 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_LOAD_BALANCER_LB_NODE_H_ +#define TERA_LOAD_BALANCER_LB_NODE_H_ + +#include +#include +#include + +#include "master/tablet_manager.h" +#include "master/tabletnode_manager.h" + +namespace tera { +namespace load_balancer { + +struct LBTablet { + tera::master::TabletPtr tablet_ptr; +}; + +struct LBTabletNode { + tera::master::TabletNodePtr tablet_node_ptr; + std::vector> tablets; +}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_LB_NODE_H_ diff --git a/src/load_balancer/lb_service_impl.cc b/src/load_balancer/lb_service_impl.cc new file mode 100644 index 000000000..e67759c1c --- /dev/null +++ b/src/load_balancer/lb_service_impl.cc @@ -0,0 +1,49 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "load_balancer/lb_service_impl.h" + +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" + +#include "load_balancer/lb_impl.h" +#include "utils/network_utils.h" + +DECLARE_int32(tera_lb_server_thread_num); + +namespace tera { +namespace load_balancer { + +LBServiceImpl::LBServiceImpl(const std::shared_ptr& lb_impl) : + lb_impl_(lb_impl), + thread_pool_(new ThreadPool(FLAGS_tera_lb_server_thread_num)) { +} + +LBServiceImpl::~LBServiceImpl() { +} + +void LBServiceImpl::CmdCtrl(google::protobuf::RpcController* controller, + const CmdCtrlRequest* request, + CmdCtrlResponse* response, + google::protobuf::Closure* done) { + VLOG(20) << "accept RPC (CmdCtrl) from: " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task task = + std::bind(&LBServiceImpl::DoCmdCtrl, this, controller, request, response, done); + thread_pool_->AddTask(task); +} + +void LBServiceImpl::DoCmdCtrl(google::protobuf::RpcController* controller, + const CmdCtrlRequest* request, + CmdCtrlResponse* response, + google::protobuf::Closure* done) { + VLOG(20) << "run RPC (CmdCtrl)"; + lb_impl_->CmdCtrl(request, response, done); + VLOG(20) << "finish RPC (CmdCtrl)"; +} + +} // namespace load_balancer +} // namespace tera + diff --git a/src/load_balancer/lb_service_impl.h b/src/load_balancer/lb_service_impl.h new file mode 100644 index 000000000..f0754bb6e --- /dev/null +++ b/src/load_balancer/lb_service_impl.h @@ -0,0 +1,42 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_LOAD_BALANCER_LB_SERVICE_IMPL_H_ +#define TERA_LOAD_BALANCER_LB_SERVICE_IMPL_H_ + +#include + +#include "common/thread_pool.h" +#include "proto/load_balancer_rpc.pb.h" + +namespace tera { +namespace load_balancer { + +class LBImpl; + +class LBServiceImpl: public LoadBalancerService { +public: + explicit LBServiceImpl(const std::shared_ptr& lb_impl); + virtual ~LBServiceImpl(); + + void CmdCtrl(google::protobuf::RpcController* controller, + const CmdCtrlRequest* request, + CmdCtrlResponse* response, + google::protobuf::Closure* done); + +private: + void DoCmdCtrl(google::protobuf::RpcController* controller, + const CmdCtrlRequest* request, + CmdCtrlResponse* response, + google::protobuf::Closure* done); + +private: + std::shared_ptr lb_impl_; + std::unique_ptr thread_pool_; +}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_LB_SERVICE_IMPL_H_ diff --git a/src/load_balancer/options.h b/src/load_balancer/options.h new file mode 100644 index 000000000..4d280c6ce --- /dev/null +++ b/src/load_balancer/options.h @@ -0,0 +1,100 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_LOAD_BALANCER_OPTIONS_H_ +#define TERA_LOAD_BALANCER_OPTIONS_H_ + +#include + +namespace tera { +namespace load_balancer { + +struct LBOptions { + // calculate + uint64_t max_compute_steps; + uint32_t max_compute_steps_per_tablet; + uint64_t max_compute_time_ms; + double min_cost_need_balance; + + // MoveCountCostFunction + double move_count_cost_weight; + uint32_t tablet_max_move_num; + double tablet_max_move_percent; + + // MoveFrequencyCostFunction + double move_frequency_cost_weight; + uint32_t tablet_move_too_frequently_threshold_s; + + // AbnormalNodeCostFunction + double abnormal_node_cost_weight; + // if not ready tablets's ratio is hither than this value, + // the node in considered abnormal + double abnormal_node_ratio; + + // ReadPendingNodeCostFunction + double read_pending_node_cost_weight; + + // WritePendingNodeCostFunction + double write_pending_node_cost_weight; + + // ScanPendingNodeCostFunction + double scan_pending_node_cost_weight; + + // CountCostFunction + double tablet_count_cost_weight; + + // SizeCostFunction + double size_cost_weight; + + // LoadCostFunction + double read_load_cost_weight; + double write_load_cost_weight; + double scan_load_cost_weight; + + // meta table + bool meta_table_isolate_enabled; + std::string meta_table_name; + std::string meta_table_node_addr; + + // debug + bool debug_mode_enabled; + + LBOptions() : + max_compute_steps(1000000), + max_compute_steps_per_tablet(1000), + max_compute_time_ms(30 * 1000), + min_cost_need_balance(0.1), + + move_count_cost_weight(10), + tablet_max_move_num(10), + tablet_max_move_percent(0.001), + + move_frequency_cost_weight(10), + tablet_move_too_frequently_threshold_s(600), + + abnormal_node_cost_weight(10), + abnormal_node_ratio(0.5), + + read_pending_node_cost_weight(10), + write_pending_node_cost_weight(10), + scan_pending_node_cost_weight(10), + + tablet_count_cost_weight(0), + size_cost_weight(100), + read_load_cost_weight(0), + write_load_cost_weight(0), + 
scan_load_cost_weight(0), + + meta_table_isolate_enabled(true), + meta_table_name("meta_table"), + meta_table_node_addr(""), + + debug_mode_enabled(false) { + } +}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_OPTIONS_H_ diff --git a/src/load_balancer/plan.h b/src/load_balancer/plan.h new file mode 100644 index 000000000..6e4ca41ae --- /dev/null +++ b/src/load_balancer/plan.h @@ -0,0 +1,71 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_LOAD_BALANCER_PLAN_H_ +#define TERA_LOAD_BALANCER_PLAN_H_ + +#include + +#include "master/tablet_manager.h" +#include "master/tabletnode_manager.h" + +namespace tera { +namespace load_balancer { + +class Plan { +public: + Plan() {} + + Plan(const tera::master::TabletPtr& tablet, + const tera::master::TabletNodePtr& source, + const tera::master::TabletNodePtr& dest) { + tablet_ = tablet; + source_ = source; + dest_ = dest; + } + + virtual ~Plan() {} + + virtual std::string TabletPath() const { + if (tablet_) { + return tablet_->GetPath(); + } else { + return ""; + } + } + + virtual std::string SourceAddr() const { + if (source_) { + return source_->GetAddr(); + } else { + return ""; + } + } + + virtual std::string DestAddr() const { + if (dest_) { + return dest_->GetAddr(); + } else { + return ""; + } + } + + virtual std::string ToString() const { + std::string str = "tablet:" + (tablet_ ? tablet_->GetPath() : "") + + " source:" + (source_ ? source_->GetAddr() : "") + + " dest:" + (dest_ ? 
dest_->GetAddr() : ""); + + return str; + } + +private: + tera::master::TabletPtr tablet_; + tera::master::TabletNodePtr source_; + tera::master::TabletNodePtr dest_; +}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_PLAN_H_ diff --git a/src/load_balancer/random.h b/src/load_balancer/random.h new file mode 100644 index 000000000..46a43008f --- /dev/null +++ b/src/load_balancer/random.h @@ -0,0 +1,73 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_LOAD_BALANCER_RANDOM_H_ +#define TERA_LOAD_BALANCER_RANDOM_H_ + +#include + +#include +#include + +#include "common/timer.h" + +namespace tera { +namespace load_balancer { + +class Random { +public: + // random from [a, b) + // a < b should be ensured + // can generate negative number + // avg time cost: 25us + static int RandStd(int a, int b) { + assert(a < b); + + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> dis(a, b - 1); + + return dis(gen); + } + + // random from [a, b) + // a < b should be ensured + // can not generate negative number + // avg time cost: 150ns + static uint32_t RandTime(uint32_t a, uint32_t b) { + assert(a < b); + + int64_t time_us = get_micros(); + return time_us % (b - a) + a; + } + + // random from [a, b) + // a < b should be ensured + // can not generate negative number + // avg time cost: 15ns + static uint32_t Rand(uint32_t a, uint32_t b) { + assert(a < b); + + uint32_t rand = xorshift32(); + return rand % (b - a) + a; + } + +private: + /* The state word must be initialized to non-zero */ + static uint32_t xorshift32() { + /* Algorithm "xor" from p. 
4 of Marsaglia, "Xorshift RNGs" */ + static uint32_t state = time(NULL); + uint32_t x = state; + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + state = x; + return x; + } +}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_RANDOM_H_ diff --git a/src/load_balancer/test/action_generators_test.cc b/src/load_balancer/test/action_generators_test.cc new file mode 100644 index 000000000..6cbe65e4d --- /dev/null +++ b/src/load_balancer/test/action_generators_test.cc @@ -0,0 +1,311 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "load_balancer/action_generators.h" + +namespace tera { +namespace load_balancer { + +class RandomActionGeneratorTest : public ::testing::Test { +public: + virtual void SetUp() { + random_action_generator_.reset(new RandomActionGenerator()); + + std::vector> empty_lb_nodes; + LBOptions options; + cluster_.reset(new Cluster(empty_lb_nodes, options)); + } + + virtual void TearDown() { + } + +private: + std::shared_ptr random_action_generator_; + std::shared_ptr cluster_; +}; + +class TabletCountActionGeneratorTest : public ::testing::Test { +public: + virtual void SetUp() { + tablet_count_action_generator_.reset(new TabletCountActionGenerator()); + + std::vector> empty_lb_nodes; + LBOptions options; + cluster_.reset(new Cluster(empty_lb_nodes, options)); + } + + virtual void TearDown() { + } + +private: + std::shared_ptr tablet_count_action_generator_; + std::shared_ptr cluster_; +}; + +class SizeActionGeneratorTest : public ::testing::Test { +public: + virtual void SetUp() { + size_action_generator_.reset(new SizeActionGenerator()); + + std::vector> empty_lb_nodes; + LBOptions options; + cluster_.reset(new Cluster(empty_lb_nodes, options)); + } + + virtual void TearDown() { + } + +private: 
+ std::shared_ptr size_action_generator_; + std::shared_ptr cluster_; +}; + +class ReadLoadActionGeneratorTest : public ::testing::Test { +public: + virtual void SetUp() { + read_load_action_generator_.reset(new ReadLoadActionGenerator()); + + std::vector> empty_lb_nodes; + LBOptions options; + cluster_.reset(new Cluster(empty_lb_nodes, options)); + } + + virtual void TearDown() { + } + +private: + std::shared_ptr read_load_action_generator_; + std::shared_ptr cluster_; +}; + +class WriteLoadActionGeneratorTest : public ::testing::Test { +public: + virtual void SetUp() { + write_load_action_generator_.reset(new WriteLoadActionGenerator()); + + std::vector> empty_lb_nodes; + LBOptions options; + cluster_.reset(new Cluster(empty_lb_nodes, options)); + } + + virtual void TearDown() { + } + +private: + std::shared_ptr write_load_action_generator_; + std::shared_ptr cluster_; +}; + +class ScanLoadActionGeneratorTest : public ::testing::Test { +public: + virtual void SetUp() { + scan_load_action_generator_.reset(new ScanLoadActionGenerator()); + + std::vector> empty_lb_nodes; + LBOptions options; + cluster_.reset(new Cluster(empty_lb_nodes, options)); + } + + virtual void TearDown() { + } + +private: + std::shared_ptr scan_load_action_generator_; + std::shared_ptr cluster_; +}; + +TEST_F(RandomActionGeneratorTest, PickNodeTest) { + cluster_->tablet_node_num_ = 10; + + uint32_t index = random_action_generator_->PickRandomNode(cluster_); + ASSERT_GE(index, 0); + ASSERT_LT(index, cluster_->tablet_node_num_); + + uint32_t other_index = random_action_generator_->PickOtherRandomNode(cluster_, index); + ASSERT_GE(other_index, 0); + ASSERT_LT(other_index, cluster_->tablet_node_num_); + ASSERT_NE(index, other_index); +} + +TEST_F(RandomActionGeneratorTest, PickRandomTabletOfNodeTest) { + cluster_->tablet_node_num_ = 1; + ASSERT_EQ(random_action_generator_->PickRandomTabletOfNode(cluster_, 0), std::numeric_limits::max()); + + cluster_->tablets_per_node_[0].emplace_back(0); + 
ASSERT_EQ(random_action_generator_->PickRandomTabletOfNode(cluster_, 0), 0); +} + +TEST_F(RandomActionGeneratorTest, GenerateTest) { + cluster_->tablet_node_num_ = 1; + std::shared_ptr action(random_action_generator_->Generate(cluster_)); + ASSERT_EQ(Action::Type::EMPTY, action->GetType()); + + cluster_->tablet_node_num_ = 2; + cluster_->tablets_per_node_[0].emplace_back(0); + cluster_->tablets_per_node_[1].emplace_back(1); + std::shared_ptr action_0(random_action_generator_->Generate(cluster_)); + ASSERT_EQ(Action::Type::MOVE, action_0->GetType()); +} + +TEST_F(TabletCountActionGeneratorTest, GenerateTest) { + uint32_t more_tablets_node_index = 0; + uint32_t less_tablets_node_index = 1; + cluster_->tablets_per_node_[more_tablets_node_index].emplace_back(0); + cluster_->tablets_per_node_[more_tablets_node_index].emplace_back(1); + cluster_->tablets_per_node_[less_tablets_node_index].emplace_back(2); + + cluster_->tablet_node_num_ = 2; + + cluster_->node_index_sorted_by_tablet_count_.emplace_back(more_tablets_node_index); + cluster_->node_index_sorted_by_tablet_count_.emplace_back(less_tablets_node_index); + + cluster_->SortNodesByTabletCount(); + ASSERT_EQ(more_tablets_node_index, tablet_count_action_generator_->PickMostTabletsNode(cluster_)); + ASSERT_EQ(less_tablets_node_index, tablet_count_action_generator_->PickLeastTabletsNode(cluster_)); + + std::shared_ptr action(tablet_count_action_generator_->Generate(cluster_)); + ASSERT_EQ(Action::Type::MOVE, action->GetType()); + MoveAction* move_action = dynamic_cast(action.get()); + ASSERT_EQ(more_tablets_node_index, move_action->source_node_index_); + ASSERT_EQ(less_tablets_node_index, move_action->dest_node_index_); + + cluster_->meta_table_node_index_ = less_tablets_node_index; + ASSERT_EQ(more_tablets_node_index, tablet_count_action_generator_->PickMostTabletsNode(cluster_)); + ASSERT_EQ(more_tablets_node_index, tablet_count_action_generator_->PickLeastTabletsNode(cluster_)); +} + +TEST_F(SizeActionGeneratorTest, 
GenerateTest) { + uint32_t larger_size_node_index = 0; + uint32_t smaller_size_node_index = 1; + cluster_->size_per_node_[larger_size_node_index] = 20; + cluster_->size_per_node_[smaller_size_node_index] = 10; + + uint32_t tablet_index_on_larger_size_node = 0; + uint32_t tablet_index_on_smaller_size_node = 1; + cluster_->tablet_node_num_ = 2; + cluster_->tablets_per_node_[larger_size_node_index].emplace_back(tablet_index_on_larger_size_node); + cluster_->tablets_per_node_[smaller_size_node_index].emplace_back(tablet_index_on_smaller_size_node); + + cluster_->node_index_sorted_by_size_.emplace_back(larger_size_node_index); + cluster_->node_index_sorted_by_size_.emplace_back(smaller_size_node_index); + + cluster_->SortNodesBySize(); + ASSERT_EQ(larger_size_node_index, size_action_generator_->PickLargestSizeNode(cluster_)); + ASSERT_EQ(smaller_size_node_index, size_action_generator_->PickSmallestSizeNode(cluster_)); + + std::shared_ptr action(size_action_generator_->Generate(cluster_)); + ASSERT_EQ(Action::Type::MOVE, action->GetType()); + MoveAction* move_action = dynamic_cast(action.get()); + ASSERT_EQ(tablet_index_on_larger_size_node, move_action->tablet_index_); + ASSERT_EQ(larger_size_node_index, move_action->source_node_index_); + ASSERT_EQ(smaller_size_node_index, move_action->dest_node_index_); + + cluster_->meta_table_node_index_ = smaller_size_node_index; + ASSERT_EQ(larger_size_node_index, size_action_generator_->PickLargestSizeNode(cluster_)); + ASSERT_EQ(larger_size_node_index, size_action_generator_->PickSmallestSizeNode(cluster_)); +} + +TEST_F(ReadLoadActionGeneratorTest, GenerateTest) { + uint32_t more_read_node_index = 0; + uint32_t less_read_node_index = 1; + cluster_->read_load_per_node_[more_read_node_index] = 20; + cluster_->read_load_per_node_[less_read_node_index] = 10; + + uint32_t tablet_index_on_more_read_node = 0; + uint32_t tablet_index_on_less_read_node = 1; + cluster_->tablet_node_num_ = 2; + 
cluster_->tablets_per_node_[more_read_node_index].emplace_back(tablet_index_on_more_read_node); + cluster_->tablets_per_node_[less_read_node_index].emplace_back(tablet_index_on_less_read_node); + + cluster_->node_index_sorted_by_read_load_.emplace_back(more_read_node_index); + cluster_->node_index_sorted_by_read_load_.emplace_back(less_read_node_index); + + cluster_->SortNodesByReadLoad(); + ASSERT_EQ(more_read_node_index, read_load_action_generator_->PickMostReadNode(cluster_)); + ASSERT_EQ(less_read_node_index, read_load_action_generator_->PickLeastReadNode(cluster_)); + + std::shared_ptr action(read_load_action_generator_->Generate(cluster_)); + ASSERT_EQ(Action::Type::MOVE, action->GetType()); + MoveAction* move_action = dynamic_cast(action.get()); + ASSERT_EQ(tablet_index_on_more_read_node, move_action->tablet_index_); + ASSERT_EQ(more_read_node_index, move_action->source_node_index_); + ASSERT_EQ(less_read_node_index, move_action->dest_node_index_); + + cluster_->meta_table_node_index_ = less_read_node_index; + ASSERT_EQ(more_read_node_index, read_load_action_generator_->PickMostReadNode(cluster_)); + ASSERT_EQ(more_read_node_index, read_load_action_generator_->PickLeastReadNode(cluster_)); +} + +TEST_F(WriteLoadActionGeneratorTest, GenerateTest) { + uint32_t more_write_node_index = 0; + uint32_t less_write_node_index = 1; + cluster_->write_load_per_node_[more_write_node_index] = 20; + cluster_->write_load_per_node_[less_write_node_index] = 10; + + uint32_t tablet_index_on_more_write_node = 0; + uint32_t tablet_index_on_less_write_node = 1; + cluster_->tablet_node_num_ = 2; + cluster_->tablets_per_node_[more_write_node_index].emplace_back(tablet_index_on_more_write_node); + cluster_->tablets_per_node_[less_write_node_index].emplace_back(tablet_index_on_less_write_node); + + cluster_->node_index_sorted_by_write_load_.emplace_back(more_write_node_index); + cluster_->node_index_sorted_by_write_load_.emplace_back(less_write_node_index); + + 
cluster_->SortNodesByWriteLoad(); + ASSERT_EQ(more_write_node_index, write_load_action_generator_->PickMostWriteNode(cluster_)); + ASSERT_EQ(less_write_node_index, write_load_action_generator_->PickLeastWriteNode(cluster_)); + + std::shared_ptr action(write_load_action_generator_->Generate(cluster_)); + ASSERT_EQ(Action::Type::MOVE, action->GetType()); + MoveAction* move_action = dynamic_cast(action.get()); + ASSERT_EQ(tablet_index_on_more_write_node, move_action->tablet_index_); + ASSERT_EQ(more_write_node_index, move_action->source_node_index_); + ASSERT_EQ(less_write_node_index, move_action->dest_node_index_); + + cluster_->meta_table_node_index_ = less_write_node_index; + ASSERT_EQ(more_write_node_index, write_load_action_generator_->PickMostWriteNode(cluster_)); + ASSERT_EQ(more_write_node_index, write_load_action_generator_->PickLeastWriteNode(cluster_)); +} + +TEST_F(ScanLoadActionGeneratorTest, GenerateTest) { + uint32_t more_scan_node_index = 0; + uint32_t less_scan_node_index = 1; + cluster_->scan_load_per_node_[more_scan_node_index] = 20; + cluster_->scan_load_per_node_[less_scan_node_index] = 10; + + uint32_t tablet_index_on_more_scan_node = 0; + uint32_t tablet_index_on_less_scan_node = 1; + cluster_->tablet_node_num_ = 2; + cluster_->tablets_per_node_[more_scan_node_index].emplace_back(tablet_index_on_more_scan_node); + cluster_->tablets_per_node_[less_scan_node_index].emplace_back(tablet_index_on_less_scan_node); + + cluster_->node_index_sorted_by_scan_load_.emplace_back(more_scan_node_index); + cluster_->node_index_sorted_by_scan_load_.emplace_back(less_scan_node_index); + + cluster_->SortNodesByScanLoad(); + ASSERT_EQ(more_scan_node_index, scan_load_action_generator_->PickMostScanNode(cluster_)); + ASSERT_EQ(less_scan_node_index, scan_load_action_generator_->PickLeastScanNode(cluster_)); + + std::shared_ptr action(scan_load_action_generator_->Generate(cluster_)); + ASSERT_EQ(Action::Type::MOVE, action->GetType()); + MoveAction* move_action = 
dynamic_cast(action.get()); + ASSERT_EQ(tablet_index_on_more_scan_node, move_action->tablet_index_); + ASSERT_EQ(more_scan_node_index, move_action->source_node_index_); + ASSERT_EQ(less_scan_node_index, move_action->dest_node_index_); + + cluster_->meta_table_node_index_ = less_scan_node_index; + ASSERT_EQ(more_scan_node_index, scan_load_action_generator_->PickMostScanNode(cluster_)); + ASSERT_EQ(more_scan_node_index, scan_load_action_generator_->PickLeastScanNode(cluster_)); +} + +} // namespace load_balancer +} // namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/load_balancer/test/actions_test.cc b/src/load_balancer/test/actions_test.cc new file mode 100644 index 000000000..28096efa3 --- /dev/null +++ b/src/load_balancer/test/actions_test.cc @@ -0,0 +1,29 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "load_balancer/actions.h" + +namespace tera { +namespace load_balancer { + +class ActionsTest : public ::testing::Test { +}; + +TEST_F(ActionsTest, MoveActionTest) { + MoveAction move_action(0, 0, 1); + std::shared_ptr undo_action(dynamic_cast(move_action.UndoAction())); + + ASSERT_EQ(move_action.tablet_index_, undo_action->tablet_index_); + ASSERT_EQ(move_action.source_node_index_, undo_action->dest_node_index_); + ASSERT_EQ(move_action.dest_node_index_, undo_action->source_node_index_); +} + +} // namespace load_balancer +} // namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/load_balancer/test/balancer_test_main.cc b/src/load_balancer/test/balancer_test_main.cc new file mode 100644 index 000000000..c08b2451d --- /dev/null +++ b/src/load_balancer/test/balancer_test_main.cc @@ -0,0 +1,29 @@ +// Copyright (c) 2015, Baidu.com, Inc. 
All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "utils/utils_cmd.h" +// Entry point of the load balancer unit tests: sets verbose logging into ./log, then runs every registered gtest case. +int main(int argc, char** argv) { + ::google::InitGoogleLogging(argv[0]); + FLAGS_v = 16; + FLAGS_minloglevel=0; + FLAGS_log_dir = "./log"; + if (access(FLAGS_log_dir.c_str(), F_OK)) { + mkdir(FLAGS_log_dir.c_str(), 0777); + } + std::string program_name("load balancer"); + tera::utils::SetupLog(program_name); + ::testing::InitGoogleTest(&argc, argv); /* must run first: removes --gtest_* flags, which gflags would otherwise reject as unknown */ + ::google::ParseCommandLineFlags(&argc, &argv, true); + + return RUN_ALL_TESTS(); +} + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/load_balancer/test/cluster_test.cc b/src/load_balancer/test/cluster_test.cc new file mode 100644 index 000000000..026ad1b78 --- /dev/null +++ b/src/load_balancer/test/cluster_test.cc @@ -0,0 +1,391 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "load_balancer/actions.h" +#include "load_balancer/cluster.h" +#include "load_balancer/lb_node.h" +#include "common/timer.h" + +namespace tera { +namespace load_balancer { + +class ClusterTest : public ::testing::Test { +public: + virtual void SetUp() { + std::vector> empty_lb_nodes; + LBOptions options; + cluster_.reset(new Cluster(empty_lb_nodes, options)); + } + + virtual void TearDown() { + } + +private: + std::shared_ptr cluster_; +}; + +TEST_F(ClusterTest, ValidActionTest) { + TabletMeta tablet_meta_meta; + TabletMeta tablet_meta_other; + tera::master::TabletPtr tablet_ptr_meta(new tera::master::Tablet(tablet_meta_meta)); + tera::master::TabletPtr tablet_ptr_other(new tera::master::Tablet(tablet_meta_other)); + std::shared_ptr lb_tablet_meta = std::make_shared(); + std::shared_ptr lb_tablet_other = std::make_shared(); + lb_tablet_meta->tablet_ptr = tablet_ptr_meta; + lb_tablet_other->tablet_ptr = tablet_ptr_other; + + cluster_->lb_options_.meta_table_name = "meta_table"; + uint32_t table_index_meta = 0; + uint32_t table_index_other= 1; + cluster_->tables_[table_index_meta] = "meta_table"; + cluster_->tables_[table_index_other] = "other_table"; + uint32_t tablet_index_meta = 0; + uint32_t tablet_index_other = 1; + cluster_->tablet_index_to_table_index_[tablet_index_meta] = table_index_meta; + cluster_->tablet_index_to_table_index_[tablet_index_other] = table_index_other; + cluster_->tablets_[tablet_index_meta] = lb_tablet_meta; + cluster_->tablets_[tablet_index_other] = lb_tablet_other; + + uint32_t meta_table_node_index = 0; + uint32_t other_node_index = 1; + cluster_->meta_table_node_index_ = meta_table_node_index; + + // empty action is invalid + std::shared_ptr empty_action(new EmptyAction()); + ASSERT_FALSE(cluster_->ValidAction(empty_action)); + + std::shared_ptr normal_move_action(new MoveAction(tablet_index_meta, 0, 1)); + // move not ready tablet is invalid 
+ ASSERT_TRUE(cluster_->tablets_[tablet_index_meta]->tablet_ptr->SetStatus(kTableOffLine)); + ASSERT_FALSE(cluster_->ValidAction(normal_move_action)); + + // move meta table is invalid + std::shared_ptr move_meta_table_action(new MoveAction(tablet_index_meta, 0, 1)); + ASSERT_TRUE(cluster_->tablets_[tablet_index_meta]->tablet_ptr->SetStatus(kTableReady)); + ASSERT_FALSE(cluster_->ValidAction(move_meta_table_action)); + // move normal tablet is valid + std::shared_ptr move_other_table_action(new MoveAction(tablet_index_other, 0, 1)); + ASSERT_TRUE(cluster_->tablets_[tablet_index_other]->tablet_ptr->SetStatus(kTableReady)); + ASSERT_TRUE(cluster_->ValidAction(move_other_table_action)); + + std::shared_ptr move_to_meta_table_node_action(new MoveAction(tablet_index_other, 0, meta_table_node_index)); + std::shared_ptr move_to_other_node_action(new MoveAction(tablet_index_other, 0, other_node_index)); + cluster_->lb_options_.meta_table_isolate_enabled = true; + // move tablet to meta node is invalid if meta_table_isolate_enabled is true + ASSERT_FALSE(cluster_->ValidAction(move_to_meta_table_node_action)); + // move tablet to normal node is valid even if meta_table_isolate_enabled is true + ASSERT_TRUE(cluster_->ValidAction(move_to_other_node_action)); + cluster_->lb_options_.meta_table_isolate_enabled = false; + // move tablet to any node (including the meta node) is valid if meta_table_isolate_enabled is false + ASSERT_TRUE(cluster_->ValidAction(move_to_meta_table_node_action)); + ASSERT_TRUE(cluster_->ValidAction(move_to_other_node_action)); +} + +TEST_F(ClusterTest, RegisterTabletTest) { + TabletMeta tablet_meta_meta; + tablet_meta_meta.set_table_name("meta_table"); + tablet_meta_meta.set_path("path/meta_table"); + tera::master::TabletPtr tablet_ptr_meta(new tera::master::Tablet(tablet_meta_meta)); + std::shared_ptr lb_tablet_meta = std::make_shared(); + lb_tablet_meta->tablet_ptr = tablet_ptr_meta; + + uint32_t tablet_index_0 = 0; + uint32_t node_index_0 = 0; + 
cluster_->RegisterTablet(lb_tablet_meta, tablet_index_0, node_index_0); + + ASSERT_EQ(1, cluster_->table_num_); + ASSERT_EQ(1, cluster_->tables_.size()); + ASSERT_STREQ("meta_table", cluster_->tables_[0].c_str()); + ASSERT_EQ(0, cluster_->tables_to_index_["meta_table"]); + + ASSERT_EQ(tablet_index_0, cluster_->tablets_to_index_["path/meta_table"]); + + ASSERT_EQ(node_index_0, cluster_->tablet_index_to_node_index_[tablet_index_0]); + ASSERT_EQ(node_index_0, cluster_->initial_tablet_index_to_node_index_[tablet_index_0]); + ASSERT_EQ(0, cluster_->tablet_index_to_table_index_[tablet_index_0]); +} + +TEST_F(ClusterTest, AddTabletTest) { + TabletMeta tablet_meta_meta; + tablet_meta_meta.set_size(10); + tera::master::TabletPtr tablet_ptr_meta(new tera::master::Tablet(tablet_meta_meta)); + tablet_ptr_meta->average_counter_.set_read_rows(20); + tablet_ptr_meta->average_counter_.set_write_rows(30); + tablet_ptr_meta->average_counter_.set_scan_rows(40); + std::shared_ptr lb_tablet_meta = std::make_shared(); + lb_tablet_meta->tablet_ptr = tablet_ptr_meta; + + uint32_t tablet_index = 0; + cluster_->tablets_[tablet_index] = lb_tablet_meta; + + uint32_t node_index = 0; + cluster_->size_per_node_[node_index] = 0; + cluster_->read_load_per_node_[node_index] = 0; + cluster_->write_load_per_node_[node_index] = 0; + cluster_->scan_load_per_node_[node_index] = 0; + + cluster_->AddTablet(tablet_index, node_index); + + ASSERT_EQ(1, cluster_->tablets_per_node_.size()); + ASSERT_EQ(10, cluster_->size_per_node_[node_index]); + ASSERT_EQ(20, cluster_->read_load_per_node_[node_index]); + ASSERT_EQ(30, cluster_->write_load_per_node_[node_index]); + ASSERT_EQ(40, cluster_->scan_load_per_node_[node_index]); +} + +TEST_F(ClusterTest, RemoveTabletTest) { + TabletMeta tablet_meta_meta; + tablet_meta_meta.set_size(10); + tera::master::TabletPtr tablet_ptr_meta(new tera::master::Tablet(tablet_meta_meta)); + tablet_ptr_meta->average_counter_.set_read_rows(20); + 
tablet_ptr_meta->average_counter_.set_write_rows(30); + tablet_ptr_meta->average_counter_.set_scan_rows(40); + std::shared_ptr lb_tablet_meta = std::make_shared(); + lb_tablet_meta->tablet_ptr = tablet_ptr_meta; + + uint32_t tablet_index = 0; + cluster_->tablets_[tablet_index] = lb_tablet_meta; + + uint32_t node_index = 0; + cluster_->tablets_per_node_[node_index].emplace_back(tablet_index); + + cluster_->size_per_node_[node_index] = 10; + cluster_->read_load_per_node_[node_index] = 20; + cluster_->write_load_per_node_[node_index] = 30; + cluster_->scan_load_per_node_[node_index] = 40; + + cluster_->RemoveTablet(tablet_index, node_index); + + ASSERT_EQ(0, cluster_->tablets_per_node_[node_index].size()); + ASSERT_EQ(0, cluster_->size_per_node_[node_index]); + ASSERT_EQ(0, cluster_->read_load_per_node_[node_index]); + ASSERT_EQ(0, cluster_->write_load_per_node_[node_index]); + ASSERT_EQ(0, cluster_->scan_load_per_node_[node_index]); +} + +TEST_F(ClusterTest, MoveTabletTest) { + TabletMeta tablet_meta_meta; + tablet_meta_meta.set_size(10); + tera::master::TabletPtr tablet_ptr_meta(new tera::master::Tablet(tablet_meta_meta)); + tablet_ptr_meta->average_counter_.set_read_rows(20); + tablet_ptr_meta->average_counter_.set_write_rows(30); + tablet_ptr_meta->average_counter_.set_scan_rows(40); + std::shared_ptr lb_tablet_meta = std::make_shared(); + lb_tablet_meta->tablet_ptr = tablet_ptr_meta; + + uint32_t tablet_index = 0; + uint32_t first_node_index = 0; + uint32_t second_node_index = 1; + uint32_t third_node_index = 2; + + cluster_->tablets_[tablet_index] = lb_tablet_meta; + cluster_->tablet_moved_num_ = 0; + cluster_->initial_tablet_index_to_node_index_[tablet_index] = first_node_index; + cluster_->tablet_index_to_node_index_[tablet_index] = first_node_index; + cluster_->abnormal_nodes_index_.insert(second_node_index); + cluster_->read_pending_nodes_index_.insert(second_node_index); + cluster_->write_pending_nodes_index_.insert(second_node_index); + 
cluster_->scan_pending_nodes_index_.insert(second_node_index); + + ASSERT_EQ(0, cluster_->tablets_moved_too_frequently_.size()); + ASSERT_EQ(0, cluster_->tablets_moved_to_abnormal_nodes_.size()); + ASSERT_EQ(0, cluster_->tablets_moved_to_read_pending_nodes_.size()); + ASSERT_EQ(0, cluster_->tablets_moved_to_write_pending_nodes_.size()); + ASSERT_EQ(0, cluster_->tablets_moved_to_scan_pending_nodes_.size()); + ASSERT_TRUE(cluster_->tablets_[tablet_index]->tablet_ptr->SetStatus(kTableReady)); + int64_t current_time_us = tera::get_micros(); + cluster_->lb_options_.tablet_move_too_frequently_threshold_s = 600; + cluster_->tablets_[tablet_index]->tablet_ptr->last_move_time_us_ = current_time_us; + + cluster_->MoveTablet(tablet_index, first_node_index, second_node_index); + ASSERT_EQ(first_node_index, cluster_->initial_tablet_index_to_node_index_[tablet_index]); + ASSERT_EQ(second_node_index, cluster_->tablet_index_to_node_index_[tablet_index]); + ASSERT_EQ(1, cluster_->tablet_moved_num_); + ASSERT_EQ(1, cluster_->tablets_moved_too_frequently_.size()); + ASSERT_EQ(1, cluster_->tablets_moved_to_abnormal_nodes_.size()); + ASSERT_EQ(1, cluster_->tablets_moved_to_read_pending_nodes_.size()); + ASSERT_EQ(1, cluster_->tablets_moved_to_write_pending_nodes_.size()); + ASSERT_EQ(1, cluster_->tablets_moved_to_scan_pending_nodes_.size()); + + cluster_->MoveTablet(tablet_index, second_node_index, third_node_index); + ASSERT_EQ(first_node_index, cluster_->initial_tablet_index_to_node_index_[tablet_index]); + ASSERT_EQ(third_node_index, cluster_->tablet_index_to_node_index_[tablet_index]); + ASSERT_EQ(1, cluster_->tablet_moved_num_); + ASSERT_EQ(1, cluster_->tablets_moved_too_frequently_.size()); + ASSERT_EQ(0, cluster_->tablets_moved_to_abnormal_nodes_.size()); + ASSERT_EQ(0, cluster_->tablets_moved_to_read_pending_nodes_.size()); + ASSERT_EQ(0, cluster_->tablets_moved_to_write_pending_nodes_.size()); + ASSERT_EQ(0, cluster_->tablets_moved_to_scan_pending_nodes_.size()); + + 
cluster_->MoveTablet(tablet_index, third_node_index, first_node_index); + ASSERT_EQ(first_node_index, cluster_->initial_tablet_index_to_node_index_[tablet_index]); + ASSERT_EQ(first_node_index, cluster_->tablet_index_to_node_index_[tablet_index]); + ASSERT_EQ(0, cluster_->tablet_moved_num_); + ASSERT_EQ(0, cluster_->tablets_moved_too_frequently_.size()); + ASSERT_EQ(0, cluster_->tablets_moved_to_abnormal_nodes_.size()); + ASSERT_EQ(0, cluster_->tablets_moved_to_read_pending_nodes_.size()); + ASSERT_EQ(0, cluster_->tablets_moved_to_write_pending_nodes_.size()); + ASSERT_EQ(0, cluster_->tablets_moved_to_scan_pending_nodes_.size()); + + cluster_->tablets_[tablet_index]->tablet_ptr->last_move_time_us_ = current_time_us - 2 * cluster_->lb_options_.tablet_move_too_frequently_threshold_s * 1000000; + cluster_->MoveTablet(tablet_index, first_node_index, second_node_index); + ASSERT_EQ(0, cluster_->tablets_moved_too_frequently_.size()); + ASSERT_EQ(1, cluster_->tablets_moved_to_abnormal_nodes_.size()); + ASSERT_EQ(1, cluster_->tablets_moved_to_read_pending_nodes_.size()); + ASSERT_EQ(1, cluster_->tablets_moved_to_write_pending_nodes_.size()); + ASSERT_EQ(1, cluster_->tablets_moved_to_scan_pending_nodes_.size()); +} + +TEST_F(ClusterTest, AbnormalNodeConstructTest) { + TabletMeta tablet_meta_0; + tablet_meta_0.set_path("path/meta_0"); + tera::master::TabletPtr tablet_ptr_0(new tera::master::Tablet(tablet_meta_0)); + std::shared_ptr lb_tablet_0 = std::make_shared(); + lb_tablet_0->tablet_ptr = tablet_ptr_0; + + TabletMeta tablet_meta_1; + tablet_meta_1.set_path("path/meta_1"); + tera::master::TabletPtr tablet_ptr_1(new tera::master::Tablet(tablet_meta_1)); + std::shared_ptr lb_tablet_1 = std::make_shared(); + lb_tablet_1->tablet_ptr = tablet_ptr_1; + + TabletMeta tablet_meta_2; + tablet_meta_2.set_path("path/meta_2"); + tera::master::TabletPtr tablet_ptr_2(new tera::master::Tablet(tablet_meta_2)); + std::shared_ptr lb_tablet_2 = std::make_shared(); + lb_tablet_2->tablet_ptr = 
tablet_ptr_2; + + tera::master::TabletNodePtr tablet_node_ptr(new tera::master::TabletNode()); + tablet_node_ptr->addr_ = "127.0.0.1:2200"; + std::shared_ptr lb_node = std::make_shared(); + lb_node->tablet_node_ptr = tablet_node_ptr; + lb_node->tablets.emplace_back(lb_tablet_0); + lb_node->tablets.emplace_back(lb_tablet_1); + lb_node->tablets.emplace_back(lb_tablet_2); + + std::vector> lb_nodes; + lb_nodes.emplace_back(lb_node); + + LBOptions options; + options.abnormal_node_ratio = 0.5; + + tablet_ptr_0->SetStatus(kTableReady); + tablet_ptr_1->SetStatus(kTableReady); + tablet_ptr_2->SetStatus(kTableReady); + cluster_.reset(new Cluster(lb_nodes, options)); + ASSERT_EQ(0, cluster_->initial_tablets_not_ready_per_node_[0].size()); + ASSERT_EQ(0, cluster_->abnormal_nodes_index_.size()); + + tablet_ptr_0->SetStatus(kTableOffLine); + cluster_.reset(new Cluster(lb_nodes, options)); + ASSERT_EQ(1, cluster_->initial_tablets_not_ready_per_node_[0].size()); + ASSERT_EQ(0, cluster_->abnormal_nodes_index_.size()); + + tablet_ptr_1->SetStatus(kTableOffLine); + cluster_.reset(new Cluster(lb_nodes, options)); + ASSERT_EQ(2, cluster_->initial_tablets_not_ready_per_node_[0].size()); + ASSERT_EQ(1, cluster_->abnormal_nodes_index_.size()); +} + +TEST_F(ClusterTest, SortNodesByTabletCount) { + cluster_->tablets_per_node_[0].emplace_back(0); + cluster_->tablets_per_node_[0].emplace_back(1); + cluster_->tablets_per_node_[1].emplace_back(2); + cluster_->tablets_per_node_[2].emplace_back(3); + cluster_->tablets_per_node_[2].emplace_back(4); + cluster_->tablets_per_node_[2].emplace_back(5); + + cluster_->node_index_sorted_by_tablet_count_.emplace_back(0); + cluster_->node_index_sorted_by_tablet_count_.emplace_back(1); + cluster_->node_index_sorted_by_tablet_count_.emplace_back(2); + ASSERT_EQ(0, cluster_->node_index_sorted_by_tablet_count_[0]); + ASSERT_EQ(1, cluster_->node_index_sorted_by_tablet_count_[1]); + ASSERT_EQ(2, cluster_->node_index_sorted_by_tablet_count_[2]); + + 
cluster_->SortNodesByTabletCount(); + ASSERT_EQ(1, cluster_->node_index_sorted_by_tablet_count_[0]); + ASSERT_EQ(0, cluster_->node_index_sorted_by_tablet_count_[1]); + ASSERT_EQ(2, cluster_->node_index_sorted_by_tablet_count_[2]); +} + +TEST_F(ClusterTest, SortNodesBySizeTest) { + cluster_->size_per_node_[0] = 20; + cluster_->size_per_node_[1] = 10; + cluster_->size_per_node_[2] = 30; + + cluster_->node_index_sorted_by_size_.emplace_back(0); + cluster_->node_index_sorted_by_size_.emplace_back(1); + cluster_->node_index_sorted_by_size_.emplace_back(2); + ASSERT_EQ(0, cluster_->node_index_sorted_by_size_[0]); + ASSERT_EQ(1, cluster_->node_index_sorted_by_size_[1]); + ASSERT_EQ(2, cluster_->node_index_sorted_by_size_[2]); + + cluster_->SortNodesBySize(); + ASSERT_EQ(1, cluster_->node_index_sorted_by_size_[0]); + ASSERT_EQ(0, cluster_->node_index_sorted_by_size_[1]); + ASSERT_EQ(2, cluster_->node_index_sorted_by_size_[2]); +} + +TEST_F(ClusterTest, SortNodesByReadLoad) { + cluster_->read_load_per_node_[0] = 20; + cluster_->read_load_per_node_[1] = 10; + cluster_->read_load_per_node_[2] = 30; + + cluster_->node_index_sorted_by_read_load_.emplace_back(0); + cluster_->node_index_sorted_by_read_load_.emplace_back(1); + cluster_->node_index_sorted_by_read_load_.emplace_back(2); + ASSERT_EQ(0, cluster_->node_index_sorted_by_read_load_[0]); + ASSERT_EQ(1, cluster_->node_index_sorted_by_read_load_[1]); + ASSERT_EQ(2, cluster_->node_index_sorted_by_read_load_[2]); + + cluster_->SortNodesByReadLoad(); + ASSERT_EQ(1, cluster_->node_index_sorted_by_read_load_[0]); + ASSERT_EQ(0, cluster_->node_index_sorted_by_read_load_[1]); + ASSERT_EQ(2, cluster_->node_index_sorted_by_read_load_[2]); +} + +TEST_F(ClusterTest, SortNodesByWriteLoad) { + cluster_->write_load_per_node_[0] = 20; + cluster_->write_load_per_node_[1] = 10; + cluster_->write_load_per_node_[2] = 30; + + cluster_->node_index_sorted_by_write_load_.emplace_back(0); + 
cluster_->node_index_sorted_by_write_load_.emplace_back(1); + cluster_->node_index_sorted_by_write_load_.emplace_back(2); + ASSERT_EQ(0, cluster_->node_index_sorted_by_write_load_[0]); + ASSERT_EQ(1, cluster_->node_index_sorted_by_write_load_[1]); + ASSERT_EQ(2, cluster_->node_index_sorted_by_write_load_[2]); + + cluster_->SortNodesByWriteLoad(); + ASSERT_EQ(1, cluster_->node_index_sorted_by_write_load_[0]); + ASSERT_EQ(0, cluster_->node_index_sorted_by_write_load_[1]); + ASSERT_EQ(2, cluster_->node_index_sorted_by_write_load_[2]); +} + +TEST_F(ClusterTest, SortNodesByScanLoad) { + cluster_->scan_load_per_node_[0] = 20; + cluster_->scan_load_per_node_[1] = 10; + cluster_->scan_load_per_node_[2] = 30; + + cluster_->node_index_sorted_by_scan_load_.emplace_back(0); + cluster_->node_index_sorted_by_scan_load_.emplace_back(1); + cluster_->node_index_sorted_by_scan_load_.emplace_back(2); + ASSERT_EQ(0, cluster_->node_index_sorted_by_scan_load_[0]); + ASSERT_EQ(1, cluster_->node_index_sorted_by_scan_load_[1]); + ASSERT_EQ(2, cluster_->node_index_sorted_by_scan_load_[2]); + + cluster_->SortNodesByScanLoad(); + ASSERT_EQ(1, cluster_->node_index_sorted_by_scan_load_[0]); + ASSERT_EQ(0, cluster_->node_index_sorted_by_scan_load_[1]); + ASSERT_EQ(2, cluster_->node_index_sorted_by_scan_load_[2]); +} + +} // namespace load_balancer +} // namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/load_balancer/test/cost_functions_test.cc b/src/load_balancer/test/cost_functions_test.cc new file mode 100644 index 000000000..84f546fba --- /dev/null +++ b/src/load_balancer/test/cost_functions_test.cc @@ -0,0 +1,176 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "load_balancer/cost_functions.h" +#include "load_balancer/random.h" + +namespace tera { +namespace load_balancer { + +class CostFunctionTest : public ::testing::Test { +public: + virtual void SetUp() { + move_cost_function_.reset(new MoveCountCostFunction(lb_options_)); + } + + virtual void TearDown() { + } + +private: + LBOptions lb_options_; + std::shared_ptr move_cost_function_; +}; + +class MoveCountCostFunctionTest : public ::testing::Test { +public: + virtual void SetUp() { + move_cost_function_.reset(new MoveCountCostFunction(lb_options_)); + + std::vector> empty_lb_nodes; + LBOptions options; + cluster_.reset(new Cluster(empty_lb_nodes, options)); + + move_cost_function_->Init(cluster_); + } + + virtual void TearDown() { + } + +private: + LBOptions lb_options_; + std::shared_ptr move_cost_function_; + std::shared_ptr cluster_; +}; + +class TabletCountCostFunctionTest : public ::testing::Test { +public: + virtual void SetUp() { + tablet_count_cost_function_.reset(new TabletCountCostFunction(lb_options_)); + + std::vector> empty_lb_nodes; + LBOptions options; + cluster_.reset(new Cluster(empty_lb_nodes, options)); + + tablet_count_cost_function_->Init(cluster_); + } + + virtual void TearDown() { + } + +private: + LBOptions lb_options_; + std::shared_ptr tablet_count_cost_function_; + std::shared_ptr cluster_; +}; + +class SizeCostFunctionTest : public ::testing::Test { +public: + virtual void SetUp() { + size_cost_function_.reset(new SizeCostFunction(lb_options_)); + + std::vector> empty_lb_nodes; + LBOptions options; + cluster_.reset(new Cluster(empty_lb_nodes, options)); + + size_cost_function_->Init(cluster_); + } + + virtual void TearDown() { + } + +private: + LBOptions lb_options_; + std::shared_ptr size_cost_function_; + std::shared_ptr cluster_; +}; + +TEST_F(CostFunctionTest, WeightTest) { + double w = 3.14; + move_cost_function_->SetWeight(w); + 
ASSERT_DOUBLE_EQ(w, move_cost_function_->GetWeight()); +} + +TEST_F(CostFunctionTest, SumTest) { + std::vector stats = {1, 2, 3}; + ASSERT_DOUBLE_EQ(6, move_cost_function_->GetSum(stats)); +} + +TEST_F(CostFunctionTest, ScaleTest) { + // value <= min + ASSERT_DOUBLE_EQ(0, move_cost_function_->Scale(0, 10, -1)); + ASSERT_DOUBLE_EQ(0, move_cost_function_->Scale(0, 10, 0)); + + // max <= min + ASSERT_DOUBLE_EQ(0, move_cost_function_->Scale(0, 0, 5)); + ASSERT_DOUBLE_EQ(0, move_cost_function_->Scale(0, -1, 5)); + + // normal case + ASSERT_DOUBLE_EQ(0, move_cost_function_->Scale(0, 10, 0)); + ASSERT_DOUBLE_EQ(0.5, move_cost_function_->Scale(0, 10, 5)); + ASSERT_DOUBLE_EQ(1, move_cost_function_->Scale(0, 10, 10)); + + // random case + size_t times = 100; + int min = 0; + int max = 10; + for (size_t i = 0; i < times; ++i) { + int value = Random::Rand(min, max + 1); + ASSERT_TRUE(move_cost_function_->Scale(min, max, value) >= 0); + ASSERT_TRUE(move_cost_function_->Scale(min, max, value) <= 1); + } +} + +TEST_F(CostFunctionTest, ScaleFromArrayTest) { + std::vector stats_0 = {0, 0}; + ASSERT_DOUBLE_EQ(0, move_cost_function_->ScaleFromArray(stats_0)); + + std::vector stats_1 = {10, 10}; + ASSERT_DOUBLE_EQ(0, move_cost_function_->ScaleFromArray(stats_1)); /* equal non-zero stats are expected to scale to 0 as well */ + + int begin = 0; + int end = 100; + size_t times = 100; + std::vector stats_2; + for (size_t i = 0; i < times; ++i) { + stats_2.clear(); + stats_2.emplace_back(Random::Rand(begin, end)); + stats_2.emplace_back(Random::Rand(begin, end)); + + ASSERT_TRUE(move_cost_function_->ScaleFromArray(stats_2) >= 0); + ASSERT_TRUE(move_cost_function_->ScaleFromArray(stats_2) <= 1); + } +} + +TEST_F(MoveCountCostFunctionTest, CostTest) { + move_cost_function_->tablet_max_move_num_ = 10; + move_cost_function_->tablet_max_move_percent_ = 0.05; + cluster_->tablet_num_ = 100; + + cluster_->tablet_moved_num_ = 1; + ASSERT_DOUBLE_EQ(0.1, move_cost_function_->Cost()); + + cluster_->tablet_moved_num_ = 6; + ASSERT_DOUBLE_EQ(0.6, 
move_cost_function_->Cost()); + + cluster_->tablet_moved_num_ = 10; + ASSERT_DOUBLE_EQ(1, move_cost_function_->Cost()); + + cluster_->tablet_moved_num_ = 11; + ASSERT_DOUBLE_EQ(move_cost_function_->kExpensiveCost, move_cost_function_->Cost()); +} + +TEST_F(TabletCountCostFunctionTest, CostTest) { +} + +TEST_F(SizeCostFunctionTest, CostTest) { +} + +} // namespace load_balancer +} // namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/load_balancer/test/random_test.cc b/src/load_balancer/test/random_test.cc new file mode 100644 index 000000000..385b76877 --- /dev/null +++ b/src/load_balancer/test/random_test.cc @@ -0,0 +1,44 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "load_balancer/random.h" + +namespace tera { +namespace load_balancer { + +class RandomTest : public ::testing::Test { +}; + +TEST_F(RandomTest, CommonTest) { + int start = 0; + int end = 3; + size_t times = 100; + + for (size_t i = 0; i < times; ++i) { + int rand = Random::Rand(start, end); + ASSERT_TRUE(rand >= start); + ASSERT_TRUE(rand < end); + } +} + +TEST_F(RandomTest, NegativeTest) { + int start = -10; + int end = 10; + size_t times = 100; + + for (size_t i = 0; i < times; ++i) { + int rand = Random::RandStd(start, end); + ASSERT_TRUE(rand >= start); + ASSERT_TRUE(rand < end); + } +} + +} // namespace load_balancer +} // namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/load_balancer/unity_balancer.cc b/src/load_balancer/unity_balancer.cc new file mode 100644 index 000000000..a6279d16f --- /dev/null +++ b/src/load_balancer/unity_balancer.cc @@ -0,0 +1,264 @@ +// Copyright (c) 2015, Baidu.com, Inc. 
All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "load_balancer/unity_balancer.h" + +#include +#include + +#include "glog/logging.h" +#include "load_balancer/random.h" +#include "common/timer.h" + +namespace tera { +namespace load_balancer { + +using tera::master::TabletNodePtr; +using tera::master::TabletPtr; + +UnityBalancer::UnityBalancer(const LBOptions& options) : + lb_options_(options) { + // cost functions + if (lb_options_.move_count_cost_weight > 0) { + cost_functions_.emplace_back(new MoveCountCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.move_frequency_cost_weight > 0){ + cost_functions_.emplace_back(new MoveFrequencyCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.abnormal_node_cost_weight > 0) { + cost_functions_.emplace_back(new AbnormalNodeCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.read_pending_node_cost_weight > 0) { + cost_functions_.emplace_back(new ReadPendingNodeCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.write_pending_node_cost_weight > 0) { + cost_functions_.emplace_back(new WritePendingNodeCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.scan_pending_node_cost_weight > 0) { + cost_functions_.emplace_back(new ScanPendingNodeCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.tablet_count_cost_weight > 0) { + cost_functions_.emplace_back(new TabletCountCostFunction(options)); + VLOG(20) << "[lb] " << 
cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.size_cost_weight > 0) { + cost_functions_.emplace_back(new SizeCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.read_load_cost_weight > 0) { + cost_functions_.emplace_back(new ReadLoadCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.write_load_cost_weight > 0) { + cost_functions_.emplace_back(new WriteLoadCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.scan_load_cost_weight > 0) { + cost_functions_.emplace_back(new ScanLoadCostFunction(options)); + VLOG(20) << "[lb] " << cost_functions_[cost_functions_.size() - 1]->Name() << " enabled"; + } + + // action generators + action_generators_.emplace_back(new RandomActionGenerator()); + VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; + if (lb_options_.tablet_count_cost_weight > 0) { + action_generators_.emplace_back(new TabletCountActionGenerator()); + VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.size_cost_weight > 0) { + action_generators_.emplace_back(new SizeActionGenerator()); + VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.read_load_cost_weight > 0) { + action_generators_.emplace_back(new ReadLoadActionGenerator()); + VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.write_load_cost_weight > 0) { + action_generators_.emplace_back(new WriteLoadActionGenerator()); + VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; + } + if (lb_options_.scan_load_cost_weight > 0) { + 
action_generators_.emplace_back(new ScanLoadActionGenerator()); + VLOG(20) << "[lb] " << action_generators_[action_generators_.size() - 1]->Name() << " enabled"; + } +} + +UnityBalancer::~UnityBalancer() { +} + +bool UnityBalancer::BalanceCluster( + const std::vector>& lb_nodes, + std::vector* plans) { + return BalanceCluster("", lb_nodes, plans); +} + +bool UnityBalancer::BalanceCluster( + const std::string& table_name, + const std::vector>& lb_nodes, + std::vector* plans) { + if (lb_nodes.size() <= 1 || plans == nullptr) { + return false; + } + + VLOG (5) << "[lb] BalanceCluster for table:" << table_name << " begin"; + + std::shared_ptr cluster = std::make_shared(lb_nodes, lb_options_); + + if (lb_options_.debug_mode_enabled) { + cluster->DebugCluster(); + } + + InitCostFunctions(cluster); + + if (!NeedBalance(cluster)) { + return true; + } + + uint64_t max_steps = std::min(lb_options_.max_compute_steps, static_cast(lb_options_.max_compute_steps_per_tablet * cluster->tablet_num_)); + double init_cost = ComputeCost(std::numeric_limits::max()); + double current_cost = init_cost; + + VLOG(5) << "[lb] compute begin, max_steps:" << max_steps << " init_cost:" << init_cost; + + int64_t start_time_ns = get_micros(); + int64_t cost_time_ms = 0; + uint64_t step = 0; + for (step = 0; step < max_steps; ++step) { + std::shared_ptr action(NextAction(cluster)); + VLOG(20) << "[lb] step:" << step << " action:" << action->ToString(); + + if (!cluster->ValidAction(action)) { + continue; + } + + cluster->DoAction(action); + + if (lb_options_.debug_mode_enabled) { + cluster->DebugCluster(); + } + + double new_cost = ComputeCost(current_cost); + if (new_cost < current_cost) { + VLOG(20) << "[lb] got lower cost!"; + current_cost = new_cost; + } else { + std::shared_ptr undo_action(action->UndoAction()); + VLOG(20) << "[lb] undo action:" << undo_action->ToString(); + cluster->DoAction(undo_action); + + if (lb_options_.debug_mode_enabled) { + cluster->DebugCluster(); + } + } + + 
cost_time_ms = (get_micros() - start_time_ns) / 1000; + if (static_cast(cost_time_ms) > lb_options_.max_compute_time_ms) { + VLOG(5) << "[lb] stop computing since time reach to max_compute_time_ms_:" + << lb_options_.max_compute_time_ms; + break; + } + } + + VLOG(5) << "[lb] compute end, cost time(ms):" << cost_time_ms + << " cost steps:" << step + << " init cost:" << init_cost + << " new cost:" << current_cost; + + if (current_cost < init_cost) { + CreatePlans(cluster, plans); + VLOG(5) << "[lb] balance plan size:" << plans->size(); + } else { + VLOG(5) << "[lb] no better balance plan"; + } + + VLOG (5) << "[lb] BalanceCluster for table:" << table_name << " end"; + + return true; +} + +bool UnityBalancer::NeedBalance(const std::shared_ptr& cluster) { + double total_cost = 0.0; + double total_weight = 0.0; + + for (const auto& cost_func : cost_functions_) { + double weight = cost_func->GetWeight(); + if (weight <= 0) { + continue; + } + + total_weight += weight; + total_cost += cost_func->Cost() * weight; + } + double cost = total_weight == 0 ? 
0 : total_cost / total_weight; + + VLOG(5) << "[lb] NeedBalance compute, total_cost:" << total_cost + << " total_weight:" << total_weight + << " cost:" << cost + << " min_cost_need_balance:" << lb_options_.min_cost_need_balance; + + if (total_cost <= 0 || total_weight <= 0 || cost < lb_options_.min_cost_need_balance) { + LOG(INFO) << "[lb] no need to balance"; + return false; + } else { + return true; + } +} + +void UnityBalancer::InitCostFunctions(const std::shared_ptr& cluster) { + for (const auto& cost_func : cost_functions_) { + cost_func->Init(cluster); + } +} + +double UnityBalancer::ComputeCost(double previous_cost) { + VLOG(20) << "[lb] ComputeCost begin, previous cost:" << previous_cost; + double total_cost = 0.0; + + for (const auto& cost_func : cost_functions_) { + double weight = cost_func->GetWeight(); + if (weight <= 0) { + continue; + } + double cost = cost_func->Cost(); + total_cost += cost * weight; + VLOG(20) << "[lb] " << cost_func->Name() << " cost:" << cost << " weight:" << weight; + if (total_cost > previous_cost) { + break; + } + } + + VLOG(20) << "[lb] ComputeCost end, new cost:" << total_cost; + return total_cost; +} + +Action* UnityBalancer::NextAction(const std::shared_ptr& cluster) { + uint32_t rand = Random::Rand(0, action_generators_.size()); + return action_generators_[rand]->Generate(cluster); +} + +void UnityBalancer::CreatePlans(const std::shared_ptr& cluster, std::vector* plans) { + plans->clear(); + + for (uint32_t i = 0; i < cluster->tablet_index_to_node_index_.size(); ++i) { + uint32_t initial_node_index = cluster->initial_tablet_index_to_node_index_[i]; + uint32_t new_node_index = cluster->tablet_index_to_node_index_[i]; + + if (initial_node_index != new_node_index) { + // tablet has been moved to another tablet node + Plan plan(cluster->tablets_[i]->tablet_ptr, + cluster->nodes_[initial_node_index]->tablet_node_ptr, + cluster->nodes_[new_node_index]->tablet_node_ptr); + plans->emplace_back(plan); + } + } +} + +} // namespace 
load_balancer +} // namespace tera diff --git a/src/load_balancer/unity_balancer.h b/src/load_balancer/unity_balancer.h new file mode 100644 index 000000000..522acabff --- /dev/null +++ b/src/load_balancer/unity_balancer.h @@ -0,0 +1,58 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_LOAD_BALANCER_UNITY_BALANCER_H_ +#define TERA_LOAD_BALANCER_UNITY_BALANCER_H_ + +#include +#include + +#include "load_balancer/action_generators.h" +#include "load_balancer/actions.h" +#include "load_balancer/balancer.h" +#include "load_balancer/cluster.h" +#include "load_balancer/cost_functions.h" + +namespace tera { +namespace load_balancer { + +class UnityBalancer : public Balancer { +public: + explicit UnityBalancer(const LBOptions& options); + virtual ~UnityBalancer(); + + virtual bool BalanceCluster( + const std::vector>& lb_nodes, + std::vector* plans) override; + + // if table_name is empty, balance whole cluster, + // otherwise balance the specified table of table_name + virtual bool BalanceCluster( + const std::string& table_name, + const std::vector>& lb_nodes, + std::vector* plans) override; + + virtual bool NeedBalance(const std::shared_ptr& cluster); + +protected: + virtual void InitCostFunctions(const std::shared_ptr& cluster); + + virtual double ComputeCost(double previous_cost); + + virtual Action* NextAction(const std::shared_ptr& cluster); + + // diff the initial cluster state with the current cluster state, then create plans + virtual void CreatePlans(const std::shared_ptr& cluster, std::vector* plans); + +private: + std::vector> cost_functions_; + std::vector> action_generators_; + + LBOptions lb_options_; +}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_UNITY_BALANCER_H_ diff --git a/src/master/availability.cc b/src/master/availability.cc index 998c14a8e..1b5c85b71 100644 ---
a/src/master/availability.cc +++ b/src/master/availability.cc @@ -24,15 +24,50 @@ DECLARE_string(tera_master_meta_table_path); namespace tera { namespace master { +static std::string GetNameFromPath(const std::string& path) { + if (path == FLAGS_tera_master_meta_table_path) { + return FLAGS_tera_master_meta_table_name; + } + std::vector t; + SplitString(path, "/", &t); // table_name/tablet00...001 + if (!t.empty()) { + return t[0]; + } else { + return ""; + } +} + + TabletAvailability::TabletAvailability(std::shared_ptr t) : tablet_manager_(t) { start_ts_ = get_micros(); } -void TabletAvailability::AddNotReadyTablet(const std::string& path) { +void TabletAvailability::AddNotReadyTablet(const std::string& path, + const TabletStatus& tablet_status, + const TableStatus& table_status) { + if (tablet_status == kTableReady || table_status == kTableDisable) { + return; + } + MutexLock lock(&mutex_); int64_t ts = get_micros(); tablets_.insert(std::pair(path, ts)); + auto iter = not_ready_tablet_metrics_.emplace( + path, + MetricCounter{ + metric_name_, + "table:" + GetNameFromPath(path) + ",tablet:" + path, + {SubscriberType::LATEST}, + false + }); + + if (iter.second) { + VLOG(12) << "[Add NotReady To Metric]: " << static_cast(TabletErrorStatus::kNotReady); + iter.first->second.Set(static_cast(TabletErrorStatus::kNotReady)); + } else { + VLOG(12) << "[Add NotReady To Metric Failed]: " << static_cast(TabletErrorStatus::kNotReady); + } if (tablets_hist_cost_[path].start_ts > 0) { VLOG(10) << "notready again " << path; @@ -51,6 +86,7 @@ void TabletAvailability::AddNotReadyTablet(const std::string& path) { void TabletAvailability::EraseNotReadyTablet(const std::string& path) { MutexLock lock(&mutex_); tablets_.erase(path); + not_ready_tablet_metrics_.erase(path); if (tablets_hist_cost_.find(path) == tablets_hist_cost_.end() || tablets_hist_cost_[path].start_ts == 0) { @@ -71,22 +107,13 @@ void TabletAvailability::EraseNotReadyTablet(const std::string& path) { << ", reready " 
<< tablets_hist_cost_[path].reready_num; } -static std::string GetNameFromPath(const std::string& path) { - if (path == FLAGS_tera_master_meta_table_path) { - return FLAGS_tera_master_meta_table_name; - } - std::vector t; - SplitString(path, "/", &t); // table_name/tablet00...001 - return t[0]; -} - void TabletAvailability::LogAvailability() { MutexLock lock(&mutex_); int64_t not_avai_count = 0; int64_t not_avai_warning = 0; int64_t not_avai_error = 0; int64_t not_avai_fatal = 0; - int64_t start = ::common::timer::get_micros(); + int64_t start = get_micros(); std::map::iterator it; for (it = tablets_.begin(); it != tablets_.end(); ++it) { std::string table_name = GetNameFromPath(it->first); @@ -99,22 +126,28 @@ void TabletAvailability::LogAvailability() { continue; } + auto metric_iter = not_ready_tablet_metrics_.find(it->first); + assert(metric_iter != not_ready_tablet_metrics_.end()); + if ((start - it->second) > FLAGS_tera_master_not_available_threshold * 1000 * 1000LL) { VLOG(12) << "[availability] not available:" << it->first; not_avai_count++; } if ((start - it->second) > FLAGS_tera_master_availability_fatal_threshold * 1000 * 1000LL) { not_avai_fatal++; + metric_iter->second.Set(static_cast(TabletErrorStatus::kFatal)); if (FLAGS_tera_master_availability_show_details_enabled) { LOG(INFO) << "[availability] fatal-tablet:" << it->first; } } else if ((start - it->second) > FLAGS_tera_master_availability_error_threshold * 1000 * 1000LL) { not_avai_error++; + metric_iter->second.Set(static_cast(TabletErrorStatus::kError)); if (FLAGS_tera_master_availability_show_details_enabled) { LOG(INFO) << "[availability] error-tablet:" << it->first; } } else if ((start - it->second) > FLAGS_tera_master_availability_warning_threshold * 1000 * 1000LL) { not_avai_warning++; + metric_iter->second.Set(static_cast(TabletErrorStatus::kWarning)); } } @@ -155,6 +188,9 @@ void TabletAvailability::LogAvailability() { } } int64_t nr_notready_tablets = tablets_hist_cost_.size(); + double 
time_percent = 1.0 - (double)total_time / (all_time * all_tablets + 1); + ready_time_percent.Set(static_cast(time_percent * 100)); + LOG(INFO) << "[availability][tablet_staticstic] time_interval: " << all_time / 1000 << ", notready_time: " << total_time / 1000 << ", total_time: " << (all_time * all_tablets) / 1000 @@ -165,7 +201,7 @@ void TabletAvailability::LogAvailability() { << ", notready_count: " << total_notready << ", reready_count: " << total_reready; - int64_t cost = ::common::timer::get_micros() - start; + int64_t cost = get_micros() - start; LOG(INFO) << "[availability] cost time:" << cost/1000 << " ms"; } diff --git a/src/master/availability.h b/src/master/availability.h index ddbe6a5f4..d6c133c72 100644 --- a/src/master/availability.h +++ b/src/master/availability.h @@ -6,10 +6,12 @@ #define TERA_MASTER_TABLET_AVAILABILITY_H_ #include +#include #include "master/tablet_manager.h" #include "common/mutex.h" +#include "common/metric/metric_counter.h" namespace tera { namespace master { @@ -25,16 +27,32 @@ class TabletAvailability { public: TabletAvailability(std::shared_ptr t); void LogAvailability(); - void AddNotReadyTablet(const std::string& id); + void AddNotReadyTablet(const std::string& path, + const TabletStatus& tablet_status, + const TableStatus& table_status); void EraseNotReadyTablet(const std::string& id); private: + + enum class TabletErrorStatus { + kNotReady = 1, + kFatal = 2, + kError = 3, + kWarning = 4 + }; + Mutex mutex_; std::shared_ptr tablet_manager_; + std::map tablets_; + std::map not_ready_tablet_metrics_; + MetricCounter ready_time_percent{"tera_master_tablet_ready_time_percent", + {SubscriberType::LATEST}, + false}; int64_t start_ts_; std::map tablets_hist_cost_; + const std::string metric_name_{"tera_master_tablet_availability"}; }; } // master diff --git a/src/master/gc_strategy.cc b/src/master/gc_strategy.cc index d87c96848..b87d113b1 100644 --- a/src/master/gc_strategy.cc +++ b/src/master/gc_strategy.cc @@ -8,12 +8,12 @@ 
#include "db/filename.h" #include "io/utils_leveldb.h" - +#include "leveldb/env_dfs.h" DECLARE_string(tera_tabletnode_path_prefix); DECLARE_string(tera_master_meta_table_name); DECLARE_int32(tera_garbage_collect_debug_log); - +DECLARE_string(tera_leveldb_env_type); namespace tera { namespace master { @@ -147,7 +147,15 @@ bool BatchGcStrategy::CollectSingleDeadTablet(const std::string& tablename, uint env->GetChildren(tablet_path, &children); list_count_.Inc(); if (children.size() == 0) { - LOG(INFO) << "[gc] delete empty tablet dir: " << tablet_path; + leveldb::FileLock* file_lock = nullptr; + // NEVER remove the trailing character '/', otherwise you will lock the parent directory + leveldb::Status s = env->LockFile(tablet_path + "/", &file_lock); + if (!s.ok()) { + LOG(WARNING) << "lock path failed, path: " << tablet_path << ", status: " << s.ToString(); + } + + delete file_lock; + env->DeleteDir(tablet_path); return false; } @@ -157,6 +165,14 @@ bool BatchGcStrategy::CollectSingleDeadTablet(const std::string& tablename, uint uint64_t number = 0; if (ParseFileName(children[lg], &number, &type)) { LOG(INFO) << "[gc] delete: " << lg_path; + + leveldb::FileLock* file_lock = nullptr; + // NEVER remove the trailing character '/', otherwise you will lock the parent directory + leveldb::Status s = env->LockFile(tablet_path + "/", &file_lock); + if (!s.ok()) { + LOG(WARNING) << "lock path failed, path: " << tablet_path << ", status: " << s.ToString(); + } + env->DeleteFile(lg_path); continue; } @@ -173,6 +189,13 @@ bool BatchGcStrategy::CollectSingleDeadTablet(const std::string& tablename, uint list_count_.Inc(); if (files.size() == 0) { LOG(INFO) << "[gc] delete empty lg dir: " << lg_path; + leveldb::FileLock* file_lock = nullptr; + // NEVER remove the trailing character '/', otherwise you will lock the parent directory + leveldb::Status s = env->LockFile(tablet_path + "/", &file_lock); + if (!s.ok()) { + LOG(WARNING) << "lock path failed, path: " << tablet_path << ", 
status: " << s.ToString(); + } + delete file_lock; env->DeleteDir(lg_path); continue; } @@ -184,6 +207,13 @@ bool BatchGcStrategy::CollectSingleDeadTablet(const std::string& tablename, uint if (!ParseFileName(files[f], &number, &type) || type != leveldb::kTableFile) { // only keep sst, delete rest files + leveldb::FileLock* file_lock = nullptr; + // NEVER remove the trailing character '/', otherwise you will lock the parent directory + leveldb::Status s = env->LockFile(lg_path + "/", &file_lock); + if (!s.ok()) { + LOG(WARNING) << "lock path failed, path: " << lg_path << ", status: " << s.ToString(); + } + delete file_lock; io::DeleteEnvDir(file_path); continue; } @@ -214,7 +244,20 @@ void BatchGcStrategy::DeleteObsoleteFiles() { for (size_t lg = 0; lg < file_set.size(); ++lg) { std::set::iterator it = file_set[lg].begin(); for (; it != file_set[lg].end(); ++it) { - std::string file_path = leveldb::BuildTableFilePath(tablepath, lg, *it); + uint64_t tablet = 0; + uint64_t number = 0; + leveldb::ParseFullFileNumber(*it, &tablet, &number); + std::string file_path = leveldb::BuildTableFilePath(tablepath, tablet, lg, number); + std::string lg_path = leveldb::BuildTabletLgPath(tablepath, tablet, lg); + + leveldb::FileLock* file_lock = nullptr; + // NEVER remove the trailing character '/', otherwise you will lock the parent directory + leveldb::Status s = env->LockFile(lg_path + "/", &file_lock); + if (!s.ok()) { + LOG(WARNING) << "lock path failed, path: " << lg_path << ", status: " << s.ToString(); + } + delete file_lock; + LOG(INFO) << "[gc] delete: " << file_path; env->DeleteFile(file_path); file_delete_num_++; @@ -223,390 +266,5 @@ void BatchGcStrategy::DeleteObsoleteFiles() { } } -IncrementalGcStrategy::IncrementalGcStrategy(std::shared_ptr tablet_manager) - : tablet_manager_(tablet_manager), - last_gc_time_(std::numeric_limits::max()), - max_ts_(std::numeric_limits::max()) {} - -bool IncrementalGcStrategy::PreQuery () { - int64_t start_ts = get_micros(); - 
std::vector tables; - tablet_manager_->ShowTable(&tables, NULL); - - for (size_t i = 0; i < tables.size(); ++i) { - TabletFiles tablet_files; - std::string table_name = tables[i]->GetTableName(); - if (table_name == FLAGS_tera_master_meta_table_name) continue; - dead_tablet_files_.insert(std::make_pair(table_name, tablet_files)); - live_tablet_files_.insert(std::make_pair(table_name, tablet_files)); - - std::set live_tablets, dead_tablets; - if (!tables[i]->GetTabletsForGc(&live_tablets, &dead_tablets, true)) { - continue; - } - std::set::iterator it; - // update dead tablets - for (it = dead_tablets.begin(); it != dead_tablets.end(); ++it) { - TabletFiles& temp_tablet_files = dead_tablet_files_[table_name]; - TabletFileSet tablet_file_set(get_micros() / 1000000, 0); - bool ret = temp_tablet_files.insert(std::make_pair(*it, tablet_file_set)).second; - if (ret) { - VLOG(10) << "[gc] newly dead talbet: " << leveldb::GetTabletPathFromNum(table_name, *it); - if (!CollectSingleDeadTablet(table_name, *it)) { - // collect from DFS fails, so rollback memory status, retry in the next time - assert(dead_tablet_files_[table_name].erase(*it) == 1); - } - } else { - VLOG(20) << "[gc] old dead talbet: " << leveldb::GetTabletPathFromNum(table_name, *it); - } - } - - // erase newly dead tablets from live tablets - for (TabletFiles::iterator it = live_tablet_files_[table_name].begin(); - it != live_tablet_files_[table_name].end();) { - if (dead_tablet_files_[table_name].find(static_cast(it->first)) != dead_tablet_files_[table_name].end()) { - live_tablet_files_[table_name].erase(it++); - } else { - ++it; - } - } - - // add new live tablets - for (it = live_tablets.begin(); it != live_tablets.end(); ++it) { - TabletFiles& temp_tablet_files = live_tablet_files_[table_name]; - TabletFileSet tablet_file_set; - temp_tablet_files.insert(std::make_pair(*it, tablet_file_set)); - } - } - if (FLAGS_tera_garbage_collect_debug_log) { - DEBUG_print_files(true); - DEBUG_print_files(false); - } - 
LOG(INFO) << "[gc] Gather dead tablets, cost: " << (get_micros() - start_ts) / 1000 << "ms."; - - // do not need gc if there is no new dead tablet - if (dead_tablet_files_.size() == 0) { - LOG(INFO) << "[gc] Do not need gc this time"; - } - return dead_tablet_files_.size() != 0; -} - -void IncrementalGcStrategy::ProcessQueryCallbackForGc(QueryResponse* response) { - LOG(INFO) << "[gc] ProcessQueryCallbackForGc"; - MutexLock lock(&gc_mutex_); - - std::set ready_tables; - for (int table = 0; table < response->inh_live_files_size(); ++table) { - ready_tables.insert(response->inh_live_files(table).table_name()); - } - - // update tablet ready time - for (int i = 0; i < response->tabletmeta_list().meta_size(); ++i) { - const TabletMeta& meta = response->tabletmeta_list().meta(i); - std::string table_name = meta.table_name(); - if (table_name == FLAGS_tera_master_meta_table_name) continue; - if (live_tablet_files_.find(table_name) == live_tablet_files_.end() || - ready_tables.find(table_name) == ready_tables.end()) { - continue; - } - int64_t tablet_number = static_cast(leveldb::GetTabletNumFromPath(meta.path())); - VLOG(15) << "[gc] see live tablet " << leveldb::GetTabletPathFromNum(table_name, tablet_number); - if (live_tablet_files_[table_name].find(tablet_number) == live_tablet_files_[table_name].end()) continue; - live_tablet_files_[table_name][tablet_number].ready_time_ = get_micros() / 1000000; - } - - // insert live files - for (int table = 0; table < response->inh_live_files_size(); ++table) { - InheritedLiveFiles live_files = response->inh_live_files(table); - std::string table_name = live_files.table_name(); - if (table_name == FLAGS_tera_master_meta_table_name) continue; - VLOG(12) << "[gc] inh pb: " << response->inh_live_files(table).ShortDebugString(); - if (live_tablet_files_.find(table_name) == live_tablet_files_.end()) continue; - // collect live files - TabletFiles temp_tablet_files; - for (int lg = 0; lg < live_files.lg_live_files_size(); ++lg) { - 
LgInheritedLiveFiles lg_live_files = live_files.lg_live_files(lg); - uint32_t lg_no = lg_live_files.lg_no(); - for (int i = 0; i < lg_live_files.file_number_size(); ++i) { - uint64_t tablet_number, file; - uint64_t file_number = lg_live_files.file_number(i); - leveldb::ParseFullFileNumber(file_number, &tablet_number, &file); - if (dead_tablet_files_[table_name].find(tablet_number) == - dead_tablet_files_[table_name].end()) { - VLOG(12) << "[gc] skip live tablet " << tablet_number; - continue; - } - TabletFileSet tablet_file_set; - temp_tablet_files.insert(std::make_pair(tablet_number, tablet_file_set)); - TabletFileSet& temp_tablet_file_set = temp_tablet_files[tablet_number]; - LgFileSet lg_files; - temp_tablet_file_set.files_.insert(std::make_pair(lg_no, lg_files)); - temp_tablet_file_set.files_[lg_no].live_files_.insert(file_number); - VLOG(12) << "[gc] insert live file " << leveldb::GetTabletPathFromNum(table_name, tablet_number) << "/" << lg_no << "/" << file; - const LgFileSet& check = ((dead_tablet_files_[table_name][tablet_number]).files_)[lg_no]; - if (check.storage_files_.find(file_number) == check.storage_files_.end()) { - LOG(WARNING) << "[gc] insert error " << leveldb::GetTabletPathFromNum(table_name, tablet_number) << "/" << lg_no << "/" << file; - } - } - } - // update live files in dead tablets - TabletFiles::iterator tablet_it = temp_tablet_files.begin(); - TabletFiles& dead_tablets = dead_tablet_files_[table_name]; - for (; tablet_it != temp_tablet_files.end(); ++tablet_it) { - uint64_t tablet_number = tablet_it->first; - if (dead_tablets.find(tablet_number) == dead_tablets.end()) { - VLOG(12) << "[gc] skip live tablet " << table_name << "/" << tablet_number; - continue; - } - std::map& live_lg = (tablet_it->second).files_; - std::map& dead_lg = dead_tablets[tablet_number].files_; - std::map::iterator lg_it = live_lg.begin(); - for (; lg_it != live_lg.end(); ++lg_it) { - uint32_t lg_no = lg_it->first; - LgFileSet lg_file_set; - 
dead_lg.insert(std::make_pair(lg_no, lg_file_set)); - for (std::set::iterator it = live_lg[lg_no].live_files_.begin(); it != live_lg[lg_no].live_files_.end(); ++it) { - dead_lg[lg_no].live_files_.insert(*it); - } - VLOG(12) << "[gc] dead tablet's live lg: " << leveldb::GetTabletPathFromNum(table_name, tablet_number) << "/" << lg_no; - } - } - } - if (FLAGS_tera_garbage_collect_debug_log) { - DEBUG_print_files(true); - } -} - -void IncrementalGcStrategy::PostQuery () { - LOG(INFO) << "[gc] PostQuery"; - if (FLAGS_tera_garbage_collect_debug_log) { - DEBUG_print_files(true); - DEBUG_print_files(false); - } - int64_t start_ts = get_micros(); - TableFiles::iterator table_it = dead_tablet_files_.begin(); - for (; table_it != dead_tablet_files_.end(); ++table_it) { - DeleteTableFiles(table_it->first); - } - if (FLAGS_tera_garbage_collect_debug_log) { - DEBUG_print_files(true); - DEBUG_print_files(false); - } - LOG(INFO) << "[gc] Delete useless sst, cost: " << (get_micros() - start_ts) / 1000 << "ms. 
list_times " << list_count_.Get(); - list_count_.Clear(); -} - -void IncrementalGcStrategy::Clear(std::string tablename) { - LOG(INFO) << "[gc] Clear " << tablename; - MutexLock lock(&gc_mutex_); - dead_tablet_files_.erase(tablename); - live_tablet_files_.erase(tablename); -} - -void IncrementalGcStrategy::DeleteTableFiles(const std::string& table_name) { - std::string table_path = FLAGS_tera_tabletnode_path_prefix + table_name; - leveldb::Env* env = io::LeveldbBaseEnv(); - TabletFiles& dead_tablets = dead_tablet_files_[table_name]; - TabletFiles& live_tablets = live_tablet_files_[table_name]; - int64_t earliest_ready_time = max_ts_; - TabletFiles::iterator tablet_it = live_tablets.begin(); - for (; tablet_it != live_tablets.end(); ++tablet_it) { - if (tablet_it->second.ready_time_ < earliest_ready_time) { - earliest_ready_time = tablet_it->second.ready_time_; - } - } - - if (earliest_ready_time != max_ts_) { - VLOG(12) << "[gc] earliest ready time " << earliest_ready_time << " : " << common::timer::get_time_str(earliest_ready_time); - } else { - VLOG(12) << "[gc] " << table_name << "'s tablets not ready"; - } - std::set gc_tablets; - for (tablet_it = dead_tablets.begin(); tablet_it != dead_tablets.end(); ++tablet_it) { - if (tablet_it->second.dead_time_ < earliest_ready_time) { - gc_tablets.insert(tablet_it->first); - VLOG(12) << "[gc] will gc tablet: " << leveldb::GetTabletPathFromNum(table_name, tablet_it->first); - } - } - - for (std::set::iterator gc_it = gc_tablets.begin(); gc_it != gc_tablets.end();) { - std::map& lg_files = dead_tablets[*gc_it].files_; - std::map::iterator lg_it = lg_files.begin(); - std::string tablet_path = leveldb::GetTabletPathFromNum(table_path, *gc_it); - for (; lg_it != lg_files.end();) { - VLOG(12) << "[gc] entry lg gc lg=" << lg_it->first; - LgFileSet& lg_file_set = lg_it->second; - std::set::iterator file_it = lg_file_set.storage_files_.begin(); - for (; file_it != lg_file_set.storage_files_.end();) { - if 
(lg_file_set.live_files_.find(*file_it) == lg_file_set.live_files_.end()) { - std::string file_path = - leveldb::BuildTableFilePath(table_path, lg_it->first, *file_it); - - std::string debug_str; - for (std::set::iterator it = lg_file_set.live_files_.begin(); it != lg_file_set.live_files_.end(); ++it) { - uint64_t file_no; - leveldb::ParseFullFileNumber(*it, NULL, &file_no); - debug_str += " " + std::to_string(file_no); - } - // VLOG(12) << "[gc] live = " << debug_str; - LOG(INFO) << "[gc] delete: " << file_path; - if (env->DeleteFile(file_path).ok()) { - lg_file_set.storage_files_.erase(file_it++); - } else { - ++file_it; - // do nothing, try to delete next time - // TODO: if retry times > MAX ? - // TODO: if failed due to timeout but delete ok in DFS, it will always retry - } - } else { - ++file_it; - } - } - if (lg_file_set.storage_files_.size() == 0) { - if (lg_file_set.live_files_.size() != 0) { - uint64_t full_number = *(lg_file_set.live_files_.begin()); - uint64_t tablet_number, file_number; - leveldb::ParseFullFileNumber(full_number, &tablet_number, &file_number); - LOG(ERROR) << "[gc] empty tablet still has live files: " << tablet_number << "/" << lg_it->first << "/" << file_number; - } else { - std::string lg_str = std::to_string(lg_it->first); - std::string lg_path = tablet_path + "/" + lg_str; - LOG(INFO) << "[gc] delete empty lg dir: " << lg_path; - if (io::DeleteEnvDir(lg_path).ok()) { - lg_files.erase(lg_it++); - } else { - ++lg_it; - // do nothing, try to delete next time - // TODO: iff retry times > MAX ? - // TODO: if failed due to timeout but delete ok in DFS, it will always retry - } - } - } else { - ++lg_it; - } - } - - if (lg_files.size() == 0) { - LOG(INFO) << "[gc] delete empty tablet dir: " << tablet_path; - if (env->DeleteDir(tablet_path).ok()) { - dead_tablets.erase(*gc_it); - } else { - LOG(ERROR) << "[gc] rm dir fail: " << tablet_path; - // do nothing, try to delete next time - // TODO: iff retry times > MAX ? 
- // TODO: if failed due to timeout but delete ok in DFS, it will always retry - } - } else { - // clear live_files_ in dead_tablets for next round of gc - for (lg_it = lg_files.begin(); lg_it != lg_files.end(); ++lg_it) { - VLOG(12) << "[gc] clear live_files_(lg_no/file_no): " << *gc_it << "/" << lg_it->first; - lg_it->second.live_files_.clear(); - } - dead_tablets[*gc_it].dead_time_ = get_micros() / 1000000; - VLOG(12) << "[gc] update dead_time_ " << dead_tablets[*gc_it].dead_time_ << " " << common::timer::get_time_str(dead_tablets[*gc_it].dead_time_); - } - gc_it++; - } -} - -bool IncrementalGcStrategy::CollectSingleDeadTablet(const std::string& tablename, uint64_t tabletnum) { - std::string tablepath = FLAGS_tera_tabletnode_path_prefix + tablename; - std::string tablet_path = leveldb::GetTabletPathFromNum(tablepath, tabletnum); - leveldb::Env* env = io::LeveldbBaseEnv(); - std::vector children; - leveldb::Status s = env->GetChildren(tablet_path, &children); - if (!s.ok()) { - LOG(ERROR) << "[gc] list directory fail: " << tablet_path; - return false; - } - list_count_.Inc(); - - for (size_t lg = 0; lg < children.size(); ++lg) { - std::string lg_path = tablet_path + "/" + children[lg]; - leveldb::FileType type = leveldb::kUnknown; - uint64_t number = 0; - if (ParseFileName(children[lg], &number, &type)) { - LOG(INFO) << "[gc] delete: " << lg_path; - env->DeleteFile(lg_path); - continue; - } - - leveldb::Slice rest(children[lg]); - uint64_t lg_num = 0; - if (!leveldb::ConsumeDecimalNumber(&rest, &lg_num)) { - LOG(INFO) << "[gc] skip unknown dir: " << lg_path; - continue; - } - - std::vector files; - env->GetChildren(lg_path, &files); - list_count_.Inc(); - - int64_t lg_no = std::stoll(children[lg]); - std::map& tablet_files = dead_tablet_files_[tablename][tabletnum].files_; - LgFileSet lg_file_set; - tablet_files.insert(std::make_pair(lg_no, lg_file_set)); - LgFileSet& temp_lg_files_set = tablet_files[lg_no]; - for (size_t f = 0; f < files.size(); ++f) { - 
std::string file_path = lg_path + "/" + files[f]; - type = leveldb::kUnknown; - number = 0; - if (!ParseFileName(files[f], &number, &type) || - type != leveldb::kTableFile) { - // skip manifest/CURRENT - continue; - } - - uint64_t full_number = leveldb::BuildFullFileNumber(lg_path, number); - temp_lg_files_set.storage_files_.insert(full_number); - } - } - return true; -} - -void IncrementalGcStrategy::DEBUG_print_files(bool print_dead) { - TableFiles all_tablet_files; - if (print_dead == true) { - LOG(INFO) << "----------------------------[gc] Test print DEAD"; - all_tablet_files = dead_tablet_files_; - } else { - LOG(INFO) << "----------------------------[gc] Test print LIVE"; - all_tablet_files = live_tablet_files_; - } - TableFiles::iterator table_it; - for (table_it = all_tablet_files.begin(); table_it != all_tablet_files.end(); ++table_it) { - LOG(INFO) << "[gc] table=" << table_it->first; - TabletFiles& tablet_files = table_it->second; - TabletFiles::iterator tablet_it; - for (tablet_it = tablet_files.begin(); tablet_it != tablet_files.end(); ++tablet_it) { - LOG(INFO) << "[gc] tablet -- " << tablet_it->first; - TabletFileSet tablet_file_set = tablet_it->second; - LOG(INFO) << "[gc] ready -- " << tablet_file_set.ready_time_; - LOG(INFO) << "[gc] dead -- " << tablet_file_set.dead_time_; - std::map& files = tablet_file_set.files_; - std::map::iterator lg_it; - for (lg_it = files.begin(); lg_it != files.end(); ++lg_it) { - std::set& f = (lg_it->second).storage_files_; - std::string debug_str = ""; - for (std::set::iterator it = f.begin(); it != f.end(); ++it) { - uint64_t file_no; - leveldb::ParseFullFileNumber(*it, NULL, &file_no); - debug_str += " " + std::to_string(file_no); - } - LOG(INFO) << "[gc] lg stor -- " << lg_it->first << "-" << (lg_it->second).storage_files_.size() << debug_str; - f = (lg_it->second).live_files_; - debug_str = ""; - for (std::set::iterator it = f.begin(); it != f.end(); ++it) { - uint64_t file_no; - leveldb::ParseFullFileNumber(*it, 
NULL, &file_no); - debug_str += " " + std::to_string(file_no); - } - LOG(INFO) << "[gc] lg live -- " << lg_it->first << "-" << (lg_it->second).live_files_.size() << debug_str; - } - } - } - LOG(INFO) << "----------------------------[gc] Done Test print"; -} - } // namespace master } // namespace tera diff --git a/src/master/gc_strategy.h b/src/master/gc_strategy.h index cccbd91b0..c68364502 100644 --- a/src/master/gc_strategy.h +++ b/src/master/gc_strategy.h @@ -7,7 +7,7 @@ #include "master/tablet_manager.h" #include "proto/tabletnode_client.h" #include "types.h" -#include "utils/counter.h" +#include "common/counter.h" namespace tera { namespace master { @@ -68,58 +68,6 @@ class BatchGcStrategy : public GcStrategy { tera::Counter list_count_; }; -class IncrementalGcStrategy : public GcStrategy{ -public: - IncrementalGcStrategy(std::shared_ptr tablet_manager); - virtual ~IncrementalGcStrategy() {} - - // get dead tablets - virtual bool PreQuery (); - - // gather live files - virtual void ProcessQueryCallbackForGc(QueryResponse* response); - - // delete dead files - virtual void PostQuery (); - - // clear memory when table is deleted - virtual void Clear(std::string tablename); - -private: - void DEBUG_print_files(bool print_dead); - bool CollectSingleDeadTablet(const std::string& tablename, uint64_t tabletnum); - void DeleteTableFiles(const std::string& table_name); - - struct LgFileSet { - std::set storage_files_; - std::set live_files_; - }; - - struct TabletFileSet { - int64_t dead_time_; - int64_t ready_time_; - std::map files_; // lg_no -> files - TabletFileSet() { - dead_time_ = std::numeric_limits::max(); - ready_time_ = 0; - }; - TabletFileSet(int64_t dead_time, int64_t ready_time) { - dead_time_ = dead_time; - ready_time_ = ready_time; - } - }; - - typedef std::map TabletFiles; // tablet_number -> files - typedef std::map TableFiles; // table_name -> files - mutable Mutex gc_mutex_; - std::shared_ptr tablet_manager_; - int64_t last_gc_time_; - TableFiles 
dead_tablet_files_; - TableFiles live_tablet_files_; - int64_t max_ts_; - tera::Counter list_count_; -}; - } // namespace master } // namespace tera diff --git a/src/master/master_entry.cc b/src/master/master_entry.cc index 1e958c028..13c9d276d 100644 --- a/src/master/master_entry.cc +++ b/src/master/master_entry.cc @@ -7,6 +7,7 @@ #include #include +#include "common/metric/collector_report.h" #include "common/net/ip_address.h" #include "master/master_impl.h" #include "master/remote_master.h" @@ -15,6 +16,8 @@ DECLARE_string(tera_master_port); DECLARE_int32(tera_master_rpc_server_max_inflow); DECLARE_int32(tera_master_rpc_server_max_outflow); +DECLARE_bool(tera_metric_http_server_enable); +DECLARE_int32(tera_metric_http_server_listen_port); std::string GetTeraEntryName() { return "master"; @@ -30,7 +33,8 @@ namespace master { MasterEntry::MasterEntry() : master_impl_(NULL), remote_master_(NULL), - rpc_server_(NULL) { + rpc_server_(NULL), + metric_http_server_(new tera::MetricHttpServer()) { sofa::pbrpc::RpcServerOptions rpc_options; rpc_options.max_throughput_in = FLAGS_tera_master_rpc_server_max_inflow; rpc_options.max_throughput_out = FLAGS_tera_master_rpc_server_max_outflow; @@ -57,10 +61,20 @@ bool MasterEntry::StartServer() { } LOG(INFO) << "finish starting master server"; + + // start metric http server + if (FLAGS_tera_metric_http_server_enable) { + if(!metric_http_server_->Start(FLAGS_tera_metric_http_server_listen_port)) { + LOG(WARNING) << "Start metric http server failed. 
Ignore"; + } + } else { + LOG(INFO) << "Metric http server is disabled."; + } return true; } bool MasterEntry::Run() { + CollectorReportPublisher::GetInstance().Refresh(); static int64_t timer_ticks = 0; ++timer_ticks; @@ -73,6 +87,7 @@ bool MasterEntry::Run() { } void MasterEntry::ShutdownServer() { + metric_http_server_->Stop(); rpc_server_->Stop(); master_impl_.reset(); } diff --git a/src/master/master_entry.h b/src/master/master_entry.h index c8f738916..919da4928 100644 --- a/src/master/master_entry.h +++ b/src/master/master_entry.h @@ -8,6 +8,7 @@ #include #include "common/base/scoped_ptr.h" +#include "common/metric/metric_http_server.h" #include "tera_entry.h" namespace tera { @@ -33,6 +34,7 @@ class MasterEntry : public TeraEntry { // scoped_ptr remote_master_; RemoteMaster* remote_master_; scoped_ptr rpc_server_; + scoped_ptr metric_http_server_; }; } // namespace master diff --git a/src/master/master_impl.cc b/src/master/master_impl.cc index 598faa4ec..f667c7d0f 100644 --- a/src/master/master_impl.cc +++ b/src/master/master_impl.cc @@ -26,7 +26,7 @@ #include "utils/config_utils.h" #include "utils/schema_utils.h" #include "utils/string_util.h" -#include "utils/timer.h" +#include "common/timer.h" #include "utils/utils_cmd.h" DECLARE_string(tera_master_port); @@ -48,11 +48,15 @@ DECLARE_string(tera_master_meta_table_name); DECLARE_string(tera_master_meta_table_path); DECLARE_int32(tera_master_meta_retry_times); +DECLARE_string(tera_coord_type); DECLARE_bool(tera_zk_enabled); DECLARE_bool(tera_mock_zk_enabled); DECLARE_double(tera_master_workload_split_threshold); +DECLARE_double(tera_master_workload_merge_threshold); DECLARE_int64(tera_master_split_tablet_size); +DECLARE_int64(tera_master_min_split_size); +DECLARE_double(tera_master_min_split_ratio); DECLARE_int64(tera_master_merge_tablet_size); DECLARE_bool(tera_master_kick_tabletnode_enabled); DECLARE_int32(tera_master_kick_tabletnode_query_fail_times); @@ -84,6 +88,8 @@ 
DECLARE_bool(tera_master_stat_table_enabled); DECLARE_int64(tera_master_stat_table_splitsize); DECLARE_int32(tera_master_gc_period); +DECLARE_bool(tera_master_gc_trash_enabled); +DECLARE_int64(tera_master_gc_trash_clean_period_s); DECLARE_string(tera_tabletnode_path_prefix); DECLARE_string(tera_leveldb_env_type); @@ -108,6 +114,7 @@ DECLARE_int32(tera_master_schema_update_retry_times); DECLARE_int64(tera_master_availability_check_period); DECLARE_bool(tera_master_availability_check_enabled); +DECLARE_bool(tera_master_update_split_meta); using namespace std::placeholders; namespace tera { @@ -131,6 +138,8 @@ MasterImpl::MasterImpl() thread_pool_(new ThreadPool(FLAGS_tera_master_impl_thread_max_num)), is_stat_table_(false), stat_table_(NULL), + gc_trash_clean_enabled_(false), + gc_trash_clean_timer_id_(kInvalidTimerId), gc_enabled_(false), gc_timer_id_(kInvalidTimerId), gc_query_enable_(false), @@ -152,15 +161,12 @@ MasterImpl::MasterImpl() if (FLAGS_tera_master_gc_strategy == "default") { LOG(INFO) << "[gc] gc strategy is BatchGcStrategy"; gc_strategy_ = std::shared_ptr(new BatchGcStrategy(tablet_manager_)); - } else if (FLAGS_tera_master_gc_strategy == "incremental") { - LOG(INFO) << "[gc] gc strategy is IncrementalGcStrategy"; - gc_strategy_ = std::shared_ptr(new IncrementalGcStrategy(tablet_manager_)); } else if (FLAGS_tera_master_gc_strategy == "trackable") { LOG(INFO) << "[gc] gc strategy is Trackable"; } else { - LOG(WARNING) << "Unknown gc strategy: " << FLAGS_tera_master_gc_strategy - << ", default gc strategy: BatchGcStrategy will take effect"; - gc_strategy_ = std::shared_ptr(new BatchGcStrategy(tablet_manager_)); + LOG(ERROR) << "Unknown gc strategy: " << FLAGS_tera_master_gc_strategy + << ", exit"; + exit(EXIT_FAILURE); } } @@ -171,18 +177,29 @@ MasterImpl::~MasterImpl() { } bool MasterImpl::Init() { - if (FLAGS_tera_zk_enabled) { + if (FLAGS_tera_coord_type.empty()) { + LOG(ERROR) << "Note: We don't recommend that use '" + << 
"--tera_[zk|ins|mock_zk|mock_ins]_enabled' flag for your cluster coord" + << " replace by '--tera_coord_type=[zk|ins|mock_zk|mock_ins|fake_zk]'" + << " flag is usually recommended."; + } + if (FLAGS_tera_coord_type == "zk" + || (FLAGS_tera_coord_type.empty() && FLAGS_tera_zk_enabled)) { zk_adapter_.reset(new MasterZkAdapter(this, local_addr_)); - } else if (FLAGS_tera_ins_enabled) { + } else if (FLAGS_tera_coord_type == "ins" + || (FLAGS_tera_coord_type.empty() && FLAGS_tera_ins_enabled)) { LOG(INFO) << "ins mode" ; zk_adapter_.reset(new InsMasterZkAdapter(this, local_addr_)); - } else if (FLAGS_tera_mock_zk_enabled) { + } else if (FLAGS_tera_coord_type == "mock_zk" + || (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_zk_enabled)) { LOG(INFO) << "mock zk mode" ; zk_adapter_.reset(new MockMasterZkAdapter(this, local_addr_)); - } else if (FLAGS_tera_mock_ins_enabled) { + } else if (FLAGS_tera_coord_type == "mock_ins" + || (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_ins_enabled)) { LOG(INFO) << "mock ins mode" ; zk_adapter_.reset(new MockInsMasterZkAdapter(this, local_addr_)); - } else { + } else if (FLAGS_tera_coord_type == "fake_zk" + || FLAGS_tera_coord_type.empty()) { LOG(INFO) << "fake zk mode!"; zk_adapter_.reset(new FakeMasterZkAdapter(this, local_addr_)); } @@ -406,18 +423,22 @@ void MasterImpl::RestoreUserTablet(const std::vector& report_meta_li VLOG(8) << "READY Tablet, " << tablet; continue; } - tablet_availability_->AddNotReadyTablet(tablet->GetPath()); CHECK(tablet->GetStatus() == kTableNotInit); + tablet_availability_->AddNotReadyTablet(tablet->GetPath(), tablet->GetStatus(), + tablet->GetTable()->GetStatus()); TabletNodePtr node; if (server_addr.empty()) { tablet->SetStatus(kTableOffLine); + ProcessOffLineTablet(tablet); VLOG(8) << "OFFLINE Tablet with empty addr, " << tablet; } else if (!tabletnode_manager_->FindTabletNode(server_addr, &node)) { tablet->SetStatus(kTableOffLine); + ProcessOffLineTablet(tablet); VLOG(8) << "OFFLINE Tablet of Dead 
TS, " << tablet; } else if (node->state_ == kReady) { tablet->SetStatus(kTableOffLine); + ProcessOffLineTablet(tablet); VLOG(8) << "OFFLINE Tablet of Alive TS, " << tablet; TryLoadTablet(tablet, server_addr); } else { @@ -1142,6 +1163,7 @@ void MasterImpl::ShowTables(const ShowTablesRequest* request, TabletPtr tablet = tablet_list[i]; TabletMeta meta; tablet->ToMeta(&meta); + meta.set_last_move_time_us(tablet->LastMoveTime()); tablet_meta_list->add_meta()->CopyFrom(meta); tablet_meta_list->add_counter()->CopyFrom(tablet->GetCounter()); tablet_meta_list->add_timestamp(tablet->UpdateTime()); @@ -1266,6 +1288,8 @@ void MasterImpl::CmdCtrl(const CmdCtrlRequest* request, ReloadConfig(response); } else if (request->command() == "kick") { KickTabletNodeCmdCtrl(request, response); + } else if (request->command() == "table") { + TableCmdCtrl(request, response); } else { response->set_status(kInvalidArgument); } @@ -1458,13 +1482,42 @@ void MasterImpl::ReloadConfig(CmdCtrlResponse* response) { } } -void MasterImpl::TabletCmdCtrl(const CmdCtrlRequest* request, - CmdCtrlResponse* response) { +void MasterImpl::TableCmdCtrl(const CmdCtrlRequest* request, + CmdCtrlResponse* response) { if (request->arg_list_size() < 2) { response->set_status(kInvalidArgument); return; } + if (request->arg_list(0) == "split") { + TabletPtr tablet; + StatusCode status; + for (int32_t i = 2; i < request->arg_list_size(); i++) { + if (!tablet_manager_->SearchTablet(request->arg_list(1), + request->arg_list(i), + &tablet, &status)) { + response->set_status(kInvalidArgument); + return; + } + VLOG(10) << "table split: key " << request->arg_list(i) + << ", " << tablet; + TrySplitTablet(tablet, request->arg_list(i)); + } + response->set_status(kMasterOk); + } else { + response->set_status(kInvalidArgument); + } + return; +} + +void MasterImpl::TabletCmdCtrl(const CmdCtrlRequest* request, + CmdCtrlResponse* response) { + int32_t request_argc = request->arg_list_size(); + if (request_argc < 2) { + 
response->set_status(kInvalidArgument); + return; + } + const std::string& op = request->arg_list(0); const std::string& tablet_id = request->arg_list(1); TabletPtr tablet; bool found = false; @@ -1483,40 +1536,35 @@ void MasterImpl::TabletCmdCtrl(const CmdCtrlRequest* request, return; } - if (request->arg_list(0) == "reload") { + if (op == "reload" && request_argc == 2) { std::string current_server_addr = tablet->GetServerAddr(); TryMoveTablet(tablet, current_server_addr, true); // force to unload and load tablet even it on the same ts - - } else if (request->arg_list(0) == "move") { - if (request->arg_list_size() > 3) { - response->set_status(kInvalidArgument); - return; - } - std::string expect_server_addr; - if (request->arg_list_size() == 3) { - expect_server_addr = request->arg_list(2); - } + response->set_status(kMasterOk); + } else if (op == "reloadx" && request_argc == 3 + && tablet->SetErrorIgnoredLGs(request->arg_list(2))) { + std::string current_server_addr = tablet->GetServerAddr(); + TryMoveTablet(tablet, current_server_addr, true); + response->set_status(kMasterOk); + } else if (op == "move" && request_argc == 3) { + std::string expect_server_addr = request->arg_list(2); TryMoveTablet(tablet, expect_server_addr); response->set_status(kMasterOk); - } else if (request->arg_list(0) == "split") { - if (request->arg_list_size() > 3) { - response->set_status(kInvalidArgument); - return; - } + } else if (op == "movex" && request_argc == 4 + && tablet->SetErrorIgnoredLGs(request->arg_list(3))) { + std::string expect_server_addr = request->arg_list(2); + TryMoveTablet(tablet, expect_server_addr); + response->set_status(kMasterOk); + } else if (op == "split" && (request_argc == 2 || request_argc == 3)) { std::string split_key; - if (request->arg_list_size() == 3) { + if (request_argc == 3) { split_key = request->arg_list(2); LOG(INFO) << "User specified split key: " << split_key; } TrySplitTablet(tablet, split_key); response->set_status(kMasterOk); - } else if 
(request->arg_list(0) == "merge") { - if (request->arg_list_size() > 3) { - response->set_status(kInvalidArgument); - return; - } + } else if (op == "merge" && request_argc == 2) { TryMergeTablet(tablet); response->set_status(kMasterOk); } else { @@ -1892,9 +1940,12 @@ bool MasterImpl::TabletNodeLoadBalance(TabletNodePtr tabletnode, Scheduler* sche split_size = tablet->GetSchema().split_size(); } if (write_workload > FLAGS_tera_master_workload_split_threshold) { - split_size /= 2; - VLOG(6) << tablet->GetPath() << " write_workload too large, split it by size: " - << split_size; + if (split_size > FLAGS_tera_master_min_split_size) { + split_size = std::max(FLAGS_tera_master_min_split_size, + static_cast(split_size * FLAGS_tera_master_min_split_ratio)); + } + VLOG(6) << tablet->GetPath() << ", trigger workload split, write_workload: " << write_workload + << ", split it by size(M): " << split_size; } int64_t merge_size = FLAGS_tera_master_merge_tablet_size; if (tablet->GetSchema().has_merge_size() && tablet->GetSchema().merge_size() > 0) { @@ -1903,12 +1954,14 @@ bool MasterImpl::TabletNodeLoadBalance(TabletNodePtr tabletnode, Scheduler* sche if (tablet->GetDataSize() < 0) { // tablet size is error, skip it continue; - } else if (tablet->GetDataSize() > (split_size << 20)) { + } else if (tablet->GetDataSize() > (split_size << 20) && + tablet->TestAndSetSplitTimeStamp(get_micros())) { TrySplitTablet(tablet); any_tablet_split = true; continue; } else if (tablet->GetDataSize() < (merge_size << 20)) { - if (write_workload < 1) { + if (!tablet->IsBusy() && + write_workload < FLAGS_tera_master_workload_merge_threshold) { TryMergeTablet(tablet); } else { VLOG(6) << "[merge] skip high workload tablet: " @@ -2133,14 +2186,15 @@ void MasterImpl::DeleteTabletNode(const std::string& tabletnode_addr) { std::vector::iterator it; for (it = tablet_list.begin(); it != tablet_list.end(); ++it) { TabletPtr tablet = *it; - tablet_availability_->AddNotReadyTablet(tablet->GetPath()); if 
(FLAGS_tera_master_tabletnode_timeout > 0 && tablet->GetTableName() != FLAGS_tera_master_meta_table_name) { - tablet->SetStatusIf(kTabletPending, kTableReady); - } else if (tablet->SetStatusIf(kTableOffLine, kTableReady)) { + tablet->SetStatusIf(kTabletPending, kTableReady, tabletnode_addr); + } else if (tablet->SetStatusIf(kTableOffLine, kTableReady, tabletnode_addr)) { ProcessOffLineTablet(tablet); } + tablet_availability_->AddNotReadyTablet(tablet->GetPath(), tablet->GetStatus(), + tablet->GetTable()->GetStatus()); if (tablet->GetStatus() == kTableUnLoadFail && tablet->GetMergeParam() != NULL) { MergeTabletUnloadCallback(tablet); @@ -2238,6 +2292,7 @@ bool MasterImpl::EnterSafeMode(StatusCode* status) { tablet_manager_->Stop(); DisableTabletNodeGcTimer(); DisableLoadBalance(); + DisableGcTrashCleanTimer(); return true; } @@ -2271,6 +2326,7 @@ bool MasterImpl::LeaveSafeMode(StatusCode* status) { EnableQueryTabletNodeTimer(); EnableTabletNodeGcTimer(); EnableLoadBalance(); + EnableGcTrashCleanTimer(); std::vector node_array; tabletnode_manager_->GetAllTabletNodeInfo(&node_array); @@ -2481,6 +2537,14 @@ void MasterImpl::LoadTabletAsync(TabletPtr tablet, LoadClosure done, uint64_t) { request->add_parent_tablets(meta.parent_tablets(i)); } + std::vector ignore_err_lgs; + tablet->GetErrorIgnoredLGs(&ignore_err_lgs); + for (uint32_t i = 0; i < ignore_err_lgs.size(); ++i) { + VLOG(6) << "Add ignore err lg to request :" << ignore_err_lgs[i]; + request->add_ignore_err_lgs(ignore_err_lgs[i]); + } + tablet->SetErrorIgnoredLGs(); // clean error lg, only for this request once + LOG(INFO) << "LoadTabletAsync id: " << request->sequence_id() << ", " << tablet; node_client.LoadTablet(request, response, done); @@ -3694,6 +3758,7 @@ void MasterImpl::SplitTabletAsync(TabletPtr tablet, const std::string& split_key request->add_child_tablets(tablet->GetTable()->GetNextTabletNo()); request->add_child_tablets(tablet->GetTable()->GetNextTabletNo()); request->set_split_key(split_key); + 
request->set_master_update_meta(FLAGS_tera_master_update_split_meta); tablet->ToMeta(request->mutable_tablet_meta()); std::vector snapshots; @@ -3704,7 +3769,8 @@ void MasterImpl::SplitTabletAsync(TabletPtr tablet, const std::string& split_key LOG(INFO) << "SplitTabletAsync id: " << request->sequence_id() << ", " << tablet; - tablet_availability_->AddNotReadyTablet(tablet->GetPath()); + tablet_availability_->AddNotReadyTablet(tablet->GetPath(), tablet->GetStatus(), + tablet->GetTable()->GetStatus()); node_client.SplitTablet(request, response, done); } @@ -3714,11 +3780,11 @@ void MasterImpl::SplitTabletCallback(TabletPtr tablet, bool failed, int error_code) { CHECK(tablet->GetStatus() == kTableOnSplit); StatusCode status = response->status(); - delete request; - delete response; + std::unique_ptr response_deleter(response); + std::unique_ptr request_deleter(request); const std::string& server_addr = tablet->GetServerAddr(); - // fail + // fail, RPC fail or unexpected return status if (failed || (status != kTabletNodeOk && status != kTableNotSupport && status != kMetaTabletError)) { if (failed) { @@ -3740,11 +3806,12 @@ void MasterImpl::SplitTabletCallback(TabletPtr tablet, if (status == kTabletNodeOk) { // tabletnode unloaded the tablet LOG(INFO) << "RPC SplitTablet success"; - } else if (status == kTableNotSupport) { + } else if (status == kTableNotSupport) { // TODO: use TryLoadAsync will be more safe. 
// tabletnode refused to split and didn't unload the tablet tablet->SetStatusIf(kTableReady, kTableOnSplit); ProcessReadyTablet(tablet); } else { + // this will not be true once Master is responsible for write child tablets info, will be deleted CHECK(status == kMetaTabletError); // meta table is not ok LOG(ERROR) << "fail to split: " << StatusCodeToString(status) << ", " @@ -3781,14 +3848,153 @@ void MasterImpl::SplitTabletCallback(TabletPtr tablet, tablet_availability_->EraseNotReadyTablet(tablet->GetPath()); return; } - + // old TS write child tablets info to meta table directly without sending back child tablets info + // we need scan MetaTable to get children meta info + if (response->split_keys_size() == 0) { // scan meta tablet - if (tablet->GetStatus() == kTableOnSplit) { - ScanClosure done = - std::bind(&MasterImpl::ScanMetaCallbackForSplit, this, tablet, _1, _2, _3, _4); - ScanMetaTableAsync(tablet->GetTableName(), tablet->GetKeyStart(), - tablet->GetKeyEnd(), done); + if (tablet->GetStatus() == kTableOnSplit) { + ScanClosure done = + std::bind(&MasterImpl::ScanMetaCallbackForSplit, this, tablet, _1, _2, _3, _4); + ScanMetaTableAsync(tablet->GetTableName(), tablet->GetKeyStart(), + tablet->GetKeyEnd(), done); + } + } else { + if (response->split_keys_size() > 1) { + LOG(INFO) << "currently we only support one split key, tablet " + << tablet << " will be split by key: " << response->split_keys(0); + } + SplitTabletWriteMetaAsync(tablet, response->split_keys(0)); + } +} + +void MasterImpl::SplitTabletWriteMetaAsync(TabletPtr tablet, const std::string& split_key) { + const std::string& key_start = tablet->GetKeyStart(); + const std::string& key_end = tablet->GetKeyEnd(); + if (split_key <= key_start || (key_end != "" && split_key >= key_end)) { + LOG(ERROR) << kSms << "two splits are not successive, " + << tablet << ", split_key: " << split_key; + // the tablet has alreay been unloaded, so we just mark it as kTableOffLine and try to reload it + 
tablet->SetStatus(kTableOffLine); + ProcessOffLineTablet(tablet); + TryLoadTablet(tablet); + return; + } + std::string meta_addr; + if (!tablet_manager_->GetMetaTabletAddr(&meta_addr)) { + LOG(ERROR) << "[split] meta table is not ready, try to load parent tablet"; + tablet->SetStatus(kTableOffLine); + ProcessOffLineTablet(tablet); + TryLoadTablet(tablet); + return; + } + + WriteTabletRequest* meta_request = new WriteTabletRequest; + WriteTabletResponse* meta_response = new WriteTabletResponse; + meta_request->set_sequence_id(this_sequence_id_.Inc()); + meta_request->set_tablet_name(FLAGS_tera_master_meta_table_name); + meta_request->set_is_sync(true); + meta_request->set_is_instant(true); + + const std::string& parent_path = tablet->GetPath(); + int64_t parent_size = tablet->GetDataSize(); + TablePtr table = tablet->GetTable(); + + std::string child_start_key = key_start; + std::string child_end_key = split_key; + std::vector child_tablets; + for (int i = 0; i < 2; ++i) { + TabletMeta child_meta; + tablet->ToMeta(&child_meta); + child_meta.clear_parent_tablets(); + child_meta.add_parent_tablets(leveldb::GetTabletNumFromPath(parent_path)); + child_meta.set_path(leveldb::GetChildTabletPath(parent_path, table->GetNextTabletNo())); + child_meta.mutable_key_range()->set_key_start(child_start_key); + child_meta.mutable_key_range()->set_key_end(child_end_key); + child_meta.set_size(parent_size / 2); + std::string meta_key, meta_value; + MakeMetaTableKeyValue(child_meta, &meta_key, &meta_value); + RowMutationSequence* mu_seq = meta_request->add_row_list(); + mu_seq->set_row_key(meta_key); + Mutation* mutation = mu_seq->add_mutation_sequence(); + mutation->set_type(kPut); + mutation->set_value(meta_value); + child_tablets.emplace_back(new Tablet(child_meta, table)); + child_start_key = child_end_key; + child_end_key = key_end; } + + WriteClosure done = std::bind(&MasterImpl::SplitTabletWriteMetaCallback, this, tablet, + child_tablets, FLAGS_tera_master_meta_retry_times, _1, 
_2, _3, _4); + + tabletnode::TabletNodeClient meta_node_client(meta_addr); + meta_node_client.WriteTablet(meta_request, meta_response, done); + return; +} + +void MasterImpl::SplitTabletWriteMetaCallback(TabletPtr parent_tablet, + std::vector child_tablets, + int32_t retry_times, + WriteTabletRequest* request, + WriteTabletResponse* response, + bool failed, int error_code) { + StatusCode status = response->status(); + if (!failed && status == kTabletNodeOk) { + CHECK_EQ(response->row_status_list_size(), 2); + CHECK_EQ(child_tablets.size(), 2); + status = response->row_status_list(0); + } + delete request; + delete response; + if (failed || status != kTabletNodeOk) { + if (failed) { + LOG(ERROR) << "[split] fail to add to meta tablet " + << sofa::pbrpc::RpcErrorCodeToString(error_code) << "," + << parent_tablet; + } else { + LOG(ERROR) << "[split] fail to add to meta tablet" + << StatusCodeToString(status) << "," << parent_tablet; + } + if (retry_times <= 0) { + LOG(ERROR) << kSms << "[split] fail to update meta tablet in max retry" + <<" times, parent_tablet: " << parent_tablet; + parent_tablet->SetStatus(kTableOffLine); + ProcessOffLineTablet(parent_tablet); + TryLoadTablet(parent_tablet); + } else { + std::vector meta_entries; + for (std::size_t idx = 0; idx < child_tablets.size(); ++idx) { + meta_entries.push_back(std::bind( + &Tablet::ToMetaTableKeyValue, child_tablets[idx], _1, _2)); + } + WriteClosure done = std::bind(&MasterImpl::SplitTabletWriteMetaCallback, this, + parent_tablet, child_tablets, retry_times - 1, _1, _2, _3, _4); + SuspendMetaOperation(meta_entries, false, done); + } + return; + } + + TabletMeta first_meta, second_meta; + child_tablets[0]->ToMeta(&first_meta); + first_meta.set_status(kTableOffLine); + child_tablets[1]->ToMeta(&second_meta); + second_meta.set_status(kTableOffLine); + TablePtr table = parent_tablet->GetTable(); + table->SplitTablet(parent_tablet, first_meta, second_meta, &child_tablets[0], &child_tablets[1]); + + 
tablet_availability_->EraseNotReadyTablet(parent_tablet->GetPath()); + tablet_availability_->AddNotReadyTablet(child_tablets[0]->GetPath(), child_tablets[0]->GetStatus(), + table->GetStatus()); + tablet_availability_->AddNotReadyTablet(child_tablets[1]->GetPath(), child_tablets[1]->GetStatus(), + table->GetStatus()); + LOG(INFO) << "split finish," << parent_tablet << ", try load child tablets," + << "\nfirst: " << first_meta.ShortDebugString() + << "\nsecond: " << second_meta.ShortDebugString(); + + ProcessOffLineTablet(child_tablets[0]); + TryLoadTablet(child_tablets[0]); + ProcessOffLineTablet(child_tablets[1]); + TryLoadTablet(child_tablets[1]); + return; } void MasterImpl::TryLoadTablet(TabletPtr tablet, std::string server_addr) { @@ -3971,6 +4177,18 @@ bool MasterImpl::TrySplitTablet(TabletPtr tablet, const std::string& split_key) // abort if status switch to offline (server down / disable) if (!tablet->SetStatusIf(kTableOnSplit, kTableReady)) { LOG(ERROR) << "error state, abort split table " << tablet->GetPath(); + node->FinishSplit(); + + TabletPtr next_tablet; + std::string split_key; + while (node->SplitNextWaitTablet(&next_tablet, &split_key)) { + if (next_tablet->SetStatusIf(kTableOnSplit, kTableReady)) { + next_tablet->SetServerId(node->uuid_); + SplitTabletAsync(next_tablet, split_key); + break; + } + node->FinishSplit(); + } return false; } @@ -4005,7 +4223,7 @@ bool MasterImpl::TryMergeTablet(TabletPtr tablet) { if (tablet2->GetStatus() != kTableReady || tablet2->IsBusy() || - tablet2->GetCounter().write_workload() >= 1) { + tablet2->GetCounter().write_workload() >= FLAGS_tera_master_workload_merge_threshold) { VLOG(20) << "[merge] merge failed, none proper tablet." 
<< " status:" << tablet2->GetStatus() << " isbusy:" << tablet2->IsBusy() @@ -4051,8 +4269,10 @@ void MasterImpl::MergeTabletAsync(TabletPtr tablet_p1, TabletPtr tablet_p2) { std::bind(&MasterImpl::UnloadTabletCallback, this, tablet_p2, FLAGS_tera_master_impl_retry_times, _1, _2, _3, _4); - tablet_availability_->AddNotReadyTablet(tablet_p1->GetPath()); - tablet_availability_->AddNotReadyTablet(tablet_p2->GetPath()); + tablet_availability_->AddNotReadyTablet(tablet_p1->GetPath(), tablet_p1->GetStatus(), + tablet_p1->GetTable()->GetStatus()); + tablet_availability_->AddNotReadyTablet(tablet_p2->GetPath(), tablet_p2->GetStatus(), + tablet_p2->GetTable()->GetStatus()); UnloadTabletAsync(tablet_p1, done1); UnloadTabletAsync(tablet_p2, done2); } @@ -4230,8 +4450,9 @@ void MasterImpl::MergeTabletWriteMetaCallback(TabletPtr tablet_c, tablet_availability_->EraseNotReadyTablet(tablet_p1->GetPath()); tablet_availability_->EraseNotReadyTablet(tablet_p2->GetPath()); - tablet_availability_->AddNotReadyTablet(tablet_c->GetPath()); ProcessOffLineTablet(tablet_c); + tablet_availability_->AddNotReadyTablet(tablet_c->GetPath(), tablet_c->GetStatus(), + tablet_c->GetTable()->GetStatus()); TryLoadTablet(tablet_c); delete request; delete response; @@ -4475,6 +4696,8 @@ void MasterImpl::UpdateTableRecordForEnableCallback(TablePtr table, int32_t retr LOG(ERROR) << "fail to load tablet: " << tablet->GetPath() << ", tablet status: " << StatusCodeToString(tablet->GetStatus()); } + tablet_availability_->AddNotReadyTablet(tablet->GetPath(), tablet->GetStatus(), + tablet->GetTable()->GetStatus()); } } @@ -4871,8 +5094,10 @@ void MasterImpl::ScanMetaCallbackForSplit(TabletPtr tablet, table->SplitTablet(tablet, first_meta, second_meta, &first_tablet, &second_tablet); tablet_availability_->EraseNotReadyTablet(tablet->GetPath()); - tablet_availability_->AddNotReadyTablet(first_tablet->GetPath()); - tablet_availability_->AddNotReadyTablet(second_tablet->GetPath()); + 
tablet_availability_->AddNotReadyTablet(first_tablet->GetPath(), first_tablet->GetStatus(), + first_tablet->GetTable()->GetStatus()); + tablet_availability_->AddNotReadyTablet(second_tablet->GetPath(), second_tablet->GetStatus(), + second_tablet->GetTable()->GetStatus()); LOG(INFO) << "split finish, " << tablet << ", try load child tablets, " << "\nfirst: " << first_meta.ShortDebugString() << "\nsecond: " << second_meta.ShortDebugString(); @@ -5074,12 +5299,14 @@ void MasterImpl::TryMoveTablet(TabletPtr tablet, const std::string& server_addr, << " to " << server_addr; if (tablet->SetStatusIf(kTableUnLoading, kTableReady)) { tablet->SetExpectServerAddr(server_addr); + tablet->SetLastMoveTime(get_micros()); TabletNodePtr node; if (!server_addr.empty() && tabletnode_manager_->FindTabletNode(server_addr, &node)) { node->PlanToMoveIn(); } - tablet_availability_->AddNotReadyTablet(tablet->GetPath()); + tablet_availability_->AddNotReadyTablet(tablet->GetPath(), tablet->GetStatus(), + tablet->GetTable()->GetStatus()); UnloadClosure done = std::bind(&MasterImpl::UnloadTabletCallback, this, tablet, FLAGS_tera_master_impl_retry_times, _1, _2, _3, _4); @@ -5209,6 +5436,60 @@ void MasterImpl::EnableTabletNodeGcTimer() { gc_enabled_ = true; } +void MasterImpl::DoGcTrashClean() { + { + MutexLock lock(&mutex_); + if (!gc_trash_clean_enabled_) { + gc_trash_clean_timer_id_ = kInvalidTimerId; + return; + } + } + + int64_t start_ts = get_micros(); + io::CleanTrackableGcTrash(); + LOG(INFO) << "[gc] clean trackable gc trash, cost: " + << (get_micros() - start_ts) / 1000 << " ms"; + + MutexLock lock(&mutex_); + ScheduleGcTrashClean(); +} + +void MasterImpl::ScheduleGcTrashClean() { + mutex_.AssertHeld(); + VLOG(10) << "[gc] ScheduleGcTrashClean"; + ThreadPool::Task task = + std::bind(&MasterImpl::DoGcTrashClean, this); + gc_timer_id_ = thread_pool_->DelayTask( + FLAGS_tera_master_gc_trash_clean_period_s * 1000, task); +} + +void MasterImpl::EnableGcTrashCleanTimer() { + if 
(!FLAGS_tera_master_gc_trash_enabled) { + return; + } + + MutexLock lock(&mutex_); + if (gc_trash_clean_timer_id_ == kInvalidTimerId) { + ScheduleGcTrashClean(); + } + gc_trash_clean_enabled_ = true; +} + +void MasterImpl::DisableGcTrashCleanTimer() { + if (!FLAGS_tera_master_gc_trash_enabled) { + return; + } + + MutexLock lock(&mutex_); + if (gc_trash_clean_timer_id_ != kInvalidTimerId) { + bool non_block = true; + if (thread_pool_->CancelTask(gc_timer_id_, non_block)) { + gc_trash_clean_timer_id_ = kInvalidTimerId; + } + } + gc_trash_clean_enabled_ = false; +} + void MasterImpl::DoAvailableCheck() { MutexLock lock(&mutex_); if (FLAGS_tera_master_availability_check_enabled) { @@ -5285,9 +5566,9 @@ void MasterImpl::DoTabletNodeGcPhase2() { } LOG(INFO) << "[gc] try clean trash dir."; - int64_t start = common::timer::get_micros(); + int64_t start = get_micros(); io::CleanTrashDir(); - int64_t cost = (common::timer::get_micros() - start) / 1000; + int64_t cost = (get_micros() - start) / 1000; LOG(INFO) << "[gc] clean trash dir done, cost: " << cost << "ms."; MutexLock lock(&mutex_); diff --git a/src/master/master_impl.h b/src/master/master_impl.h index a8959c703..3a7a17b7e 100644 --- a/src/master/master_impl.h +++ b/src/master/master_impl.h @@ -52,7 +52,6 @@ class MetaTable; class Scheduler; class TabletManager; class TabletNodeManager; -class MasterImplTest; class MasterImpl { public: @@ -233,6 +232,8 @@ class MasterImpl { CmdCtrlResponse* response); void TabletCmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response); + void TableCmdCtrl(const CmdCtrlRequest* request, + CmdCtrlResponse* response); void MetaCmdCtrl(const CmdCtrlRequest* request, CmdCtrlResponse* response); @@ -363,6 +364,15 @@ class MasterImpl { SplitTabletResponse* response, bool failed, int error_code); + virtual void SplitTabletWriteMetaAsync(TabletPtr tablet, const std::string& split_key); + + void SplitTabletWriteMetaCallback(TabletPtr parent_tablet, + std::vector child_tablets, + 
int32_t retry_times, + WriteTabletRequest* request, + WriteTabletResponse* response, + bool failed, int err_code); + void MergeTabletAsync(TabletPtr tablet_p1, TabletPtr tablet_p2); virtual void MergeTabletAsyncPhase2(TabletPtr tablet_p1, TabletPtr tablet_p2); void MergeTabletUnloadCallback(TabletPtr tablet); @@ -440,7 +450,7 @@ class MasterImpl { WriteTabletResponse* response, bool failed, int error_code); - void ScanMetaTableAsync(const std::string& table_name, + virtual void ScanMetaTableAsync(const std::string& table_name, const std::string& tablet_key_start, const std::string& tablet_key_end, ScanClosure done); @@ -535,6 +545,10 @@ class MasterImpl { void DumpStatToTable(const TabletNode& stat); // garbage clean + void EnableGcTrashCleanTimer(); + void DisableGcTrashCleanTimer(); + void ScheduleGcTrashClean(); + void DoGcTrashClean(); void EnableTabletNodeGcTimer(); void DisableTabletNodeGcTimer(); void ScheduleTabletNodeGc(); @@ -609,6 +623,8 @@ class MasterImpl { TableImpl* stat_table_; // tabletnode garbage clean + bool gc_trash_clean_enabled_; + int64_t gc_trash_clean_timer_id_; bool gc_enabled_; int64_t gc_timer_id_; bool gc_query_enable_; diff --git a/src/master/master_zk_adapter.cc b/src/master/master_zk_adapter.cc index 6f481f225..7227a43ae 100644 --- a/src/master/master_zk_adapter.cc +++ b/src/master/master_zk_adapter.cc @@ -387,14 +387,6 @@ void MasterZkAdapter::OnSafeModeMarkDeleted() { LOG(ERROR) << "safemode mark node is deleted"; } -void MasterZkAdapter::OnMasterLockLost() { - LOG(ERROR) << "master lock lost"; - master_impl_->SetMasterStatus(MasterImpl::kIsSecondary); - master_impl_->DisableQueryTabletNodeTimer(); - DeleteMasterNode(); - Reset(); -} - void MasterZkAdapter::OnTabletNodeListDeleted() { LOG(ERROR) << "ts dir node is deleted"; if (!MarkSafeMode()) { diff --git a/src/master/master_zk_adapter.h b/src/master/master_zk_adapter.h index 618dbc984..7419a1246 100644 --- a/src/master/master_zk_adapter.h +++ b/src/master/master_zk_adapter.h @@ 
-66,7 +66,6 @@ class MasterZkAdapter : public MasterZkAdapterBase { virtual void OnSafeModeMarkCreated(); virtual void OnSafeModeMarkDeleted(); - virtual void OnMasterLockLost(); virtual void OnTabletNodeListDeleted(); virtual void OnRootTabletNodeDeleted(); virtual void OnMasterNodeDeleted(); diff --git a/src/master/tablet_manager.cc b/src/master/tablet_manager.cc index d8049e26c..e45f99bd8 100644 --- a/src/master/tablet_manager.cc +++ b/src/master/tablet_manager.cc @@ -31,15 +31,17 @@ DECLARE_string(tera_working_dir); DECLARE_string(tera_master_meta_table_path); DECLARE_string(tera_master_meta_table_name); -DECLARE_bool(tera_zk_enabled); DECLARE_string(tera_master_gc_strategy); +DECLARE_bool(tera_master_gc_trash_enabled); DECLARE_int32(tera_master_impl_retry_times); DECLARE_int32(tera_tabletnode_connect_retry_period); DECLARE_bool(tera_delete_obsolete_tabledir_enabled); DECLARE_string(tera_tabletnode_path_prefix); +DECLARE_int64(tera_master_split_history_time_interval); +DECLARE_string(tera_leveldb_env_type); namespace tera { namespace master { @@ -63,20 +65,22 @@ std::ostream& operator << (std::ostream& o, const TabletPtr& tablet) { return o; } -Tablet::Tablet(const TabletMeta& meta) - : meta_(meta), - update_time_(common::timer::get_micros()), - ready_time_(std::numeric_limits::max()), - merge_param_(NULL), - gc_reported_(false) {} +Tablet::Tablet(const TabletMeta& meta): + meta_(meta), + update_time_(get_micros()), + ready_time_(std::numeric_limits::max()), + last_move_time_us_(0), + merge_param_(NULL), + gc_reported_(false) { } -Tablet::Tablet(const TabletMeta& meta, TablePtr table) - : meta_(meta), - table_(table), - update_time_(common::timer::get_micros()), - ready_time_(std::numeric_limits::max()), - merge_param_(NULL), - gc_reported_(false) {} +Tablet::Tablet(const TabletMeta& meta, TablePtr table): + meta_(meta), + table_(table), + update_time_(get_micros()), + ready_time_(std::numeric_limits::max()), + last_move_time_us_(0), + merge_param_(NULL), + 
gc_reported_(false) { } Tablet::~Tablet() { table_.reset(); @@ -131,6 +135,21 @@ int64_t Tablet::GetQps() { + average_counter_.scan_rows(); } +int64_t Tablet::GetReadQps() { + MutexLock lock(&mutex_); + return average_counter_.read_rows(); +} + +int64_t Tablet::GetWriteQps() { + MutexLock lock(&mutex_); + return average_counter_.write_rows(); +} + +int64_t Tablet::GetScanQps() { + MutexLock lock(&mutex_); + return average_counter_.scan_rows(); +} + const std::string& Tablet::GetKeyStart() { MutexLock lock(&mutex_); return meta_.key_range().key_start(); @@ -188,8 +207,47 @@ bool Tablet::IsBusy() { if (counter_list_.size() > 0) { return counter_list_.back().is_on_busy(); } else { - return false; + return average_counter_.is_on_busy(); + } +} + +bool Tablet::TestAndSetSplitTimeStamp(int64_t ts) { // timestamp in us + ts /= 1000; // transalte into ms + //MutexLock lock(&mutex_); + if (split_history_.last_split_ts < (ts - FLAGS_tera_master_split_history_time_interval)) { + split_history_.last_split_ts = ts; + return true; + } + return false; +} + +void Tablet::GetErrorIgnoredLGs(std::vector* lgs) { + MutexLock lock(&mutex_); + *lgs = ignore_err_lgs_; +} + +bool Tablet::SetErrorIgnoredLGs(const std::string& lg_list_str) { + if (lg_list_str.empty()) { + MutexLock lock(&mutex_); + ignore_err_lgs_.clear(); + return true; } + std::vector lgs; + SplitString(lg_list_str, ":", &lgs); + const TableSchema& schema = GetSchema(); + std::set lg_schema_set; + for (int i = 0; i < schema.locality_groups_size(); ++i) { + lg_schema_set.insert(schema.locality_groups(i).name()); + } + for (const auto& lg : lgs) { + if (lg_schema_set.find(lg) == lg_schema_set.end()) { + LOG(WARNING) << "set error ignored locality group ["<< lg << "] failed."; + return false; + } + } + MutexLock lock(&mutex_); + ignore_err_lgs_ = lgs; + return true; } std::string Tablet::DebugString() { @@ -220,8 +278,8 @@ void Tablet::SetCounter(const TabletCounter& counter) { average_counter_.set_write_size( 
CounterWeightedSum(counter.write_size(), average_counter_.write_size())); average_counter_.set_write_workload(counter.write_workload()); - average_counter_.set_is_on_busy( - CounterWeightedSum(counter.is_on_busy(), average_counter_.is_on_busy())); + average_counter_.set_is_on_busy(counter.is_on_busy()); + average_counter_.set_db_status(counter.db_status()); } void Tablet::UpdateSize(const TabletMeta& meta) { @@ -282,6 +340,22 @@ bool Tablet::SetStatusIf(TabletStatus new_status, TabletStatus if_status, return false; } +bool Tablet::SetStatusIf(TabletStatus new_status, + TabletStatus if_status, + const std::string& if_addr) { + MutexLock lock(&mutex_); + if (meta_.status() == if_status && + meta_.server_addr() == if_addr && + CheckStatusSwitch(meta_.status(), new_status)) { + meta_.set_status(new_status); + if (new_status == kTableReady) { + ready_time_ = get_micros(); + } + return true; + } + return false; +} + bool Tablet::SetStatusIf(TabletStatus new_status, TabletStatus if_status, TableStatus if_table_status, TabletStatus* old_status) { if (!IsBound()) { @@ -368,12 +442,22 @@ int64_t Tablet::SetUpdateTime(int64_t timestamp) { int64_t Tablet::ReadyTime() { MutexLock lock(&mutex_); if (meta_.status() != kTableReady) { - return std::numeric_limits::max(); + return std::numeric_limits::max(); } else { return ready_time_; } } +int64_t Tablet::LastMoveTime() const { + MutexLock lock(&mutex_); + return last_move_time_us_; +} + +void Tablet::SetLastMoveTime(int64_t time) { + MutexLock lock(&mutex_); + last_move_time_us_ = time; +} + int32_t Tablet::AddSnapshot(uint64_t snapshot) { MutexLock lock(&mutex_); meta_.add_snapshot_list(snapshot); @@ -582,6 +666,7 @@ Table::Table(const std::string& table_name) deleted_tablet_num_(0), max_tablet_no_(0), create_time_((int64_t)time(NULL)), + metric_(table_name), schema_is_syncing_(false), rangefragment_(NULL), update_rpc_response_(NULL), @@ -936,6 +1021,10 @@ void Table::RefreshCounter() { sspeed += counter.scan_size(); } + 
metric_.SetTableSize(size); + metric_.SetTabletNum(tablet_num); + metric_.SetNotReady(notready); + counter_.set_size(size); counter_.set_tablet_num(tablet_num); counter_.set_notready_num(notready); @@ -1175,9 +1264,14 @@ bool Table::TryCollectInheritedFile() { std::vector tablet_files; CollectInheritedFileFromFilesystem(name_, *it, &tablet_files); - for (uint32_t i = 0; i < tablet_files.size(); i++) { + if (tablet_files.empty()) { MutexLock l(&mutex_); - AddInheritedFile(tablet_files[i], false); + AddEmptyDeadTablet(*it); + } else { + for (uint32_t i = 0; i < tablet_files.size(); i++) { + MutexLock l(&mutex_); + AddInheritedFile(tablet_files[i], false); + } } } return dead_tablets.size() > 0; @@ -1269,6 +1363,10 @@ bool Table::GetTabletsForGc(std::set* live_tablets, VLOG(10) << "[gc] add dead tablet: " << path; dead_tablets->insert(tabletnum); } + + if (0 == tabletnum) { + LOG(WARNING) << "[gc] invalid tablet path found: <" << path << ">"; + } } if (dead_tablets->size() == 0) { VLOG(10) << "[gc] there is none dead tablets: " << name_; @@ -1300,6 +1398,17 @@ void Table::AddInheritedFile(const TabletFile& file, bool need_ref) { VLOG(10) << "[gc] [" << name_ << "] file " << file << " ref increment to " << file_info.ref; } +void Table::AddEmptyDeadTablet(uint64_t tablet_id) { + mutex_.AssertHeld(); + + if (useful_inh_files_.find(tablet_id) == useful_inh_files_.end()) { + LOG(INFO) << "[gc] [" << name_ << "] new empty dead tablet " + << tablet_id << ", gc disabled"; + gc_disabled_dead_tablets_.insert(tablet_id); + useful_inh_files_[tablet_id]; + } +} + uint64_t Table::CleanObsoleteFile() { leveldb::Env* env = io::LeveldbBaseEnv(); std::string table_path = FLAGS_tera_tabletnode_path_prefix + name_; @@ -1314,13 +1423,38 @@ uint64_t Table::CleanObsoleteFile() { leveldb::Status s; if (file.lg_id == 0 && file.file_id == 0) { std::string path = leveldb::BuildTabletPath(table_path, file.tablet_id); + leveldb::FileLock* file_lock = nullptr; + // NEVER remove the trailing 
character '/', otherwise you will lock the parent directory + s = env->LockFile(path + "/", &file_lock); + if (!s.ok()) { + LOG(WARNING) << "lock path failed, path: " << path << ", status: " << s.ToString(); + } + delete file_lock; + LOG(INFO) << "[gc] [" << name_ << "] delete dir " << path; s = io::DeleteEnvDir(path); //safely delete dir and all file in it } else { + std::string lg_path = leveldb::BuildTabletLgPath(table_path, file.tablet_id, file.lg_id); + leveldb::FileLock* file_lock = nullptr; + // NEVER remove the trailing character '/', otherwise you will lock the parent directory + s = env->LockFile(lg_path + "/", &file_lock); + if (!s.ok()) { + LOG(WARNING) << "lock path failed, path: " << lg_path << ", status: " << s.ToString(); + } + + delete file_lock; + std::string path = leveldb::BuildTableFilePath(table_path, file.tablet_id, file.lg_id, file.file_id); - LOG(INFO) << "[gc] [" << name_ << "] delete file " << file << " path " << path; - s = env->DeleteFile(path); + if (FLAGS_tera_master_gc_trash_enabled) { + LOG(INFO) << "[gc] [" << name_ << "] move file to trash, file: " + << file << ", path: " << path; + // move sst to trackable gc trash instead of deleting it directly + s = io::MoveSstToTrackableGcTrash(name_, file.tablet_id, file.lg_id, file.file_id); + } else { + LOG(INFO) << "[gc] [" << name_ << "] delete file " << file << " path " << path; + s = env->DeleteFile(path); + } } mutex_.Lock(); if (!s.ok()) { @@ -1554,6 +1688,40 @@ bool TabletManager::FindOverlappedTablets(const std::string& table_name, return true; } +bool TabletManager::SearchTablet(const std::string& table_name, + const std::string& key, + TabletPtr* tablet, + StatusCode* ret_status) { + // lock table list + mutex_.Lock(); + + // search table + TableList::iterator it = all_tables_.find(table_name); + if (it == all_tables_.end()) { + mutex_.Unlock(); + VLOG(5) << "table: " << table_name << " not exist"; + SetStatusCode(kTableNotFound, ret_status); + return false; + } + Table& table = 
*it->second; + + // lock table + table.mutex_.Lock(); + mutex_.Unlock(); + + // search tablet + Table::TabletList::reverse_iterator rit2 = table.tablets_list_.rbegin(); + for (; rit2 != table.tablets_list_.rend(); ++rit2) { + if (rit2->first <= key) { + *tablet = rit2->second; + break; + } + } + + table.mutex_.Unlock(); + return true; +} + bool TabletManager::FindTable(const std::string& table_name, std::vector* tablet_meta_list, StatusCode* ret_status) { diff --git a/src/master/tablet_manager.h b/src/master/tablet_manager.h index 1e58d62cf..07e942ecb 100644 --- a/src/master/tablet_manager.h +++ b/src/master/tablet_manager.h @@ -16,11 +16,12 @@ #include "common/mutex.h" #include "common/thread_pool.h" +#include "common/metric/metric_counter.h" #include "proto/master_rpc.pb.h" #include "proto/table_meta.pb.h" #include "proto/tabletnode_rpc.pb.h" -#include "utils/counter.h" +#include "common/counter.h" #include "utils/fragment.h" using namespace std::placeholders; @@ -83,7 +84,9 @@ class Tablet { friend std::ostream& operator << (std::ostream& o, const Tablet& tablet); public: - Tablet(); + Tablet() = delete; + Tablet(const Tablet&) = delete; + Tablet& operator=(const Tablet&) = delete; explicit Tablet(const TabletMeta& meta); Tablet(const TabletMeta& meta, TablePtr table); ~Tablet(); @@ -95,6 +98,9 @@ class Tablet { int64_t GetDataSize(); void GetDataSize(int64_t* size, std::vector* lg_size); int64_t GetQps(); + int64_t GetReadQps(); + int64_t GetWriteQps(); + int64_t GetScanQps(); const std::string& GetKeyStart(); const std::string& GetKeyEnd(); @@ -117,6 +123,9 @@ class Tablet { bool SetStatus(TabletStatus new_status, TabletStatus* old_status = NULL); bool SetStatusIf(TabletStatus new_status, TabletStatus if_status, TabletStatus* old_status = NULL); + bool SetStatusIf(TabletStatus new_status, + TabletStatus if_status, + const std::string& if_addr); bool SetStatusIf(TabletStatus new_status, TabletStatus if_status, TableStatus if_table_status, TabletStatus* 
old_status = NULL); bool SetAddrIf(const std::string& server_addr, TabletStatus if_status, @@ -151,13 +160,20 @@ class Tablet { int64_t UpdateTime(); int64_t SetUpdateTime(int64_t timestamp); int64_t ReadyTime(); + int64_t LastMoveTime() const; + void SetLastMoveTime(int64_t time); void* GetMergeParam(); void SetMergeParam(void* merge_param); + bool TestAndSetSplitTimeStamp(int64_t ts); + + // Will set a flag to ignore lost file error when tabletserver load tablet. + // We should set specific locality_groups that avoid missing some of the + // exceptions in others locality_groups. + void GetErrorIgnoredLGs(std::vector* lgs); + bool SetErrorIgnoredLGs(const std::string& lg_list_str = ""); private: - Tablet(const Tablet&) {} - Tablet& operator=(const Tablet&) {return *this;} static bool CheckStatusSwitch(TabletStatus old_status, TabletStatus new_status); @@ -167,8 +183,10 @@ class Tablet { TablePtr table_; int64_t update_time_; int64_t ready_time_; + int64_t last_move_time_us_; std::string server_id_; std::string expect_server_addr_; + std::vector ignore_err_lgs_; // lg array for ignore_err_ std::list counter_list_; TabletCounter average_counter_; struct TabletAccumulateCounter { @@ -189,6 +207,14 @@ class Tablet { } accumu_counter_; void* merge_param_; + // Tablet Split History Tracing + struct TabletSplitHistory { + int64_t last_split_ts; + + TabletSplitHistory() + : last_split_ts(0) {} + } split_history_; + // protected by Table::mutex_ bool gc_reported_; std::multiset inh_files_; @@ -199,6 +225,42 @@ std::ostream& operator << (std::ostream& o, const TabletPtr& tablet); std::ostream& operator << (std::ostream& o, const TablePtr& table); class Table { + + class TableMetric { + public: + TableMetric(const std::string& name): + table_name_(name), + tablet_num_("tera_master_tablet_num", GetTableNameLabel(), + {SubscriberType::LATEST}, false), + not_ready_("tera_master_tablet_not_ready_num", GetTableNameLabel(), + {SubscriberType::LATEST}, false), + 
table_size_("tera_master_table_size", GetTableNameLabel(), + {SubscriberType::LATEST}, false) + {} + + void SetTabletNum(int64_t tablet_num) { + tablet_num_.Set(tablet_num); + } + + void SetNotReady(int64_t not_ready) { + not_ready_.Set(not_ready); + } + + void SetTableSize(int64_t table_size) { + table_size_.Set(table_size); + } + + private: + std::string GetTableNameLabel() { + return "table:" + table_name_; + } + + const std::string table_name_; + tera::MetricCounter tablet_num_; + tera::MetricCounter not_ready_; + tera::MetricCounter table_size_; + }; + friend class Tablet; friend class TabletManager; friend std::ostream& operator << (std::ostream& o, const Table& tablet); @@ -262,11 +324,12 @@ class Table { void EnableDeadTabletGarbageCollect(uint64_t tablet_id); void ReleaseInheritedFile(const TabletFile& file); void AddInheritedFile(const TabletFile& file, bool need_ref); + void AddEmptyDeadTablet(uint64_t tablet_id); uint64_t CleanObsoleteFile(); private: - Table(const Table&) {} - Table& operator=(const Table&) {return *this;} + Table(const Table&) = delete; + Table& operator=(const Table&) = delete; typedef std::map TabletList; TabletList tablets_list_; mutable Mutex mutex_; @@ -279,6 +342,7 @@ class Table { uint64_t max_tablet_no_; int64_t create_time_; TableCounter counter_; + TableMetric metric_; bool schema_is_syncing_; // is schema syncing to all ts(all tablets) RangeFragment* rangefragment_; UpdateTableResponse* update_rpc_response_; @@ -348,6 +412,11 @@ class TabletManager { std::vector* tablet_meta_list, StatusCode* ret_status = NULL); + bool SearchTablet(const std::string& table_name, + const std::string& key, + TabletPtr* tablet, + StatusCode* ret_status); + bool FindTable(const std::string& table_name, TablePtr* tablet); int64_t SearchTable(std::vector* tablet_meta_list, diff --git a/src/master/tabletnode_manager.cc b/src/master/tabletnode_manager.cc index 383526a02..ff1767ee4 100644 --- a/src/master/tabletnode_manager.cc +++ 
b/src/master/tabletnode_manager.cc @@ -6,7 +6,7 @@ #include "master/master_impl.h" #include "master/workload_scheduler.h" -#include "utils/timer.h" +#include "common/timer.h" DECLARE_string(tera_master_meta_table_name); DECLARE_int32(tera_master_max_load_concurrency); diff --git a/src/master/test/master_impl_test.cc b/src/master/test/master_impl_test.cc index e9e130e33..9e0573f04 100644 --- a/src/master/test/master_impl_test.cc +++ b/src/master/test/master_impl_test.cc @@ -14,36 +14,19 @@ #include "utils/utils_cmd.h" #include "version.h" +DECLARE_string(tera_master_port); +DECLARE_string(log_dir); +DECLARE_string(tera_coord_type); +DECLARE_string(tera_leveldb_env_type); + namespace tera { namespace master { class MasterImplTest : public ::testing::Test, public MasterImpl { public: MasterImplTest() : merge_enter_phase2(false) { - } - - void SplitTabletTest() { - SplitTabletRequest* request = NULL; - SplitTabletResponse* response = NULL; - bool failed; - int error_code; - TablePtr table; - TabletPtr tablet; - TabletMeta meta; - - table.reset(new Table("splittest")); - tablet.reset(new Tablet(meta, table)); - request = new SplitTabletRequest; - response = new SplitTabletResponse; - - tablet->SetStatus(kTableReady); - tablet->SetStatus(kTableOnSplit); - response->set_status(kTableNotSupport); - failed = false; - error_code = 0; - - MasterImpl::SplitTabletCallback(tablet, request, response, failed, error_code); - EXPECT_TRUE(tablet->GetStatus() == kTableOffLine); + FLAGS_tera_coord_type = "fake_zk"; + FLAGS_tera_leveldb_env_type = "local"; } bool merge_enter_phase2; @@ -95,6 +78,55 @@ class MasterImplTest : public ::testing::Test, public MasterImpl { return tablet; } + void DeleteTabletNodeTest() { + // add server + std::string addr1 = "127.0.0.1:22000"; + std::string addr2 = "127.0.0.2:22000"; + tabletnode_manager_->AddTabletNode(addr1, addr1); + tabletnode_manager_->AddTabletNode(addr2, addr2); + + // add tabelt + StatusCode s; + TabletMeta meta; + TablePtr table(new 
Table("table001")); + TabletPtr tablet = MakeTabletPtr("a", "z", table); + tablet->SetStatus(kTableReady); + tablet->SetAddr(addr1); + tablet->ToMeta(&meta); + tablet_manager_->AddTablet(meta, tablet->GetSchema(), &tablet, &s); + tablet->SetServerId(addr1); + + // thread1: get tablet from addr1 + std::vector tablet_list; + std::vector::iterator it; + tablet_manager_->FindTablet(addr1, &tablet_list, true); + EXPECT_TRUE(it != tablet_list.end()); + EXPECT_TRUE(tablet_list.size() == 1); + + // thread2: load tablet into addr2 + LoadTabletRequest* request = new LoadTabletRequest; + LoadTabletResponse* response = new LoadTabletResponse; + tablet->SetAddr(addr2); + tablet->SetServerId(addr2); + + TabletNodePtr node; + tabletnode_manager_->FindTabletNode(addr2, &node); + node->TryLoad(tablet); + tablet->SetStatus(kTableOffLine); + tablet->SetStatus(kTableOnLoad); + response->set_status(kTabletNodeOk); + LoadTabletCallback(tablet, 10, request, response, 0, 0); + EXPECT_TRUE(tablet->GetStatus() == kTableReady); + + // thread1: check addr1 and set status + for (it = tablet_list.begin(); it != tablet_list.end(); ++it) { + TabletPtr t = *it; + t->SetStatusIf(kTabletPending, kTableReady, addr1); + } + EXPECT_TRUE(tablet->GetStatus() == kTableReady); + EXPECT_STREQ(tablet->GetServerAddr().c_str(), addr2.c_str()); + } + // This unload function will not send unload request // Tablet will stay in kTableUnLoading status forever // It can be used to simulate a slow unload @@ -119,9 +151,7 @@ class MasterImplTest : public ::testing::Test, public MasterImpl { LOG(ERROR) << t1->GetStatus() << ";" << t2->GetStatus() << ";" << t3->GetStatus(); EXPECT_TRUE((t1->GetStatus() == kTableUnLoading) && (t2->GetStatus() == kTableUnLoading) - && (t3->GetStatus() == kTableReady)); - - // t2 & t3's merge should fail since t1 & t2 is merging + && (t3->GetStatus() == kTableReady)); // t2 & t3's merge should fail since t1 & t2 is merging MergeTabletAsync(t2, t3); LOG(ERROR) << t1->GetStatus() << ";" << 
t2->GetStatus() << ";" << t3->GetStatus(); EXPECT_TRUE((t1->GetStatus() == kTableUnLoading) @@ -135,10 +165,49 @@ class MasterImplTest : public ::testing::Test, public MasterImpl { && (t2->GetStatus() == kTableUnLoading) && (t3->GetStatus() == kTableReady)); } + + virtual void ScanMetaTableAsync(const std::string& table_name, + const std::string& tablet_key_start, + const std::string& tablet_end_key, + ScanClosure done); + + virtual void SplitTabletWriteMetaAsync(TabletPtr tablet, const std::string& split_key); }; -TEST_F(MasterImplTest, SplitTest) { - SplitTabletTest(); +void MasterImplTest::ScanMetaTableAsync(const std::string& table_name, + const std::string& tablet_key_start, + const std::string& tablet_end_key, + ScanClosure done) { + + const ::testing::TestInfo* test_case = ::testing::UnitTest::GetInstance()->current_test_info(); + std::string case_name(test_case->test_case_name()); + if (case_name == "InteractWithOldTS") { + EXPECT_TRUE(true); + } + if (case_name.find("InteractWithNewTS") != std::string::npos) { + EXPECT_TRUE(false); + } +} + +void MasterImplTest::SplitTabletWriteMetaAsync(TabletPtr tablet, const std::string& split_key) { + const ::testing::TestInfo* test_case = ::testing::UnitTest::GetInstance()->current_test_info(); + std::string case_name(test_case->test_case_name()); + if (case_name.find("InteractWithOldTS") != std::string::npos) { + EXPECT_TRUE(false); + } + if (case_name.find("InteractWithNewTS") != std::string::npos) { + EXPECT_TRUE(true); + } + EXPECT_EQ(tablet->GetStatus(), kTableOnSplit); + EXPECT_FALSE(split_key.empty()); + EXPECT_GT(split_key, tablet->GetKeyStart()); + if (!tablet->GetKeyEnd().empty()) { + EXPECT_GT(tablet->GetKeyEnd(), split_key); + } +} + +TEST_F(MasterImplTest, DeleteTabletNodeTest) { + DeleteTabletNodeTest(); } TEST_F(MasterImplTest, MergeTest) { @@ -149,6 +218,163 @@ TEST_F(MasterImplTest, MergeTabletBrokenTest) { MergeTabletBrokenTest(); } +TEST_F(MasterImplTest, SplitNotSupport) { + SplitTabletRequest* 
request = NULL; + SplitTabletResponse* response = NULL; + bool failed; + int error_code; + TablePtr table; + TabletPtr tablet; + TabletMeta meta; + + table.reset(new Table("splittest")); + tablet.reset(new Tablet(meta, table)); + request = new SplitTabletRequest; + response = new SplitTabletResponse; + + tablet->SetStatus(kTableReady); + tablet->SetStatus(kTableOnSplit); + response->set_status(kTableNotSupport); + failed = false; + error_code = 0; + + MasterImpl::SplitTabletCallback(tablet, request, response, failed, error_code); + EXPECT_TRUE(tablet->GetStatus() == kTableOffLine); +} + +TEST_F(MasterImplTest, InteractWithOldTS) { + SplitTabletRequest* request = NULL; + SplitTabletResponse* response = NULL; + TablePtr table; + TabletPtr tablet; + TabletMeta meta; + + table.reset(new Table("splittest")); + tablet.reset(new Tablet(meta, table)); + request = new SplitTabletRequest; + response = new SplitTabletResponse; + + tablet->SetStatus(kTableReady); + tablet->SetStatus(kTableOnSplit); + response->set_status(kTabletNodeOk); + + bool failed = false; + int error_code = 0; + MasterImpl::SplitTabletCallback(tablet, request, response, failed, error_code); +} + +TEST_F(MasterImplTest, InteractWithNewTSOK){ + TablePtr table; + TabletPtr tablet; + TabletMeta meta; + + table.reset(new Table("splittest")); + tablet.reset(new Tablet(meta, table)); + SplitTabletRequest* request = new SplitTabletRequest; + SplitTabletResponse* response = new SplitTabletResponse; + tablet->SetStatus(kTableReady); + tablet->SetStatus(kTableOnSplit); + response->set_status(kTabletNodeOk); + response->add_split_keys("abc"); + bool failed = false; + int error_code = 0; + MasterImpl::SplitTabletCallback(tablet, request, response, failed, error_code); + + meta.mutable_key_range()->set_key_start("ab"); + meta.mutable_key_range()->set_key_end("bc"); + tablet.reset(new Tablet(meta, table)); + tablet->SetStatus(kTableReady); + tablet->SetStatus(kTableOnSplit); + request = new SplitTabletRequest; + 
response = new SplitTabletResponse; + response->add_split_keys("b"); + MasterImpl::SplitTabletCallback(tablet, request, response, failed, error_code); + EXPECT_EQ(tablet->GetStatus(), kTableOnSplit); +} + +TEST_F(MasterImplTest, NewTSReturnInvalidSplitKey){ + TablePtr table; + TabletPtr tablet; + TabletMeta meta; + + meta.mutable_key_range()->set_key_start("aa"); + meta.mutable_key_range()->set_key_end("cc"); + table.reset(new Table("splittest")); + tablet.reset(new Tablet(meta, table)); + tablet->SetStatus(kTableReady); + tablet->SetStatus(kTableOnSplit); + MasterImpl::SplitTabletWriteMetaAsync(tablet, ""); + EXPECT_EQ(tablet->GetStatus(), kTableOffLine); + + tablet->SetStatus(kTableReady); + tablet->SetStatus(kTableOnSplit); + MasterImpl::SplitTabletWriteMetaAsync(tablet, "aa"); + EXPECT_EQ(tablet->GetStatus(), kTableOffLine); + + tablet->SetStatus(kTableReady); + tablet->SetStatus(kTableOnSplit); + MasterImpl::SplitTabletWriteMetaAsync(tablet, "cc"); + EXPECT_EQ(tablet->GetStatus(), kTableOffLine); + + tablet->SetStatus(kTableReady); + tablet->SetStatus(kTableOnSplit); + MasterImpl::SplitTabletWriteMetaAsync(tablet, "d"); + EXPECT_EQ(tablet->GetStatus(), kTableOffLine); + + meta.mutable_key_range()->set_key_end(""); + tablet.reset(new Tablet(meta, table)); + tablet->SetStatus(kTableReady); + tablet->SetStatus(kTableOnSplit); + MasterImpl::SplitTabletWriteMetaAsync(tablet, ""); + EXPECT_EQ(tablet->GetStatus(), kTableOffLine); + + meta.Clear(); + tablet.reset(new Tablet(meta, table)); + tablet->SetStatus(kTableReady); + tablet->SetStatus(kTableOnSplit); + MasterImpl::SplitTabletWriteMetaAsync(tablet, ""); + EXPECT_EQ(tablet->GetStatus(), kTableOffLine); + +} + +TEST_F(MasterImplTest, SplitTabletWriteMetaCallback) { + TablePtr table; + TabletPtr tablet; + TabletMeta meta; + + meta.mutable_key_range()->set_key_start("a"); + meta.mutable_key_range()->set_key_end("c"); + table.reset(new Table("splittest")); + tablet.reset(new Tablet(meta, table)); + 
tablet->SetStatus(kTableReady); + tablet->SetStatus(kTableOnSplit); + std::vector child_tablets; + meta.mutable_key_range()->set_key_end("b"); + child_tablets.emplace_back(new Tablet(meta)); + meta.mutable_key_range()->set_key_start("b"); + meta.mutable_key_range()->set_key_end("c"); + child_tablets.emplace_back(new Tablet(meta)); + bool failed = false; + int error_code = 0; + + WriteTabletRequest* request = new WriteTabletRequest; + WriteTabletResponse* response = new WriteTabletResponse; + + response->set_status(kTabletNodeOk); + response->add_row_status_list(kTabletNodeOk); + response->add_row_status_list(kTabletNodeOk); + + MasterImpl::SplitTabletWriteMetaCallback(tablet, + child_tablets, 1, request, response, failed, error_code); + EXPECT_EQ(table->tablets_list_.size(), 2); + TabletPtr t1, t2; + table->FindTablet("a", &t1); + table->FindTablet("b", &t2); + EXPECT_EQ(t1->GetStatus(), kTableOffLine); + EXPECT_EQ(t2->GetStatus(), kTableOffLine); + EXPECT_STREQ(t1->GetKeyEnd().c_str(), t2->GetKeyStart().c_str()); +} + } // master } // tera diff --git a/src/master/test/master_test.cc b/src/master/test/master_test.cc index 89b44c208..d0ecfb87f 100644 --- a/src/master/test/master_test.cc +++ b/src/master/test/master_test.cc @@ -8,20 +8,13 @@ #include "utils/utils_cmd.h" -DECLARE_string(tera_master_port); -DECLARE_string(log_dir); -DECLARE_bool(tera_zk_enabled); DECLARE_string(tera_leveldb_env_type); -DECLARE_string(tera_fake_zk_path_prefix); int main(int argc, char** argv) { ::google::ParseCommandLineFlags(&argc, &argv, true); ::google::InitGoogleLogging(argv[0]); - - FLAGS_tera_zk_enabled = false; - FLAGS_tera_leveldb_env_type = "local"; - tera::utils::SetupLog("master_test"); + FLAGS_tera_leveldb_env_type = "local"; ::testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); diff --git a/src/master/test/trackable_gc_test.cc b/src/master/test/trackable_gc_test.cc index 7d6c78dd6..09cee5dda 100644 --- a/src/master/test/trackable_gc_test.cc +++ 
b/src/master/test/trackable_gc_test.cc @@ -11,7 +11,7 @@ #include "master/tablet_manager.h" #include "utils/utils_cmd.h" -DECLARE_bool(tera_zk_enabled); +DECLARE_string(tera_coord_type); DECLARE_string(tera_leveldb_env_type); DECLARE_string(tera_master_gc_strategy); DECLARE_string(tera_tabletnode_path_prefix); @@ -500,7 +500,7 @@ class TrackableGcTest : public ::testing::Test { static void SetUpTestCase() { std::cout << "SetUpTestCase" << std::endl; - FLAGS_tera_zk_enabled = false; + FLAGS_tera_coord_type = "fake_zk"; FLAGS_tera_leveldb_env_type = "local"; FLAGS_tera_master_gc_strategy = "trackable"; FLAGS_tera_tabletnode_path_prefix = "./"; diff --git a/src/master/workload_scheduler.cc b/src/master/workload_scheduler.cc index f0f70540c..5933827cb 100644 --- a/src/master/workload_scheduler.cc +++ b/src/master/workload_scheduler.cc @@ -11,6 +11,7 @@ DECLARE_double(tera_master_load_balance_size_ratio_trigger); DECLARE_int32(tera_master_load_balance_ts_load_threshold); +DECLARE_int64(tera_master_load_balance_ts_size_threshold); DECLARE_int32(tera_master_load_balance_scan_weight); namespace tera { @@ -76,8 +77,8 @@ bool SizeScheduler::MayMoveOut(const TabletNodePtr& node, const std::string& table_name) { VLOG(16) << "[size-sched] MayMoveOut()"; int64_t node_size = node->GetSize(table_name); - if (node_size <= 0) { - VLOG(16) << "[size-sched] node has no data"; + if (node_size <= FLAGS_tera_master_load_balance_ts_size_threshold) { + VLOG(16) << "[size-sched] node do not need loadbalance"; return false; } return true; diff --git a/src/monitor/teramo_main.cc b/src/monitor/teramo_main.cc index d2a5d6417..169437948 100644 --- a/src/monitor/teramo_main.cc +++ b/src/monitor/teramo_main.cc @@ -19,7 +19,7 @@ #include "proto/tabletnode.pb.h" #include "tera.h" #include "utils/utils_cmd.h" -#include "utils/timer.h" +#include "common/timer.h" DEFINE_string(tera_monitor_default_request_filename, "tera_monitor.request", ""); DEFINE_string(tera_monitor_default_response_filename, 
"tera_monitor.response", ""); @@ -34,6 +34,7 @@ DECLARE_string(tera_ins_addr_list); DECLARE_string(tera_ins_root_path); DECLARE_bool(tera_zk_enabled); DECLARE_bool(tera_ins_enabled); +DECLARE_string(tera_coord_type); DECLARE_int64(tera_master_stat_table_interval); using namespace tera; @@ -296,9 +297,11 @@ void InitFlags(int32_t argc, char** argv, const MonitorRequest& request) { if (request.has_tera_zk_root()) { FLAGS_tera_ins_root_path = request.tera_zk_root(); } + FLAGS_tera_coord_type = "ins"; FLAGS_tera_ins_enabled = true; FLAGS_tera_zk_enabled = false; } else { + FLAGS_tera_coord_type = "zk"; if (request.has_tera_zk_addr()) { FLAGS_tera_zk_addr_list = request.tera_zk_addr(); } diff --git a/src/observer/executor/key_selector.h b/src/observer/executor/key_selector.h new file mode 100644 index 000000000..b6746b612 --- /dev/null +++ b/src/observer/executor/key_selector.h @@ -0,0 +1,29 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_OBSERVER_EXECUTOR_KEY_SELECTOR_H_ +#define TERA_OBSERVER_EXECUTOR_KEY_SELECTOR_H_ + +#include +#include + +#include "tera.h" + +namespace tera { +namespace observer { + +class KeySelector { +public: + virtual ~KeySelector() {} + + // output: selected table name, selected start key + virtual bool SelectStart(std::string* table_name, + std::string* start_key) = 0; + virtual ErrorCode Observe(const std::string& table_name) = 0; +}; + +} // namespace observer +} // namespace tera + +#endif // TERA_OBSERVER_EXECUTOR_KEY_SELECTOR_H_ diff --git a/src/observer/executor/notification.h b/src/observer/executor/notification.h new file mode 100644 index 000000000..a73cbb255 --- /dev/null +++ b/src/observer/executor/notification.h @@ -0,0 +1,38 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. 
All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_OBSERVER_EXECUTOR_NOTIFICATION_H_ +#define TERA_OBSERVER_EXECUTOR_NOTIFICATION_H_ + +#include +#include + +#include "tera.h" + +#pragma GCC visibility push(default) + +namespace tera { +namespace observer { + +class Notification { +public: + virtual ~Notification() {} + + virtual void Ack(Table* t, + const std::string& row_key, + const std::string& column_family, + const std::string& qualifier) = 0; + + virtual void Notify(Table* t, + const std::string& row_key, + const std::string& column_family, + const std::string& qualifier) = 0; +}; + +} // namespace observer +} // namespace tera + +#pragma GCC visibility pop + +#endif // TERA_OBSERVER_EXECUTOR_NOTIFICATION_H_ diff --git a/src/observer/executor/notification_impl.cc b/src/observer/executor/notification_impl.cc new file mode 100644 index 000000000..125509d79 --- /dev/null +++ b/src/observer/executor/notification_impl.cc @@ -0,0 +1,67 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "observer/executor/notification_impl.h" + +#include + +#include "common/timer.h" +#include "common/base/string_number.h" +#include "sdk/global_txn_internal.h" +#include "types.h" + +namespace tera { +namespace observer { + +Notification* GetNotification(Transaction* transaction) { + return new NotificationImpl(transaction); +} + +NotificationImpl::NotificationImpl(Transaction* transaction) + : transaction_(transaction), + start_timestamp_(get_micros()), + notify_timestamp_(0) {} + + void NotificationImpl::Ack(Table* t, + const std::string& row_key, + const std::string& column_family, + const std::string& qualifier) { + if (transaction_ != NULL) { + transaction_->Ack(t, row_key, column_family, qualifier); + return; + } + + // kNoneTransaction + tera::RowMutation* mutation = t->NewRowMutation(row_key); + std::string notify_qulifier = PackNotifyName(column_family, qualifier); + mutation->DeleteColumns(kNotifyColumnFamily, notify_qulifier, start_timestamp_); + t->ApplyMutation(mutation); + delete mutation; + } + +void NotificationImpl::Notify(Table* t, + const std::string& row_key, + const std::string& column_family, + const std::string& qualifier) { + if (transaction_ != NULL) { + transaction_->Notify(t, row_key, column_family, qualifier); + return; + } + + // kNoneTransaction + if (notify_timestamp_ == 0) { + notify_timestamp_ = get_micros(); + } + + tera::ErrorCode err; + std::string notify_qulifier = PackNotifyName(column_family, qualifier); + t->Put(row_key, kNotifyColumnFamily, notify_qulifier, NumberToString(notify_timestamp_), notify_timestamp_, &err); + if (err.GetType() != tera::ErrorCode::kOK) { + LOG(ERROR) << "Notify error. 
table: " << t->GetName() << " row " + << row_key << " pos: " << column_family << ":" << qualifier; + } +} + +} // namespace observer +} // namespace tera diff --git a/src/observer/executor/notification_impl.h b/src/observer/executor/notification_impl.h new file mode 100644 index 000000000..a88399d79 --- /dev/null +++ b/src/observer/executor/notification_impl.h @@ -0,0 +1,42 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_OBSERVER_EXECUTOR_NOTIFICATION_IMPL_H_ +#define TERA_OBSERVER_EXECUTOR_NOTIFICATION_IMPL_H_ + +#include +#include + +#include "observer/executor/notification.h" +#include "tera.h" + +namespace tera { +namespace observer { + +Notification* GetNotification(Transaction* transaction); + +class NotificationImpl : public Notification { +public: + explicit NotificationImpl(Transaction* transaction); + virtual ~NotificationImpl() {} + + virtual void Ack(Table* t, + const std::string& row_key, + const std::string& column_family, + const std::string& qualifier); + + virtual void Notify(Table* t, + const std::string& row_key, + const std::string& column_family, + const std::string& qualifier); +private: + Transaction* transaction_; + int64_t start_timestamp_; + int64_t notify_timestamp_; +}; + +} // namespace observer +} // namespace tera + +#endif // TERA_OBSERVER_EXECUTOR_NOTIFICATION_IMPL_H_ diff --git a/src/observer/executor/notify_cell.h b/src/observer/executor/notify_cell.h new file mode 100644 index 000000000..9567c7231 --- /dev/null +++ b/src/observer/executor/notify_cell.h @@ -0,0 +1,110 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_OBSERVER_EXECUTOR_NOTIFY_CELL_H_ +#define TERA_OBSERVER_EXECUTOR_NOTIFY_CELL_H_ + +#include +#include +#include +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" + +#include "observer/executor/observer.h" +#include "observer/rowlocknode/fake_rowlock_client.h" +#include "sdk/rowlock_client.h" +#include "tera.h" + +DECLARE_string(rowlock_server_port); +DECLARE_string(rowlock_server_ip); +DECLARE_bool(mock_rowlock_enable); + + +namespace tera { +namespace observer { + +struct Column { + std::string table_name; + std::string family; + std::string qualifier; + + bool operator<(const Column& other) const { + int32_t result = 0; + result = table_name.compare(other.table_name); + if (result != 0) { + return result < 0; + } + result = family.compare(other.family); + if (result != 0) { + return result < 0; + } + result = qualifier.compare(other.qualifier); + + return result < 0; + } + + bool operator==(const Column& other) const { + return table_name == other.table_name && family == other.family + && qualifier == other.qualifier; + } +}; + +struct AutoRowUnlocker { + AutoRowUnlocker(const std::string& table, + const std::string& unlock_row) + : table_name(table), + row(unlock_row) {} + AutoRowUnlocker() {} + + ~AutoRowUnlocker() { + // UnLockRow + + if (FLAGS_mock_rowlock_enable == true) { + client.reset(new FakeRowlockClient()); + } else { + client.reset(new RowlockClient()); + } + + RowlockRequest request; + RowlockResponse response; + + request.set_row(row); + request.set_table_name(table_name); + + client->UnLock(&request, &response); + VLOG(12) <<"[time] Transaction finish. 
[row] " << row; + } + + std::unique_ptr client; + std::string table_name; + std::string row; +}; + +// info inside scanner +struct NotifyCell { + NotifyCell(tera::Transaction* t) : timestamp(0), transaction(t), + table(NULL) {} + ~NotifyCell() { + if (transaction) { + delete transaction; + } + } + + std::string row; + std::string value; + int64_t timestamp; // 0 until a successful read in DoReadValue assigns it + + Column observed_column; + tera::Transaction* transaction; + tera::Table* table; + + std::shared_ptr unlocker; +}; + +} // namespace observer +} // namespace tera + +#endif // TERA_OBSERVER_EXECUTOR_NOTIFY_CELL_H_ diff --git a/src/observer/executor/observer.h b/src/observer/executor/observer.h new file mode 100644 index 000000000..db1d912ae --- /dev/null +++ b/src/observer/executor/observer.h @@ -0,0 +1,52 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_OBSERVER_H_ +#define TERA_OBSERVER_H_ + +#include + +#include "tera/client.h" +#include "tera/error_code.h" +#include "tera/transaction.h" +#include "observer/executor/notification.h" + +#pragma GCC visibility push(default) +namespace tera { +namespace observer { + +enum TransactionType { + kGlobalTransaction = 0, + kSingleRowTransaction = 1, + kNoneTransaction = 2, +}; + +class Observer { +public: + virtual ~Observer() {} + + // if notify and ack are needed during OnNotify, + // call notification->Ack and notification->Notify + // before transaction commit + virtual ErrorCode OnNotify(tera::Transaction* t, + tera::Client* client, + const std::string& table_name, + const std::string& family, + const std::string& qualifier, + const std::string& row, + const std::string& value, + int64_t timestamp, + Notification* notification) = 0; + // return observer name + virtual std::string GetObserverName() const = 0; + + // return TransactionType + virtual TransactionType GetTransactionType() const = 0; +}; + +} // namespace observer +} +#pragma 
GCC visibility pop + +#endif // TERA_OBSERVER_H_ diff --git a/src/observer/executor/random_key_selector.cc b/src/observer/executor/random_key_selector.cc new file mode 100644 index 000000000..75b0129ab --- /dev/null +++ b/src/observer/executor/random_key_selector.cc @@ -0,0 +1,134 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "observer/executor/random_key_selector.h" + +#include "gflags/gflags.h" +#include "glog/logging.h" + +#include "types.h" + +DECLARE_string(flagfile); + +namespace tera { +namespace observer { + +RandomKeySelector::RandomKeySelector() + : tables_(new std::map>), + quit_(false), + cond_(&quit_mutex_) { + srand((unsigned)time(NULL)); // seed once; reseeding per SelectStart call repeats picks within the same second + tera::ErrorCode err; + client_ = tera::Client::NewClient(FLAGS_flagfile, &err); + update_thread_.Start(std::bind(&RandomKeySelector::Update, this)); +} + +RandomKeySelector::~RandomKeySelector() { + { + MutexLock locker(&quit_mutex_); + quit_ = true; + cond_.Broadcast(); + } + + update_thread_.Join(); + if (client_ != NULL) { + delete client_; + } +} + +bool RandomKeySelector::SelectStart(std::string* table_name, + std::string* start_key) { + // rand() is seeded once in the constructor + std::shared_ptr>> table_read_copy; + { + MutexLock locker(&table_mutex_); + // copy for copy-on-write, ref +1 + table_read_copy = tables_; + } + + if (table_read_copy->size() == 0) { + return false; + } + + // random table + uint32_t table_no = rand() % observe_tables_.size(); + *table_name = observe_tables_[table_no]; + + + // random key + size_t tablet_num = (*table_read_copy)[*table_name].size(); + if (0 == tablet_num) { + LOG(ERROR) << "No tablet"; + return false; + } + + uint32_t tablet_no = rand() % tablet_num; + *start_key = (*table_read_copy)[*table_name][tablet_no].start_key; + + VLOG(25) << "Random StartKey=" << *start_key << " TabletNo=" << tablet_no; + return true; +} + +ErrorCode RandomKeySelector::Observe(const std::string& 
table_name) { + tera::ErrorCode err; + + MutexLock locker(&table_mutex_); + + if (!tables_.unique()) { + // In this case threads may reading this copy. + // Shared_ptr construct a new copy from the original one. + // Later requests will operate on the new copy. + tables_.reset(new std::map>(*tables_)); + } + if (tables_->find(table_name) == tables_->end()) { + + std::vector tablets; + client_->GetTabletLocation(table_name, &tablets, &err); + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "Observe table failed, " << err.ToString(); + return err; + } + observe_tables_.push_back(table_name); + (*tables_)[table_name] = tablets; + } + return err; +} + +void RandomKeySelector::Update() { + tera::ErrorCode err; + while (true) { + { + MutexLock locker(&quit_mutex_); + if (quit_) { + return; + } + cond_.TimeWaitInUs(kObserverWaitTime); + } + + // update data first + std::shared_ptr>> table_update_copy( + new std::map>); + + // updated table + for (uint32_t i = 0; i < observe_tables_.size(); ++i) { + std::string table_name = observe_tables_[i]; + + std::vector tablets; + client_->GetTabletLocation(table_name, &tablets, &err); + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "Observe table failed, " << err.ToString(); + continue; + } + + table_update_copy->insert(std::pair>(table_name, tablets)); + } + + // update pointer + MutexLock locker(&table_mutex_); + tables_.swap(table_update_copy); + } +} + +} // namespace observer +} // namespace tera diff --git a/src/observer/executor/random_key_selector.h b/src/observer/executor/random_key_selector.h new file mode 100644 index 000000000..5a20fb4f3 --- /dev/null +++ b/src/observer/executor/random_key_selector.h @@ -0,0 +1,47 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_OBSERVER_EXECUTOR_RANDOM_KEY_SELECTOR_H_ +#define TERA_OBSERVER_EXECUTOR_RANDOM_KEY_SELECTOR_H_ + +#include +#include +#include +#include + +#include "common/mutex.h" +#include "common/thread.h" +#include "observer/executor/key_selector.h" +#include "tera.h" + +namespace tera { +namespace observer { + +class RandomKeySelector : public KeySelector { +public: + RandomKeySelector(); + virtual ~RandomKeySelector(); + + virtual bool SelectStart(std::string* table_name, + std::string* start_key); + virtual ErrorCode Observe(const std::string& table_name); +private: + void Update(); + +private: + tera::Client* client_; + mutable Mutex table_mutex_; + std::vector observe_tables_; + std::shared_ptr>> tables_; + common::Thread update_thread_; + + mutable Mutex quit_mutex_; + bool quit_; + common::CondVar cond_; +}; + +} // namespace observer +} // namespace tera + +#endif // TERA_OBSERVER_EXECUTOR_RANDOM_KEY_SELECTOR_H_ \ No newline at end of file diff --git a/src/observer/executor/scanner.h b/src/observer/executor/scanner.h new file mode 100644 index 000000000..a11a8646d --- /dev/null +++ b/src/observer/executor/scanner.h @@ -0,0 +1,41 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_SCANNER_H_ +#define TERA_SCANNER_H_ + +#include + +#include "observer/executor/observer.h" +#include "tera/error_code.h" + +#pragma GCC visibility push(default) +namespace tera { +namespace observer { + +class Scanner { +public: + static Scanner* GetScanner(); + + virtual ~Scanner() {} + + // register user define observers + // user should not destruct observers, which will be handled by scanner + virtual ErrorCode Observe(const std::string& table_name, + const std::string& column_family, + const std::string& qualifier, + Observer* observer) = 0; + + virtual bool Init() = 0; + + virtual bool Start() = 0; + + virtual void Exit() = 0; +}; + +} // namespace observer +} // namespace tera +#pragma GCC visibility pop + +#endif // TERA_SCANNER_H_ diff --git a/src/observer/executor/scanner_entry.cc b/src/observer/executor/scanner_entry.cc new file mode 100644 index 000000000..5b012b339 --- /dev/null +++ b/src/observer/executor/scanner_entry.cc @@ -0,0 +1,63 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "observer/executor/scanner_entry.h" + +#include "gflags/gflags.h" +#include "glog/logging.h" + +#include "observer/executor/scanner_impl.h" + +namespace tera { +namespace observer { + +ScannerEntry::ScannerEntry() {} + +ScannerEntry::~ScannerEntry() {} + +bool ScannerEntry::StartServer() { + scanner_.reset(tera::observer::Scanner::GetScanner()); + + if(!scanner_->Init()) { + LOG(ERROR) << "fail to init scanner_impl"; + return false; + } + + // observe observers to scanner + ErrorCode err = Observe(); + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "Observe failed, reason: " << err.ToString(); + return false; + } + + if(!scanner_->Start()) { + LOG(ERROR) << "fail to start scanner_impl"; + return false; + } + return true; +} + +void ScannerEntry::ShutdownServer() { + LOG(INFO) << "shut down scanner"; + scanner_->Exit(); + scanner_.reset(); + LOG(INFO) << "scanner stop done!"; +} + +bool ScannerEntry::Run() { + ThisThread::Sleep(1000); + return true; +} + +ErrorCode ScannerEntry::Observe() { + ErrorCode err; + return err; +} + +Scanner* ScannerEntry::GetScanner() const { + return scanner_.get(); +} + +} // namespace observer +} // namespace tera \ No newline at end of file diff --git a/src/observer/executor/scanner_entry.h b/src/observer/executor/scanner_entry.h new file mode 100644 index 000000000..ed5e5c325 --- /dev/null +++ b/src/observer/executor/scanner_entry.h @@ -0,0 +1,40 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_OBSERVER_EXECUTOR_SCANNER_ENTRY_H_ +#define TERA_OBSERVER_EXECUTOR_SCANNER_ENTRY_H_ + +#include +#include + +#include "common/this_thread.h" +#include "observer/executor/observer.h" +#include "tera.h" +#include "tera_entry.h" + +namespace tera { +namespace observer { + +class Scanner; + +class ScannerEntry : public TeraEntry { +public: + ScannerEntry(); + virtual ~ScannerEntry(); + + virtual bool StartServer(); + virtual bool Run(); + virtual void ShutdownServer(); + + virtual ErrorCode Observe(); + Scanner* GetScanner() const; +private: + std::unique_ptr scanner_; +}; + + +} // namespace observer +} // namespace tera + +#endif // TERA_OBSERVER_EXECUTOR_SCANNER_ENTRY_H_ \ No newline at end of file diff --git a/src/observer/executor/scanner_impl.cc b/src/observer/executor/scanner_impl.cc new file mode 100644 index 000000000..f42ba6b05 --- /dev/null +++ b/src/observer/executor/scanner_impl.cc @@ -0,0 +1,657 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "observer/executor/scanner_impl.h" + +#include +#include +#include + +#include + +#include "gflags/gflags.h" + +#include "common/base/string_number.h" +#include "observer/executor/random_key_selector.h" +#include "observer/executor/notification.h" +#include "observer/executor/notification_impl.h" +#include "observer/rowlocknode/fake_rowlock_client.h" +#include "sdk/table_impl.h" +#include "sdk/sdk_utils.h" +#include "tera.h" +#include "types.h" + +DECLARE_int32(observer_proc_thread_num); +DECLARE_int32(observer_scanner_thread_num); +DECLARE_int32(observer_ack_conflict_timeout); +DECLARE_int64(observer_max_pending_task); +DECLARE_int64(observer_ack_timeout_time); +DECLARE_string(flagfile); +DECLARE_string(rowlock_server_ip); +DECLARE_string(rowlock_server_port); +DECLARE_int32(observer_rowlock_client_thread_num); +DECLARE_bool(mock_rowlock_enable); + +namespace tera { +namespace observer { + +ScannerImpl* ScannerImpl::scanner_instance_ = new ScannerImpl(); +Scanner* Scanner::GetScanner() { + return ScannerImpl::GetInstance(); +} + +ScannerImpl* ScannerImpl::GetInstance() { + return scanner_instance_; +} + +ScannerImpl::ScannerImpl() + : tera_client_(NULL), + table_observe_info_(new std::map), + scan_table_threads_(new common::ThreadPool(FLAGS_observer_scanner_thread_num)), + transaction_threads_(new common::ThreadPool(FLAGS_observer_proc_thread_num)), + quit_(false), + cond_(&quit_mutex_) { + profiling_thread_.Start(std::bind(&ScannerImpl::Profiling, this)); +} + +ScannerImpl::~ScannerImpl() { + Exit(); + + scan_table_threads_->Stop(true); + transaction_threads_->Stop(true); + profiling_thread_.Join(); + + MutexLock locker(&table_mutex_); + // close table + for (auto it = table_observe_info_->begin(); it != table_observe_info_->end(); ++it) { + if (it->second.table != NULL) { + delete it->second.table; + } + } + + if (tera_client_ != NULL) { + delete tera_client_; + } + + for (auto it = observers_.begin(); it != observers_.end(); ++it) { + delete *it; + 
} +} + +ErrorCode ScannerImpl::Observe(const std::string& table_name, + const std::string& column_family, + const std::string& qualifier, + Observer* observer) { + // Observe before init + tera::ErrorCode err; + if (NULL == tera_client_) { + LOG(ERROR) << "Init scanner first!"; + err.SetFailed(ErrorCode::kSystem, "observe before scanner init"); + return err; + } + + Column column = {table_name, column_family, qualifier}; + + { + MutexLock locker(&table_mutex_); + if (!table_observe_info_.unique()) { + // Shared_ptr construct a new copy from the original one. + // Former requests still reading the original shared_ptr + // Write operation executed on the new copy, so as the later requests + table_observe_info_.reset(new std::map(*table_observe_info_)); + } + + if ((*table_observe_info_)[table_name].table == NULL) { + // init table + tera::Table* table = tera_client_->OpenTable(table_name, &err); + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "open tera table [" << table_name << "] failed, " << err.ToString(); + return err; + } + LOG(INFO) << "open tera table [" << table_name << "] succ"; + + // build map + (*table_observe_info_)[table_name].table = table; + (*table_observe_info_)[table_name].type = GetTableTransactionType(table); + } + + if (!CheckTransactionTypeLegalForTable(observer->GetTransactionType(), + (*table_observe_info_)[table_name].type)) { + LOG(ERROR) << "Transaction type does not match table. 
table_name: " << table_name + << " type: " << (*table_observe_info_)[table_name].type << " , observer name: " << + observer->GetObserverName() << " type: " << observer->GetTransactionType(); + err.SetFailed(ErrorCode::kSystem, "Transaction type does not match table"); + return err; + } + + auto it = (*table_observe_info_)[table_name].observe_columns[column].insert(observer); + if (!it.second) { + LOG(ERROR) << "Observer " << observer->GetObserverName() << " observe " << table_name + << ":" << column_family << ":" << qualifier << " more than once!"; + err.SetFailed(ErrorCode::kSystem, "the same observer observe the same column more than once"); + return err; + } + observers_.insert(observer); + } + + err = key_selector_->Observe(table_name); + LOG(INFO) << "Observer start. table: " << table_name << " cf:qu " << column_family << ":" << + qualifier << " observer: " << observer->GetObserverName(); + + return err; +} + +bool ScannerImpl::Init() { + tera::ErrorCode err; + if (NULL == tera_client_) { + tera_client_ = tera::Client::NewClient(FLAGS_flagfile, &err); + + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "init tera client [" << FLAGS_flagfile << "] failed, " << err.ToString(); + return false; + } + } + + // init key_selector_ + // different selector started by different flags + key_selector_.reset(new RandomKeySelector()); + + return true; +} + +bool ScannerImpl::Start() { + for (int32_t idx = 0; idx < FLAGS_observer_scanner_thread_num; ++idx) { + scan_table_threads_->AddTask(std::bind(&ScannerImpl::ScanTable, this)); + } + return true; +} + +void ScannerImpl::Exit() { + // the scope of quit_mutex only covers cond_ broadcast + MutexLock locker(&quit_mutex_); + quit_ = true; + cond_.Broadcast(); +} + +tera::Client* ScannerImpl::GetTeraClient() const { + return tera_client_; +} + +void ScannerImpl::ScanTable() { + std::string start_key; + std::string table_name; + std::set columns; + tera::Table* table = NULL; + + // table and start key will be 
refreshed. + while (true) { + { + MutexLock locker(&quit_mutex_); + if (quit_) { + break; + } + cond_.TimeWaitInUs(kObserverWaitTime); + } + + if (key_selector_->SelectStart(&table_name, &start_key)) { + GetObserveColumns(table_name, &columns); + } else { + continue; + } + + table = GetTable(table_name); + if (DoScanTable(table, columns, start_key, "")) { + DoScanTable(table, columns, "", start_key); + } + } +} + +bool ScannerImpl::DoScanTable(tera::Table* table, + const std::set& columns, + const std::string& start_key, + const std::string& end_key) { + if (table == NULL) { + return false; + } + + LOG(INFO) << "Start scan table. Table name: [" << table->GetName() + << "]. Start key: [" << start_key << "]"; + + tera::ScanDescriptor desc(start_key); + desc.SetEnd(end_key); + // Notify stores in single lg + desc.AddColumnFamily(kNotifyColumnFamily); + tera::ErrorCode err; + std::unique_ptr result_stream(table->Scan(desc, &err)); + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "table scan failed, " << err.ToString(); + return false; + } + + if (result_stream->Done(&err)) { + LOG(ERROR) << " ERR " << err.GetReason(); + } + + bool finished = false; + std::string rowkey; + std::vector vec_col; + while (NextRow(columns, result_stream.get(), table->GetName(), &finished, &rowkey, &vec_col)) { + // lock row + if (!TryLockRow(table->GetName(), rowkey)) { + // collision + LOG(INFO) <<"[rowlock failed] table=" << table->GetName() << " row=" << rowkey; + return false; + } + VLOG(12) <<"[time] Transaction start. 
[row] " << rowkey; + + // automatic unlock + std::shared_ptr unlocker( + new AutoRowUnlocker(table->GetName(), rowkey)); + + for (uint32_t i = 0; i < vec_col.size(); ++i ) { + tera::Transaction* t = NULL; + TransactionType type; + { + MutexLock locker(&table_mutex_); + type = (*table_observe_info_)[table->GetName()].type; + } + + switch (type) { + case kGlobalTransaction: + t = tera_client_->NewGlobalTransaction(); + if (t == NULL) { + LOG(ERROR) << "NewGlobalTransaction failed. Notify cell ignored. table: " << table->GetName() + << " row: " << rowkey << " family: " << vec_col[i].family + << " qualifier: " << vec_col[i].qualifier; + continue; + } + break; + case kSingleRowTransaction: + t = table->StartRowTransaction(rowkey); + if (t == NULL) { + LOG(ERROR) << "StartRowTransaction failed. Notify cell ignored. table: " << table->GetName() + << " row: " << rowkey << " family: " << vec_col[i].family + << " qualifier: " << vec_col[i].qualifier; + continue; + } + break; + default: + break; + } + std::shared_ptr notify_cell(new NotifyCell(t)); + notify_cell->table = table; + notify_cell->row = rowkey; + notify_cell->observed_column = vec_col[i]; + notify_cell->unlocker = unlocker; + + DoReadValue(notify_cell); + } + + MutexLock locker(&quit_mutex_); + if (quit_) { + return false; + } + } + if (finished) { + return true; + } else { + return false; + } + +} + +bool ScannerImpl::NextRow(const std::set& columns, tera::ResultStream* result_stream, + const std::string& table_name, bool* finished, + std::string* row, std::vector* vec_col) { + tera::ErrorCode err; + + // check finish + if (result_stream->Done(&err)) { + *finished = true; + return false; + } + + if (tera::ErrorCode::kOK != err.GetType()) { + LOG(ERROR) << "scanning failed" << err.ToString(); + return false; + } + + vec_col->clear(); + *row = result_stream->RowName(); + + // scan cell + while (!result_stream->Done(&err) && result_stream->RowName() == *row) { + while (transaction_threads_->PendingNum() > 
FLAGS_observer_max_pending_task) { + VLOG(12) << "transaction_threads pending: " << transaction_threads_->PendingNum(); + MutexLock locker(&quit_mutex_); + if (quit_) { + return false; + } + cond_.TimeWaitInUs(kObserverWaitTime); + } + std::string ob_cf; + std::string ob_qu; + + if (!ParseNotifyQualifier(result_stream->Qualifier(), &ob_cf, &ob_qu)) { + LOG(WARNING) << "parse notify qualifier failed: " << result_stream->Qualifier(); + result_stream->Next(); + continue; + } + + Column ob_col = {table_name, ob_cf, ob_qu}; + if (columns.end() == columns.find(ob_col)) { + LOG(WARNING) << "miss observed column, table_name" << table_name << + " cf=" << ob_cf << " qu=" << ob_qu; + result_stream->Next(); + continue; + } + vec_col->push_back(ob_col); + result_stream->Next(); + + } + return true; +} + +// example qualifier: C:url +// C: cf; column: url; +bool ScannerImpl::ParseNotifyQualifier(const std::string& notify_qualifier, + std::string* data_family, + std::string* data_qualifier) { + + std::vector frags; + std::size_t pos = std::string::npos; + std::size_t start_pos = 0; + std::string frag; + + // parse cf + pos = notify_qualifier.find_first_of(':', start_pos); + if (pos == std::string::npos) { + LOG(ERROR) << "Parse notify qualifier error: " << notify_qualifier; + return false; + } + frag = notify_qualifier.substr(start_pos, pos - start_pos); + frags.push_back(frag); + start_pos = pos + 1; + + pos = notify_qualifier.size(); + frag = notify_qualifier.substr(start_pos, pos - start_pos); + frags.push_back(frag); + if (2 != frags.size()) { + return false; + } + if (frags[0] == "" || frags[1] == "") { + return false; + } + *data_family = frags[0]; + *data_qualifier = frags[1]; + + return true; +} + +bool ScannerImpl::DoReadValue(std::shared_ptr notify_cell) { + VLOG(12) <<"[time] do read value start. 
[row] " << notify_cell->row; + std::unique_ptr row_reader(notify_cell->table->NewRowReader(notify_cell->row)); + assert(row_reader.get() != NULL); + row_reader->AddColumn(notify_cell->observed_column.family, notify_cell->observed_column.qualifier); + // transaction read + if (notify_cell->transaction != NULL) { + notify_cell->transaction->Get(row_reader.get()); + } else { + notify_cell->table->Get(row_reader.get()); + } + VLOG(12) <<"[time] do read value finish. [row] " << notify_cell->row; + if (tera::ErrorCode::kOK == row_reader->GetError().GetType()) { + notify_cell->value = row_reader->Value(); + notify_cell->timestamp = row_reader->Timestamp(); + + std::shared_ptr> table_observe_info_read_copy; + { + MutexLock locker(&table_mutex_); + // shared_ptr ref +1 + table_observe_info_read_copy = table_observe_info_; + } + + auto it = table_observe_info_read_copy->find(notify_cell->observed_column.table_name); + if (it == table_observe_info_read_copy->end()) { + LOG(WARNING) << "table not found: " << notify_cell->observed_column.table_name; + return false; + } + + if (it->second.observe_columns.find(notify_cell->observed_column) == it->second.observe_columns.end()) { + LOG(WARNING) << "column not found. cf: " << notify_cell->observed_column.family + << " qu: " << notify_cell->observed_column.qualifier; + return false; + } + + if (it->second.observe_columns[notify_cell->observed_column].size() == 0) { + LOG(WARNING) << "no match observers, table=" << notify_cell->observed_column.table_name << + " cf=" << notify_cell->observed_column.family << " qu=" << notify_cell->observed_column.qualifier; + return false; + } + + std::set& observer_set = (*table_observe_info_read_copy)[notify_cell->observed_column.table_name].observe_columns[notify_cell->observed_column]; + + // only gtxn check ack + if ((*observer_set.begin())->GetTransactionType() == kGlobalTransaction + && !CheckConflictOnAckColumn(notify_cell, observer_set)) { + LOG(WARNING) << "Ack failed ! 
row=" << notify_cell->row << " cf=" << notify_cell->observed_column.family << + " qu=" << notify_cell->observed_column.qualifier;; + return false; + } + // every column may have more than one observers + for (auto observer = observer_set.begin(); observer != observer_set.end(); ++observer) { + + transaction_threads_->AddTask( [=] (int64_t) { + total_counter_.Inc(); + std::unique_ptr notification(GetNotification(notify_cell->transaction)); + tera::ErrorCode err = (*observer)->OnNotify(notify_cell->transaction, tera_client_, notify_cell->observed_column.table_name, + notify_cell->observed_column.family, notify_cell->observed_column.qualifier, + notify_cell->row, notify_cell->value, notify_cell->timestamp, notification.get()); + if (err.GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << "OnNotify failed! reason: " << err.GetReason(); + fail_counter_.Inc(); + } + }); + } + + } else { + LOG(WARNING) << "[read failed] table=" << notify_cell->table->GetName() << " cf=" << notify_cell->observed_column.family << + " qu=" << notify_cell->observed_column.qualifier << " row=" << notify_cell->row << + " err=" << row_reader->GetError().GetType() << row_reader->GetError().GetReason(); + return false; + } + + return true; +} + +void ScannerImpl::GetObserveColumns(const std::string& table_name, std::set* columns) { + columns->clear(); + + std::shared_ptr> table_observe_info_read_copy; + { + MutexLock locker(&table_mutex_); + // shared_ptr ref +1 + table_observe_info_read_copy = table_observe_info_; + } + + for (auto it : (*table_observe_info_read_copy)[table_name].observe_columns) { + columns->insert(it.first); + } +} + +tera::Table* ScannerImpl::GetTable(const std::string table_name) { + std::shared_ptr> table_observe_info_read_copy; + { + MutexLock locker(&table_mutex_); + table_observe_info_read_copy = table_observe_info_; + } + return (*table_observe_info_read_copy)[table_name].table; +} + +void ScannerImpl::Profiling() { + while (true) { + { + MutexLock 
locker(&quit_mutex_); + if (quit_) { + return; + } + cond_.TimeWaitInUs(kObserverWaitTime); + } + LOG(INFO) << "[Observer Profiling Info] total: " << total_counter_.Get() << + " failed: " << fail_counter_.Get() << " transaction pending: " << + transaction_threads_->PendingNum(); + total_counter_.Clear(); + fail_counter_.Clear(); + } +} + +bool ScannerImpl::CheckConflictOnAckColumn(std::shared_ptr notify_cell, + const std::set& observers) { + VLOG(12) <<"[time] Check ACK start. [cf:qu] " << notify_cell->observed_column.family + << notify_cell->observed_column.qualifier; + bool is_collision = false; + std::vector ack_qualifier_list; + std::string ack_qualifier_prefix = GetAckQualifierPrefix(notify_cell->observed_column.family, + notify_cell->observed_column.qualifier); + + // use transaction to read column Ack + std::unique_ptr row_transaction(notify_cell->table->StartRowTransaction(notify_cell->row)); + + // read Acks + std::unique_ptr row_reader(notify_cell->table->NewRowReader(notify_cell->row)); + for (auto it : observers) { + std::string ack_qualifier = GetAckQualifier(ack_qualifier_prefix, it->GetObserverName()); + ack_qualifier_list.push_back(ack_qualifier); + + row_reader->AddColumn(notify_cell->observed_column.family, ack_qualifier); + } + row_transaction->Get(row_reader.get()); + if (tera::ErrorCode::kOK == row_reader->GetError().GetType()) { + while (!row_reader->Done()) { + int64_t latest_observer_start_ts = 0; + if (!StringToNumber(row_reader->Value(), &latest_observer_start_ts)) { + LOG(ERROR) << "Convert string to timestamp failed! 
String: " << row_reader->Value() << + " row=" << notify_cell->row << " cf=" << notify_cell->observed_column.family << + " qu=" << notify_cell->observed_column.qualifier; + is_collision = true; + break; + } + + // collision check: ack ts later than notify ts && + if (latest_observer_start_ts >= notify_cell->timestamp && + notify_cell->transaction->GetStartTimestamp() - latest_observer_start_ts + < FLAGS_observer_ack_conflict_timeout) { + // time too short, collisision, ignore + + is_collision = true; + LOG(INFO) << "own collision. row=" << notify_cell->row << + " cf=" << notify_cell->observed_column.family << " qu=" << + notify_cell->observed_column.qualifier << + ", latest observer start_ts=" << latest_observer_start_ts << + ", observer start_ts=" << notify_cell->transaction->GetStartTimestamp() << + ", data commit_ts=" << notify_cell->timestamp; + break; + + } + row_reader->Next(); + } + } else { + LOG(INFO) << "read Acks failed, err=" << row_reader->GetError().GetReason() << + " row=" << notify_cell->row << " cf=" << notify_cell->observed_column.family << + " qu=" << notify_cell->observed_column.qualifier; + } + + if (!is_collision) { + // set Acks + std::unique_ptr mutation(notify_cell->table->NewRowMutation(notify_cell->row)); + for (size_t idx = 0; idx < ack_qualifier_list.size(); ++idx) { + mutation->Put(notify_cell->observed_column.family, ack_qualifier_list[idx], + std::to_string(notify_cell->transaction->GetStartTimestamp())); + } + row_transaction->ApplyMutation(mutation.get()); + notify_cell->table->CommitRowTransaction(row_transaction.get()); + if (row_transaction->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(INFO) << "write Ack failed, row=" << notify_cell->row << " err=" << + row_transaction->GetError().GetReason() << " cf=" << + notify_cell->observed_column.family << " qu=" << + notify_cell->observed_column.qualifier; + is_collision = true; + } + } + VLOG(12) <<"[time] Check ACK finish. 
[cf:qu] " << notify_cell->observed_column.family + << notify_cell->observed_column.qualifier; + + return !is_collision; +} + +std::string ScannerImpl::GetAckQualifierPrefix(const std::string& family, + const std::string& qualifier) const { + return family + ":" + qualifier; +} + +std::string ScannerImpl::GetAckQualifier(const std::string& prefix, + const std::string& observer_name) const { + return prefix + "+ack_" + observer_name; +} + +bool ScannerImpl::TryLockRow(const std::string& table_name, + const std::string& row) const { + VLOG(12) << "[time] trylock " << table_name << " " << row; + RowlockRequest request; + RowlockResponse response; + + std::shared_ptr rowlock_client; + + if (FLAGS_mock_rowlock_enable == true) { + rowlock_client.reset(new FakeRowlockClient()); + } else { + rowlock_client.reset(new RowlockClient()); + } + + request.set_table_name(table_name); + request.set_row(row); + + if (!rowlock_client->TryLock(&request, &response)) { + LOG(ERROR) << "TryLock rpc fail, row: " << row; + return false; + } + if (response.lock_status() != kLockSucc) { + LOG(INFO) << "Lock row fail, row: " << row; + return false; + } + VLOG(12) << "[time] trylock finish " << table_name << " " << row; + return true; +} + +bool ScannerImpl::CheckTransactionTypeLegalForTable(TransactionType type, + TransactionType table_type) { + if (type == table_type) { + return true; + } + + if (type == kNoneTransaction && table_type == kSingleRowTransaction) { + return true; + } + + return false; +} + +TransactionType ScannerImpl::GetTableTransactionType(tera::Table* table) { + tera::ErrorCode err; + TableImpl* table_impl(dynamic_cast(tera_client_)->OpenTableInternal(table->GetName(), &err)); + TableSchema schema = table_impl->GetTableSchema(); + + if (IsTransactionTable(schema)) { + std::set gtxn_cfs; + FindGlobalTransactionCfs(schema, >xn_cfs); + if (gtxn_cfs.size() > 0) { + return kGlobalTransaction; + } + return kSingleRowTransaction; + } + return kNoneTransaction; +} + +} // namespace 
observer +} // namespace tera diff --git a/src/observer/executor/scanner_impl.h b/src/observer/executor/scanner_impl.h new file mode 100644 index 000000000..833ff3fa4 --- /dev/null +++ b/src/observer/executor/scanner_impl.h @@ -0,0 +1,118 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_OBSERVER_EXECUTOR_SCANNER_IMPL_H_ +#define TERA_OBSERVER_EXECUTOR_SCANNER_IMPL_H_ + +#include +#include + +#include "common/counter.h" +#include "common/mutex.h" +#include "common/thread_pool.h" +#include "common/thread.h" +#include "common/this_thread.h" +#include "observer/executor/notify_cell.h" +#include "observer/executor/observer.h" +#include "observer/executor/scanner.h" +#include "tera.h" + +namespace tera { +namespace observer { + +class Observer; +class KeySelector; + +class ScannerImpl : public Scanner { +private: + struct TableObserveInfo { + std::map> observe_columns; + tera::Table* table; + TransactionType type; + }; + +public: + ScannerImpl(); + virtual ~ScannerImpl(); + + virtual ErrorCode Observe(const std::string& table_name, + const std::string& column_family, + const std::string& qualifier, + Observer* observer); + + virtual bool Init(); + + virtual bool Start(); + + virtual void Exit(); + + tera::Client* GetTeraClient() const; + + static ScannerImpl* GetInstance(); + +private: + void ScanTable(); + + bool DoScanTable(tera::Table* table, + const std::set& column_set, + const std::string& start_key, + const std::string& end_key); + + bool DoReadValue(std::shared_ptr notify_cell); + + bool ParseNotifyQualifier(const std::string& notify_qualifier, + std::string* data_family, + std::string* data_qualfier); + + void GetObserveColumns(const std::string& table_name, + std::set* column_set); + + tera::Table* GetTable(const std::string table_name); + + bool NextRow(const std::set& columns, tera::ResultStream* result_stream, + const 
std::string& table_name, bool* finished, + std::string* row, std::vector* vec_col); + + void Profiling(); + + bool CheckConflictOnAckColumn(std::shared_ptr notify_cell, + const std::set& observers); + std::string GetAckQualifierPrefix(const std::string& family, const std::string& qualifier) const; + std::string GetAckQualifier(const std::string& prefix, const std::string& observer_name) const; + bool TryLockRow(const std::string& table_name, + const std::string& row) const; + + bool CheckTransactionTypeLegalForTable(TransactionType type, TransactionType table_type); + TransactionType GetTableTransactionType(tera::Table* table); + +private: + mutable Mutex table_mutex_; + tera::Client* tera_client_; + std::unique_ptr key_selector_; + + // map
> + std::shared_ptr> table_observe_info_; + // This set stores unique user-define observer addresses. + // Release user-define observers when scanner destruct + std::set observers_; + + std::unique_ptr scan_table_threads_; + std::unique_ptr transaction_threads_; + + // for quit + bool quit_; + mutable Mutex quit_mutex_; + common::CondVar cond_; + + common::Thread profiling_thread_; + Counter total_counter_; + Counter fail_counter_; + + static ScannerImpl* scanner_instance_; +}; + +} // namespace observer +} // namespace tera + +#endif // TERA_OBSERVER_EXECUTOR_SCANNER_IMPL_H_ diff --git a/src/observer/observer_demo/demo_entry.cc b/src/observer/observer_demo/demo_entry.cc new file mode 100644 index 000000000..7d6e3a361 --- /dev/null +++ b/src/observer/observer_demo/demo_entry.cc @@ -0,0 +1,59 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "observer/observer_demo/demo_entry.h" + +#include "observer/executor/observer.h" +#include "observer/executor/scanner.h" +#include "observer/observer_demo/demo_observer.h" +#include "tera.h" + +std::string GetTeraEntryName() { + return "DemoEntry"; +} + +tera::TeraEntry* GetTeraEntry() { + return new tera::observer::DemoEntry(); +} + +namespace tera { +namespace observer { + +DemoEntry::DemoEntry() {} + +ErrorCode DemoEntry::Observe() { + ErrorCode err; + // new an observer ptr and do not delete it + Observer* demo = new DemoObserver(); + Observer* parser = new ParseObserver(); + Observer* single_row_observer = new SingleRowObserver(); + Observer* none_txn_observer = new NoneTransactionObserver(); + + Scanner* scanner = GetScanner(); + err = scanner->Observe("observer_test_table", "Data", "Page", demo); + if (tera::ErrorCode::kOK != err.GetType()) { + return err; + } + err = scanner->Observe("observer_test_table", "Data", "Link", demo); + if (tera::ErrorCode::kOK != err.GetType()) { + return 
err; + } + + err = scanner->Observe("observer_test_table", "Data", "Link", parser); + if (tera::ErrorCode::kOK != err.GetType()) { + return err; + } + + err = scanner->Observe("single_row_test_table", "Data", "Link", single_row_observer); + if (tera::ErrorCode::kOK != err.GetType()) { + return err; + } + + err = scanner->Observe("none_txn_test_table", "Data", "Link", none_txn_observer); + return err; + +} + +} // namespace observer +} // namespace tera \ No newline at end of file diff --git a/src/observer/observer_demo/demo_entry.h b/src/observer/observer_demo/demo_entry.h new file mode 100644 index 000000000..5f01ec840 --- /dev/null +++ b/src/observer/observer_demo/demo_entry.h @@ -0,0 +1,30 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_OBSERVER_OBSERVER_DEMO_DEMO_ENTRY_H_ +#define TERA_OBSERVER_OBSERVER_DEMO_DEMO_ENTRY_H_ + +#include +#include + +#include "observer/executor/scanner_entry.h" +#include "tera.h" + +namespace tera { +namespace observer { + +class DemoEntry : public ScannerEntry { +public: + DemoEntry(); + virtual ~DemoEntry() {} + + virtual ErrorCode Observe(); +}; + + +} // namespace observer +} // namespace tera + +#endif // TERA_OBSERVER_OBSERVER_DEMO_DEMO_ENTRY_H_ + diff --git a/src/observer/observer_demo/demo_observer.cc b/src/observer/observer_demo/demo_observer.cc new file mode 100644 index 000000000..07048af92 --- /dev/null +++ b/src/observer/observer_demo/demo_observer.cc @@ -0,0 +1,156 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "observer/observer_demo/demo_observer.h" + +#include + +namespace tera { +namespace observer { + +ErrorCode DemoObserver::OnNotify(tera::Transaction* t, + tera::Client* client, + const std::string& table_name, + const std::string& family, + const std::string& qualifier, + const std::string& row, + const std::string& value, + int64_t timestamp, + Notification* notification) { + VLOG(12) <<"[time] OnNotify start. [row] " << row; + LOG(INFO) << "[Notify DemoObserver] table:family:qualifer=" << + table_name << ":" << family << ":" << + qualifier << " row=" << row << + " value=" << value << " timestamp=" << timestamp; + + tera::ErrorCode err; + tera::Table* table = client->OpenTable(table_name, &err); + + // write ForwordIndex column + tera::RowMutation* mutation = table->NewRowMutation(row); + mutation->Put("Data", "ForwordIndex", "FIValue_" + row); + t->ApplyMutation(mutation); + + tera::ErrorCode error; + notification->Ack(table, row, family, qualifier); + error = t->Commit(); + delete mutation; + VLOG(12) <<"[time] OnNotify finish. 
[row] " << row; + return error; +} + +std::string DemoObserver::GetObserverName() const { + return "DemoObserver"; +} + +TransactionType DemoObserver::GetTransactionType() const { + return kGlobalTransaction; +} + +ErrorCode ParseObserver::OnNotify(tera::Transaction* t, + tera::Client* client, + const std::string& table_name, + const std::string& family, + const std::string& qualifier, + const std::string& row, + const std::string& value, + int64_t timestamp, + Notification* notification) { + LOG(INFO) << "[Notify ParseObserver] table:family:qualifer=" << + table_name << ":" << family << ":" << + qualifier << " row=" << row << + " value=" << value << " timestamp=" << timestamp; + + tera::ErrorCode err; + // do nothing + tera::Table* table = client->OpenTable(table_name, &err); + notification->Ack(table, row, family, qualifier); + err = t->Commit(); + return err; +} + +std::string ParseObserver::GetObserverName() const { + return "ParseObserver"; +} + +TransactionType ParseObserver::GetTransactionType() const { + return kGlobalTransaction; +} + +ErrorCode SingleRowObserver::OnNotify(tera::Transaction* t, + tera::Client* client, + const std::string& table_name, + const std::string& family, + const std::string& qualifier, + const std::string& row, + const std::string& value, + int64_t timestamp, + Notification* notification) { + LOG(INFO) << "[Notify SingleRowObserver] table:family:qualifer=" << + table_name << ":" << family << ":" << + qualifier << " row=" << row << + " value=" << value << " timestamp=" << timestamp; + + tera::ErrorCode err; + tera::Table* table = client->OpenTable(table_name, &err); + + // single row txn + tera::RowMutation* mutation = table->NewRowMutation(row); + mutation->Put(family, "another_qu", "value"); + t->ApplyMutation(mutation); + + tera::ErrorCode error; + notification->Ack(table, row, family, qualifier); + tera::Table* another_table = client->OpenTable("another_table", &err); + notification->Ack(another_table, "somerow", "family", 
"qualifier"); + error = t->Commit(); + delete mutation; + return error; +} + +std::string SingleRowObserver::GetObserverName() const { + return "SingleRowObserver"; +} + +TransactionType SingleRowObserver::GetTransactionType() const { + return kSingleRowTransaction; +} + +ErrorCode NoneTransactionObserver::OnNotify(tera::Transaction* t, + tera::Client* client, + const std::string& table_name, + const std::string& family, + const std::string& qualifier, + const std::string& row, + const std::string& value, + int64_t timestamp, + Notification* notification) { + LOG(INFO) << "[Notify NoneTransactionObserver] table:family:qualifer=" << + table_name << ":" << family << ":" << + qualifier << " row=" << row << + " value=" << value << " timestamp=" << timestamp; + + tera::ErrorCode err; + tera::Table* table = client->OpenTable(table_name, &err); + + // do something + // kNoneTransaction notify + notification->Ack(table, row, family, qualifier); + + // kNoneTransaction ack + tera::Table* notify_table = client->OpenTable("notify_table", &err); + notification->Notify(notify_table, "notify_row", "family", "qualifier"); + return err; +} + +std::string NoneTransactionObserver::GetObserverName() const { + return "NoneTransactionObserver"; +} + +TransactionType NoneTransactionObserver::GetTransactionType() const { + return kNoneTransaction; +} + +} // namespace observer +} // namespace tera \ No newline at end of file diff --git a/src/observer/observer_demo/demo_observer.h b/src/observer/observer_demo/demo_observer.h new file mode 100644 index 000000000..201feebf2 --- /dev/null +++ b/src/observer/observer_demo/demo_observer.h @@ -0,0 +1,86 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_OBSERVER_OBSERVER_DEMO_DEMO_OBSERVER_H_ +#define TERA_OBSERVER_OBSERVER_DEMO_DEMO_OBSERVER_H_ + +#include "observer/executor/observer.h" +#include "tera.h" + +namespace tera { +namespace observer { + +class DemoObserver : public tera::observer::Observer { +public: + DemoObserver() {} + virtual ~DemoObserver() {} + virtual ErrorCode OnNotify(tera::Transaction* t, + tera::Client* client, + const std::string& table_name, + const std::string& family, + const std::string& qualifier, + const std::string& row, + const std::string& value, + int64_t timestamp, + Notification* notification); + virtual std::string GetObserverName() const; + virtual TransactionType GetTransactionType() const; +}; + +class ParseObserver : public tera::observer::Observer { +public: + ParseObserver() {} + virtual ~ParseObserver() {} + virtual ErrorCode OnNotify(tera::Transaction* t, + tera::Client* client, + const std::string& table_name, + const std::string& family, + const std::string& qualifier, + const std::string& row, + const std::string& value, + int64_t timestamp, + Notification* notification); + virtual std::string GetObserverName() const; + virtual TransactionType GetTransactionType() const; +}; + +class SingleRowObserver : public tera::observer::Observer { +public: + SingleRowObserver() {} + virtual ~SingleRowObserver() {} + virtual ErrorCode OnNotify(tera::Transaction* t, + tera::Client* client, + const std::string& table_name, + const std::string& family, + const std::string& qualifier, + const std::string& row, + const std::string& value, + int64_t timestamp, + Notification* notification); + virtual std::string GetObserverName() const; + virtual TransactionType GetTransactionType() const; +}; + +class NoneTransactionObserver : public tera::observer::Observer { +public: + NoneTransactionObserver() {} + virtual ~NoneTransactionObserver() {} + virtual ErrorCode OnNotify(tera::Transaction* t, + tera::Client* client, + const std::string& table_name, + const std::string& 
family, + const std::string& qualifier, + const std::string& row, + const std::string& value, + int64_t timestamp, + Notification* notification); + virtual std::string GetObserverName() const; + virtual TransactionType GetTransactionType() const; +}; + +} // namespace observer +} // namespace tera + +#endif // TERA_OBSERVER_OBSERVER_DEMO_DEMO_OBSERVER_H_ + diff --git a/src/observer/observer_demo/observe_demo_main.cc b/src/observer/observer_demo/observe_demo_main.cc new file mode 100644 index 000000000..af633255a --- /dev/null +++ b/src/observer/observer_demo/observe_demo_main.cc @@ -0,0 +1,84 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include + +#include +#include + +#include "common/base/scoped_ptr.h" +#include "common/log/log_cleaner.h" +#include "tera_entry.h" +#include "utils/utils_cmd.h" +#include "version.h" + +DECLARE_string(tera_log_prefix); +DECLARE_string(tera_local_addr); +DECLARE_bool(tera_info_log_clean_enable); + +extern std::string GetTeraEntryName(); +extern tera::TeraEntry* GetTeraEntry(); + +volatile sig_atomic_t g_quit = 0; + +static void SignalIntHandler(int sig) { + g_quit = 1; +} + +int main(int argc, char** argv) { + ::google::ParseCommandLineFlags(&argc, &argv, true); + if (FLAGS_tera_log_prefix.empty()) { + FLAGS_tera_log_prefix = GetTeraEntryName(); + if (FLAGS_tera_log_prefix.empty()) { + FLAGS_tera_log_prefix = "tera"; + } + } + tera::utils::SetupLog(FLAGS_tera_log_prefix); + + if (argc > 1) { + std::string ext_cmd = argv[1]; + if (ext_cmd == "version") { + PrintSystemVersion(); + return 0; + } + } + + signal(SIGINT, SignalIntHandler); + signal(SIGTERM, SignalIntHandler); + + scoped_ptr entry(GetTeraEntry()); + if (entry.get() == NULL) { + return -1; + } + + if (!entry->Start()) { + return -1; + } + + // start log cleaner + if (FLAGS_tera_info_log_clean_enable) { + common::LogCleaner::StartCleaner(); + 
LOG(INFO) << "start log cleaner"; + } else { + LOG(INFO) << "log cleaner is disable"; + } + + while (!g_quit) { + if (!entry->Run()) { + LOG(ERROR) << "Server run error ,and then exit now "; + break; + } + } + if (g_quit) { + LOG(INFO) << "received interrupt signal from user, will stop"; + } + + common::LogCleaner::StopCleaner(); + + if (!entry->Shutdown()) { + return -1; + } + + return 0; +} diff --git a/src/observer/rowlocknode/fake_rowlock_client.h b/src/observer/rowlocknode/fake_rowlock_client.h new file mode 100644 index 000000000..d884d15e4 --- /dev/null +++ b/src/observer/rowlocknode/fake_rowlock_client.h @@ -0,0 +1,41 @@ +// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_OBSERVER_EXECUTOR_FAKE_ROWLOCK_CLIENT_H_ +#define TERA_OBSERVER_EXECUTOR_FAKE_ROWLOCK_CLIENT_H_ + +#include +#include + +#include "proto/rpc_client.h" +#include "sdk/rowlock_client.h" + +namespace tera { +namespace observer { + +class FakeRowlockClient : public RowlockClient { +public: + FakeRowlockClient() : RowlockClient("127.0.0.1:22222") {}; + ~FakeRowlockClient() {} + + virtual bool TryLock(const RowlockRequest* request, + RowlockResponse* response, + std::function done = NULL) { + response->set_lock_status(kLockSucc); + return true; + } + + virtual bool UnLock(const RowlockRequest* request, + RowlockResponse* response, + std::function done = NULL) { + response->set_lock_status(kLockSucc); + return true; + } +}; + +} // namespace observer +} // namespace tera +#endif // TERA_OBSERVER_EXECUTOR_FAKE_ROWLOCK_CLIENT_H_ + + diff --git a/src/observer/rowlocknode/fake_rowlocknode_zk_adapter.cc b/src/observer/rowlocknode/fake_rowlocknode_zk_adapter.cc new file mode 100644 index 000000000..2cf0d8974 --- /dev/null +++ b/src/observer/rowlocknode/fake_rowlocknode_zk_adapter.cc @@ -0,0 +1,66 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. 
All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "observer/rowlocknode/fake_rowlocknode_zk_adapter.h" + +#include + +#include + +#include "common/this_thread.h" +#include "ins_sdk.h" +#include "observer/rowlocknode/rowlocknode_zk_adapter_base.h" +#include "types.h" + +DECLARE_string(rowlock_ins_root_path); +DECLARE_int32(rowlock_server_node_num); +DECLARE_string(rowlock_fake_root_path); + +namespace tera { +namespace observer { + +FakeRowlockNodeZkAdapter::FakeRowlockNodeZkAdapter(RowlockNodeImpl* rowlocknode_impl, + const std::string& server_addr) : + rowlocknode_impl_(rowlocknode_impl), server_addr_(server_addr) { +} + +FakeRowlockNodeZkAdapter::~FakeRowlockNodeZkAdapter() { +} + +void FakeRowlockNodeZkAdapter::Init() { + std::string root_path = FLAGS_rowlock_fake_root_path; + + std::string node_num_key = root_path + kRowlockNodeNumPath; + zk::FakeZkUtil::WriteNode(node_num_key, std::to_string(FLAGS_rowlock_server_node_num)); + + // create node + int id = 0; + std::string id_lock_key; + std::string host_lock_key; + while (true) { + id_lock_key = root_path + kRowlockNodeIdListPath + "/" + std::to_string(id); + std::string file_path = "mkdir -p " + root_path + kRowlockNodeIdListPath; + system(file_path.c_str()); + if (zk::FakeZkUtil::WriteNode(id_lock_key, std::to_string(id))) { + break; + } else { + LOG(ERROR) << "[Fake rowlock zk]: write node " << id_lock_key << " failed"; + } + if (++id >= FLAGS_rowlock_server_node_num) { + id = 0; + } + ThisThread::Sleep(1); + } + + LOG(INFO) << "RowlockNode Id=" << id << " host=" << server_addr_ + << " nodenum=" << FLAGS_rowlock_server_node_num; +} + +void FakeRowlockNodeZkAdapter::OnLockChange(std::string session_id, bool deleted) { + _Exit(EXIT_FAILURE); +} + +} // namespace observer +} // namespace tera + diff --git a/src/observer/rowlocknode/fake_rowlocknode_zk_adapter.h b/src/observer/rowlocknode/fake_rowlocknode_zk_adapter.h new file 
mode 100644 index 000000000..686b2cdef --- /dev/null +++ b/src/observer/rowlocknode/fake_rowlocknode_zk_adapter.h @@ -0,0 +1,55 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_OBSERVER_ROWLOCKNODE_FAKE_ROWLOCKNODE_ZK_ADAPTER_H_ +#define TERA_OBSERVER_ROWLOCKNODE_FAKE_ROWLOCKNODE_ZK_ADAPTER_H_ + +#include +#include + +#include "observer/rowlocknode/rowlocknode_impl.h" +#include "observer/rowlocknode/rowlocknode_zk_adapter_base.h" +#include "zk/zk_adapter.h" + +namespace galaxy { +namespace ins { +namespace sdk { + class InsSDK; +} // namespace sdk +} // namespace ins +} // namespace galaxy + +namespace tera { +namespace observer { + +class RowlockNodeImpl; + +class FakeRowlockNodeZkAdapter : public RowlockNodeZkAdapterBase { +public: + FakeRowlockNodeZkAdapter(RowlockNodeImpl* rowlocknode_impl, const std::string& server_addr); + virtual ~FakeRowlockNodeZkAdapter(); + virtual void Init(); + void OnLockChange(std::string session_id, bool deleted); + +private: + virtual void OnChildrenChanged(const std::string& path, + const std::vector& name_list, + const std::vector& data_list) {} + virtual void OnNodeValueChanged(const std::string& path, + const std::string& value) {} + virtual void OnNodeCreated(const std::string& path) {} + virtual void OnNodeDeleted(const std::string& path) {} + virtual void OnWatchFailed(const std::string& path, int watch_type, + int err) {} + virtual void OnSessionTimeout() {} + +private: + RowlockNodeImpl* rowlocknode_impl_; + std::string server_addr_; +}; + +} // namespace observer +} // namespace tera +#endif // TERA_OBSERVER_ROWLOCKNODE_FAKE_ROWLOCKNODE_ZK_ADAPTER_H_ + diff --git a/src/observer/rowlocknode/ins_rowlock_client_zk_adapter.cc b/src/observer/rowlocknode/ins_rowlock_client_zk_adapter.cc new file mode 100644 index 000000000..01c9e8970 --- /dev/null +++ 
b/src/observer/rowlocknode/ins_rowlock_client_zk_adapter.cc @@ -0,0 +1,55 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "observer/rowlocknode/ins_rowlock_client_zk_adapter.h" + +#include +#include + +#include "ins_sdk.h" + +#include "sdk/rowlock_client.h" +#include "types.h" + +DECLARE_string(rowlock_ins_root_path); +DECLARE_string(tera_ins_addr_list); +DECLARE_int32(rowlock_server_node_num); +DECLARE_int64(tera_zk_retry_period); +DECLARE_int32(tera_zk_timeout); +DECLARE_int32(tera_zk_retry_max_times); + +namespace tera { +namespace observer { + +InsRowlockClientZkAdapter::InsRowlockClientZkAdapter(RowlockClient* server_client, + const std::string& server_addr) + : ZkRowlockClientZkAdapter(server_client, server_addr), + client_(server_client), + server_addr_(server_addr) {} + +bool InsRowlockClientZkAdapter::Init() { + std::string root_path = FLAGS_rowlock_ins_root_path; + std::vector value; + // create session + ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); + + // put server_node_num + std::string rowlock_proxy_path = root_path + kRowlockProxyPath; + + galaxy::ins::sdk::ScanResult* result = ins_sdk_->Scan(rowlock_proxy_path + "/!", + rowlock_proxy_path + "/~"); + while (!result->Done()) { + CHECK_EQ(result->Error(), galaxy::ins::sdk::kOK); + value.push_back(result->Value()); + result->Next(); + } + delete result; + + client_->Update(value); + return true; +} + +} // namespace observer +} // namespace tera + diff --git a/src/observer/rowlocknode/ins_rowlock_client_zk_adapter.h b/src/observer/rowlocknode/ins_rowlock_client_zk_adapter.h new file mode 100644 index 000000000..7f56389ce --- /dev/null +++ b/src/observer/rowlocknode/ins_rowlock_client_zk_adapter.h @@ -0,0 +1,50 @@ +// Copyright (c) 2015, Baidu.com, Inc. 
All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_OBSERVER_ROWLOCKNODE_INS_ROWLOCK_CLIENT_ZK_ADAPTER_H_ +#define TERA_OBSERVER_ROWLOCKNODE_INS_ROWLOCK_CLIENT_ZK_ADAPTER_H_ + +#include "observer/rowlocknode/zk_rowlock_client_zk_adapter.h" +#include "zk/zk_adapter.h" + +namespace galaxy { +namespace ins { +namespace sdk { + class InsSDK; +} // namespace sdk +} // namespace ins +} // namespace galaxy + +namespace tera { +namespace observer { + +class RowlockClient; + +class InsRowlockClientZkAdapter : public ZkRowlockClientZkAdapter { +public: + InsRowlockClientZkAdapter(RowlockClient* server_client, const std::string& server_addr); + virtual ~InsRowlockClientZkAdapter() {}; + virtual bool Init(); +protected: + virtual void OnNodeValueChanged(const std::string& path, + const std::string& value) {} + virtual void OnWatchFailed(const std::string& path, int watch_type, + int err) {} + virtual void OnNodeDeleted(const std::string& path) {} + virtual void OnSessionTimeout() {} + virtual void OnNodeCreated(const std::string& path) {} + virtual void OnChildrenChanged(const std::string& path, + const std::vector& name_list, + const std::vector& data_list) {} + +private: + RowlockClient* client_; + std::string server_addr_; + galaxy::ins::sdk::InsSDK* ins_sdk_; +}; + +} // namespace observer +} // namespace tera + +#endif // TERA_OBSERVER_ROWLOCKNODE_INS_ROWLOCK_CLIENT_ZK_ADAPTER_H_ diff --git a/src/observer/rowlocknode/ins_rowlocknode_zk_adapter.cc b/src/observer/rowlocknode/ins_rowlocknode_zk_adapter.cc new file mode 100644 index 000000000..c0ec709d5 --- /dev/null +++ b/src/observer/rowlocknode/ins_rowlocknode_zk_adapter.cc @@ -0,0 +1,80 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include + +#include "common/this_thread.h" +#include "ins_sdk.h" +#include "observer/rowlocknode/ins_rowlocknode_zk_adapter.h" +#include "types.h" + +DECLARE_int64(tera_zk_retry_period); +DECLARE_string(rowlock_ins_root_path); +DECLARE_string(tera_ins_addr_list); +DECLARE_int32(rowlock_server_node_num); +DECLARE_string(rowlock_fake_root_path); + +namespace tera { +namespace observer { + +InsRowlockNodeZkAdapter::InsRowlockNodeZkAdapter(RowlockNodeImpl* rowlocknode_impl, + const std::string& server_addr) : + rowlocknode_impl_(rowlocknode_impl), server_addr_(server_addr) { +} + +InsRowlockNodeZkAdapter::~InsRowlockNodeZkAdapter() { +} + +static void InsOnLockChange(const galaxy::ins::sdk::WatchParam& param, + galaxy::ins::sdk::SDKError error) { + LOG(ERROR) << "recv lock change event" ; + InsRowlockNodeZkAdapter* ins_adp = static_cast(param.context); + ins_adp->OnLockChange(param.value, param.deleted); +} + +void InsRowlockNodeZkAdapter::Init() { + std::string root_path = FLAGS_rowlock_ins_root_path; + galaxy::ins::sdk::SDKError err; + // create session + ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); + // get session id + std::string session_id = ins_sdk_->GetSessionID(); + + // put server_node_num + std::string node_num_key = root_path + kRowlockNodeNumPath; + if (!ins_sdk_->Put(node_num_key, std::to_string(FLAGS_rowlock_server_node_num), &err)) { + LOG(WARNING) << "put NodeNum fail"; + } + + // create node + int id = 0; + std::string id_lock_key; + std::string host_lock_key; + while (true) { + id_lock_key = root_path + kRowlockNodeIdListPath + "/" + std::to_string(id); + if (ins_sdk_->Put(id_lock_key, server_addr_, &err) && galaxy::ins::sdk::kOK == err) { + host_lock_key = root_path + kRowlockNodeHostListPath + "/" + server_addr_; + CHECK(ins_sdk_->Lock(host_lock_key, &err)) << "register fail"; + break; + } + if (++id >= FLAGS_rowlock_server_node_num) { + id = 0; + } + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + + // create watch 
node + CHECK(ins_sdk_->Watch(host_lock_key, &InsOnLockChange, this, &err)) << "watch lock fail"; + + LOG(ERROR) << "RowlockNode Id=" << id << " host=" << server_addr_ + << " nodenum=" << FLAGS_rowlock_server_node_num; +} + +void InsRowlockNodeZkAdapter::OnLockChange(std::string session_id, bool deleted) { + _Exit(EXIT_FAILURE); +} + +} // namespace observer +} // namespace tera + diff --git a/src/observer/rowlocknode/ins_rowlocknode_zk_adapter.h b/src/observer/rowlocknode/ins_rowlocknode_zk_adapter.h new file mode 100644 index 000000000..b335115fa --- /dev/null +++ b/src/observer/rowlocknode/ins_rowlocknode_zk_adapter.h @@ -0,0 +1,56 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_OBSERVER_ROWLOCKNODE_INS_ROWLOCKNODE_ZK_ADAPTER_H_ +#define TERA_OBSERVER_ROWLOCKNODE_INS_ROWLOCKNODE_ZK_ADAPTER_H_ + +#include +#include + +#include "observer/rowlocknode/rowlocknode_impl.h" +#include "observer/rowlocknode/rowlocknode_zk_adapter_base.h" +#include "zk/zk_adapter.h" + +namespace galaxy { +namespace ins { +namespace sdk { + class InsSDK; +} // namespace sdk +} // namespace ins +} // namespace galaxy + +namespace tera { +namespace observer { + +class RowlockNodeImpl; + +class InsRowlockNodeZkAdapter : public RowlockNodeZkAdapterBase { +public: + InsRowlockNodeZkAdapter(RowlockNodeImpl* rowlocknode_impl, const std::string& server_addr); + virtual ~InsRowlockNodeZkAdapter(); + virtual void Init(); + void OnLockChange(std::string session_id, bool deleted); + +private: + virtual void OnChildrenChanged(const std::string& path, + const std::vector& name_list, + const std::vector& data_list) {} + virtual void OnNodeValueChanged(const std::string& path, + const std::string& value) {} + virtual void OnNodeCreated(const std::string& path) {} + virtual void OnNodeDeleted(const std::string& path) {} + virtual void OnWatchFailed(const std::string& 
path, int watch_type, + int err) {} + virtual void OnSessionTimeout() {} + +private: + RowlockNodeImpl* rowlocknode_impl_; + std::string server_addr_; + galaxy::ins::sdk::InsSDK* ins_sdk_; +}; + +} // namespace observer +} // namespace tera +#endif // TERA_OBSERVER_ROWLOCKNODE_INS_ROWLOCKNODE_ZK_ADAPTER_H_ + diff --git a/src/observer/rowlocknode/remote_rowlocknode.cc b/src/observer/rowlocknode/remote_rowlocknode.cc new file mode 100644 index 000000000..533672607 --- /dev/null +++ b/src/observer/rowlocknode/remote_rowlocknode.cc @@ -0,0 +1,36 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "observer/rowlocknode/remote_rowlocknode.h" + +#include "gflags/gflags.h" + +DECLARE_int32(rowlock_thread_max_num); + +namespace tera { +namespace observer { + +RemoteRowlockNode::RemoteRowlockNode(RowlockNodeImpl* rowlocknode_impl) : + rowlocknode_impl_(rowlocknode_impl) { +} + +RemoteRowlockNode::~RemoteRowlockNode() { +} + +void RemoteRowlockNode::Lock(google::protobuf::RpcController* controller, + const RowlockRequest* request, + RowlockResponse* response, + google::protobuf::Closure* done) { + rowlocknode_impl_->TryLock(request, response, done); +} + +void RemoteRowlockNode::UnLock(google::protobuf::RpcController* controller, + const RowlockRequest* request, + RowlockResponse* response, + google::protobuf::Closure* done) { + rowlocknode_impl_->UnLock(request, response, done); +} + +} // namespace observer +} // namespace tera diff --git a/src/observer/rowlocknode/remote_rowlocknode.h b/src/observer/rowlocknode/remote_rowlocknode.h new file mode 100644 index 000000000..6c65d79d2 --- /dev/null +++ b/src/observer/rowlocknode/remote_rowlocknode.h @@ -0,0 +1,37 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_OBSERVER_ROWLOCKNODE_REMOTE_ROWLOCKNODE_H_ +#define TERA_OBSERVER_ROWLOCKNODE_REMOTE_ROWLOCKNODE_H_ + +#include "common/base/scoped_ptr.h" +#include "common/thread_pool.h" +#include "observer/rowlocknode/rowlocknode_impl.h" + +namespace tera { +namespace observer { + +class RemoteRowlockNode : public RowlockService { +public: + explicit RemoteRowlockNode(RowlockNodeImpl* rowlocknode_impl); + ~RemoteRowlockNode(); + + void Lock(google::protobuf::RpcController* controller, + const RowlockRequest* request, + RowlockResponse* response, + google::protobuf::Closure* done); + + void UnLock(google::protobuf::RpcController* controller, + const RowlockRequest* request, + RowlockResponse* response, + google::protobuf::Closure* done); + +private: + RowlockNodeImpl* rowlocknode_impl_; +}; + +} // namespace observer +} // namespace tera +#endif // TERA_OBSERVER_ROWLOCKNODE_REMOTE_ROWLOCKNODE_H_ + diff --git a/src/observer/rowlocknode/rowlock_db.h b/src/observer/rowlocknode/rowlock_db.h new file mode 100644 index 000000000..94c98889c --- /dev/null +++ b/src/observer/rowlocknode/rowlock_db.h @@ -0,0 +1,161 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_OBSERVER_ROWLOCKNODE_ROWLOCK_DB_H_ +#define TERA_OBSERVER_ROWLOCKNODE_ROWLOCK_DB_H_ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "common/base/scoped_ptr.h" +#include "common/mutex.h" +#include "common/thread_pool.h" +#include "common/timer.h" + +DECLARE_int32(rowlock_db_sharding_number); +DECLARE_int32(rowlock_db_ttl); +DECLARE_int32(rowlock_timing_wheel_patch_num); + +namespace tera { +namespace observer { + +class RowlockDB { +public: + RowlockDB() + : timing_wheel_pos_(0), + timing_wheel_patch_num_(FLAGS_rowlock_timing_wheel_patch_num) { + timing_wheel_.resize(timing_wheel_patch_num_); + } + + ~RowlockDB() {} + + bool TryLock(uint64_t row) { + MutexLock locker(&mutex_); + if (locks_.find(row) == locks_.end()) { + locks_[row].reset(new uint64_t(row)); + std::weak_ptr ptr = locks_[row]; + timing_wheel_[timing_wheel_pos_].push_back(ptr); + return true; + } else { + return false; + } + } + + void UnLock(uint64_t row) { + MutexLock locker(&mutex_); + locks_.erase(row); + } + + // call this function ever timeout period + // 1. pointer of timing wheel move forward by one step + // 2. clear all the rowlock keys and remove them from locks_ + // 3. 
the next 60 seconds all new rowlock keys will be put into this wheel patch + void ClearTimeout() { + // pointer forward + mutex_.Lock(); + timing_wheel_pos_ = (timing_wheel_pos_ + 1) % timing_wheel_patch_num_; + std::vector> buffer; + + // release memory + buffer.swap(timing_wheel_[timing_wheel_pos_]); + mutex_.Unlock(); + + // remove keys still held; lock() is taken under mutex_ because UnLock() may erase the entry concurrently + for (uint32_t i = 0; i < buffer.size(); ++i) { + mutex_.Lock(); + auto it = buffer[i].lock(); + if (it) { + locks_.erase(*it); + } + mutex_.Unlock(); + } + } + + size_t Size() const { + MutexLock locker(&mutex_); + return locks_.size(); + } + +private: + mutable Mutex mutex_; + + std::unordered_map> locks_; + + // timing wheel + uint32_t timing_wheel_pos_; + uint32_t timing_wheel_patch_num_; + std::vector>> timing_wheel_; +}; + +class ShardedRowlockDB { +public: + ShardedRowlockDB() : thread_pool_(new ThreadPool(1)) { + lock_map_.resize(FLAGS_rowlock_db_sharding_number); + + for (int32_t i = 0; i < FLAGS_rowlock_db_sharding_number; ++i) { + std::unique_ptr db(new RowlockDB()); + lock_map_[i].reset(db.release()); + } + ScheduleClearTimeout(); + } + + ~ShardedRowlockDB() {} + + bool TryLock(uint64_t row) { + std::unique_ptr& db_node = lock_map_[row % FLAGS_rowlock_db_sharding_number]; + + if (db_node->TryLock(row) == true) { + return true; + } else { + return false; + } + } + + void UnLock(uint64_t row) { + std::unique_ptr& db_node = lock_map_[row % FLAGS_rowlock_db_sharding_number]; + db_node->UnLock(row); + } + + size_t Size() const { + size_t size = 0; + for (uint32_t i = 0; i < lock_map_.size(); ++i) { + size += lock_map_[i]->Size(); + } + return size; + } + +private: + void ScheduleClearTimeout() { + ClearTimeout(); + + ThreadPool::Task task = std::bind(&ShardedRowlockDB::ScheduleClearTimeout, this); + // everytime timing wheel move forward one step, every patch_num steps data will be cleared + thread_pool_->DelayTask(FLAGS_rowlock_db_ttl / FLAGS_rowlock_timing_wheel_patch_num, task); + } + + 
void ClearTimeout() { + for (int32_t i = 0; i < FLAGS_rowlock_db_sharding_number; ++i) { + lock_map_[i]->ClearTimeout(); + } + } + +private: + std::vector> lock_map_; + scoped_ptr thread_pool_; +}; + +} // namespace observer +} // namespace tera + +#endif // TERA_OBSERVER_ROWLOCKNODE_ROWLOCK_DB_H_ diff --git a/src/observer/rowlocknode/rowlocknode_entry.cc b/src/observer/rowlocknode/rowlocknode_entry.cc new file mode 100644 index 000000000..eb2eb4e17 --- /dev/null +++ b/src/observer/rowlocknode/rowlocknode_entry.cc @@ -0,0 +1,87 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "observer/rowlocknode/rowlocknode_entry.h" + +#include +#include + +#include "common/base/string_ext.h" +#include "common/base/string_number.h" +#include "common/net/ip_address.h" +#include "common/this_thread.h" +#include "common/thread_attributes.h" +#include "common/timer.h" +#include "common/counter.h" +#include "utils/rpc_timer_list.h" +#include "common/timer.h" +#include "observer/rowlocknode/remote_rowlocknode.h" + +DECLARE_string(rowlock_server_port); +DECLARE_int32(rowlock_io_service_pool_size); +DECLARE_int32(rowlock_rpc_work_thread_num); + +std::string GetTeraEntryName() { + return "rowlock"; +} + +tera::TeraEntry* GetTeraEntry() { + return new tera::observer::RowlockNodeEntry(); +} + +namespace tera { +namespace observer { + +RowlockNodeEntry::RowlockNodeEntry() : rowlocknode_impl_(NULL), remote_rowlocknode_(NULL) { + sofa::pbrpc::RpcServerOptions rpc_options; + rpc_options.max_throughput_in = -1; + rpc_options.max_throughput_out = -1; + rpc_options.work_thread_num = FLAGS_rowlock_rpc_work_thread_num; + rpc_options.io_service_pool_size = FLAGS_rowlock_io_service_pool_size; + rpc_options.no_delay = false; //use Nagle's Algorithm + rpc_options.write_buffer_base_block_factor = 0; //64Bytes per malloc + rpc_options.read_buffer_base_block_factor = 7; 
//8kBytes per malloc + rpc_server_.reset(new sofa::pbrpc::RpcServer(rpc_options)); +} + +RowlockNodeEntry::~RowlockNodeEntry() {} + +bool RowlockNodeEntry::StartServer() { + SetProcessorAffinity(); + IpAddress rowlocknode_addr("0.0.0.0", FLAGS_rowlock_server_port); + LOG(INFO) << "Start RPC server at: " << rowlocknode_addr.ToString(); + rowlocknode_impl_.reset(new RowlockNodeImpl()); + remote_rowlocknode_ = new RemoteRowlockNode(rowlocknode_impl_.get()); + rpc_server_->RegisterService(remote_rowlocknode_); + if (!rpc_server_->Start(rowlocknode_addr.ToString())) { + LOG(ERROR) << "start RPC server error"; + return false; + } + if (!rowlocknode_impl_->Init()) { + LOG(ERROR) << "fail to init rowlocknode_impl"; + return false; + } + LOG(INFO) << "finish starting RPC server"; + + return true; +} + +void RowlockNodeEntry::ShutdownServer() { + LOG(INFO) << "shut down server"; + rpc_server_->Stop(); + rowlocknode_impl_->Exit(); + rowlocknode_impl_.reset(); + LOG(INFO) << "RowlockNodeEntry stop done!"; +} + +bool RowlockNodeEntry::Run() { + ThisThread::Sleep(3000); + rowlocknode_impl_->PrintQPS(); + return true; +} + +void RowlockNodeEntry::SetProcessorAffinity() {} + +} // namespace observer +} // namespace tera diff --git a/src/observer/rowlocknode/rowlocknode_entry.h b/src/observer/rowlocknode/rowlocknode_entry.h new file mode 100644 index 000000000..b968e8d4e --- /dev/null +++ b/src/observer/rowlocknode/rowlocknode_entry.h @@ -0,0 +1,39 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_OBSERVER_ROWLOCKNODE_ROWLOCKNODE_ENTRY_H_ +#define TERA_OBSERVER_ROWLOCKNODE_ROWLOCKNODE_ENTRY_H_ + +#include + +#include "common/base/scoped_ptr.h" +#include "observer/rowlocknode/remote_rowlocknode.h" +#include "observer/rowlocknode/rowlocknode_impl.h" +#include "tera_entry.h" + +namespace tera { +namespace observer { + +class RowlockNodeEntry : public tera::TeraEntry { +public: + RowlockNodeEntry(); + virtual ~RowlockNodeEntry(); + + virtual bool StartServer(); + virtual bool Run(); + virtual void ShutdownServer(); + void SetProcessorAffinity(); + +private: + common::Mutex mutex_; + + scoped_ptr rowlocknode_impl_; + RemoteRowlockNode* remote_rowlocknode_; + scoped_ptr rpc_server_; +}; + +} // namespace observer +} // namespace tera + +#endif // TERA_OBSERVER_ROWLOCKNODE_ROWLOCKNODE_ENTRY_H_ diff --git a/src/observer/rowlocknode/rowlocknode_impl.cc b/src/observer/rowlocknode/rowlocknode_impl.cc new file mode 100644 index 000000000..a8563a156 --- /dev/null +++ b/src/observer/rowlocknode/rowlocknode_impl.cc @@ -0,0 +1,82 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "observer/rowlocknode/rowlocknode_impl.h" + +#include "common/timer.h" +#include "observer/rowlocknode/fake_rowlocknode_zk_adapter.h" +#include "observer/rowlocknode/ins_rowlocknode_zk_adapter.h" +#include "observer/rowlocknode/rowlocknode_zk_adapter.h" +#include "utils/utils_cmd.h" + +DECLARE_string(rowlock_server_port); +DECLARE_string(tera_coord_type); + +namespace tera { +namespace observer { + +RowlockNodeImpl::RowlockNodeImpl() {} + +RowlockNodeImpl::~RowlockNodeImpl() {} + +bool RowlockNodeImpl::Init() { + std::string local_addr = tera::utils::GetLocalHostName() + ":" + FLAGS_rowlock_server_port; + if (FLAGS_tera_coord_type == "zk") { + zk_adapter_.reset(new RowlockNodeZkAdapter(this, local_addr)); + } else if (FLAGS_tera_coord_type == "ins") { + zk_adapter_.reset(new InsRowlockNodeZkAdapter(this, local_addr)); + } else { + zk_adapter_.reset(new FakeRowlockNodeZkAdapter(this, local_addr)); + } + + zk_adapter_->Init(); + + LOG(INFO) << "Rowlock node init finish"; + return true; +} + +bool RowlockNodeImpl::Exit() { + return true; +} + +void RowlockNodeImpl::TryLock(const RowlockRequest* request, + RowlockResponse* response, + google::protobuf::Closure* done) { + uint64_t rowlock_key = GetRowlockKey(request->table_name(), request->row()); + if (rowlock_db_.TryLock(rowlock_key)) { + response->set_lock_status(kLockSucc); + } else { + response->set_lock_status(kLockFail); + LOG(WARNING) << " table name: " << request->table_name() + << " row :" << request->row(); + } + + done->Run(); +} + +void RowlockNodeImpl::UnLock(const RowlockRequest* request, + RowlockResponse* response, + google::protobuf::Closure* done) { + uint64_t rowlock_key = GetRowlockKey(request->table_name(), request->row()); + rowlock_db_.UnLock(rowlock_key); + response->set_lock_status(kLockSucc); + done->Run(); +} + +void RowlockNodeImpl::PrintQPS() { + return; +} + +uint64_t RowlockNodeImpl::GetRowlockKey(const std::string& table_name, + const std::string& row) const { + // RowlockKey 
: TableName + Row + std::string rowlock_key_str = table_name + row; + return std::hash()(rowlock_key_str); + +} + + +} // namespace observer +} // namespace tera + diff --git a/src/observer/rowlocknode/rowlocknode_impl.h b/src/observer/rowlocknode/rowlocknode_impl.h new file mode 100644 index 000000000..a60b89dde --- /dev/null +++ b/src/observer/rowlocknode/rowlocknode_impl.h @@ -0,0 +1,52 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_OBSERVER_ROWLOCKNODE_ROWLOCKNODE_IMPL_H_ +#define TERA_OBSERVER_ROWLOCKNODE_ROWLOCKNODE_IMPL_H_ + +#include +#include +#include + +#include "common/base/scoped_ptr.h" +#include "common/counter.h" +#include "common/mutex.h" +#include "observer/rowlocknode/fake_rowlocknode_zk_adapter.h" +#include "observer/rowlocknode/rowlock_db.h" +#include "observer/rowlocknode/rowlocknode_zk_adapter.h" +#include "proto/rowlocknode_rpc.pb.h" +#include "zk/zk_adapter.h" + +namespace tera { +namespace observer { + +class RowlockNodeImpl { +public: + RowlockNodeImpl(); + ~RowlockNodeImpl(); + + bool Init(); + + bool Exit(); + + void TryLock(const RowlockRequest* request, + RowlockResponse* response, + google::protobuf::Closure* done); + + void UnLock(const RowlockRequest* request, + RowlockResponse* response, + google::protobuf::Closure* done); + + void PrintQPS(); +private: + uint64_t GetRowlockKey(const std::string& table_name, const std::string& row) const; +private: + ShardedRowlockDB rowlock_db_; + std::unique_ptr zk_adapter_; +}; + +} // namespace observer +} // namespace tera + +#endif // TERA_OBSERVER_ROWLOCKNODE_ROWLOCKNODE_IMPL_H_ diff --git a/src/observer/rowlocknode/rowlocknode_zk_adapter.cc b/src/observer/rowlocknode/rowlocknode_zk_adapter.cc new file mode 100644 index 000000000..9d079a502 --- /dev/null +++ b/src/observer/rowlocknode/rowlocknode_zk_adapter.cc @@ -0,0 +1,119 @@ +// Copyright (c) 
2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "observer/rowlocknode/rowlocknode_zk_adapter.h" + +#include + +#include "common/this_thread.h" +#include "ins_sdk.h" +#include "types.h" + +DECLARE_string(rowlock_zk_root_path); +DECLARE_string(tera_zk_addr_list); +DECLARE_int32(rowlock_server_node_num); +DECLARE_int64(tera_zk_retry_period); +DECLARE_int32(tera_zk_timeout); +DECLARE_int32(tera_zk_retry_max_times); + +namespace tera { +namespace observer { + +RowlockNodeZkAdapter::RowlockNodeZkAdapter(RowlockNodeImpl* rowlocknode_impl, + const std::string& server_addr) : + rowlocknode_impl_(rowlocknode_impl), server_addr_(server_addr) { +} + +RowlockNodeZkAdapter::~RowlockNodeZkAdapter() { +} + +void RowlockNodeZkAdapter::Init() { + std::string root_path = FLAGS_rowlock_zk_root_path; + std::string node_num_key = root_path + kRowlockNodeNumPath; + + int zk_errno = zk::ZE_OK;; + // init zk client + while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, + FLAGS_rowlock_zk_root_path, FLAGS_tera_zk_timeout, + server_addr_, &zk_errno)) { + LOG(ERROR) << "fail to init zk : " << zk::ZkErrnoToString(zk_errno); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + LOG(INFO) << "init zk success"; + + // get session id + int64_t session_id_int = 0; + if (!GetSessionId(&session_id_int, &zk_errno)) { + LOG(ERROR) << "get session id fail : " << zk::ZkErrnoToString(zk_errno); + return; + } + + // put server_node_num + zk_errno = zk::ZE_OK; + bool is_exist = true; + int32_t retry_count = 0; + std::string value = std::to_string(FLAGS_rowlock_server_node_num); + CheckExist(node_num_key, &is_exist, &zk_errno); + if (!is_exist) { + while (!CreateEphemeralNode(node_num_key, value, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to create master node"; + return; + } + LOG(ERROR) << "retry create rowlock number node in " + << 
FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + } else { + WriteNode(node_num_key, value, &zk_errno); + zk_errno = zk::ZE_OK; + } + + value = server_addr_; + + // create node + int id = 0; + std::string id_lock_key; + std::string host_lock_key; + + while (true) { + id_lock_key = root_path + kRowlockNodeIdListPath + "/" + std::to_string(id); + zk_errno = zk::ZE_OK; + + if (!CreateEphemeralNode(id_lock_key, server_addr_, &zk_errno)) { + LOG(ERROR) << "create rowlock node fail: " << id_lock_key; + } else { + break; + } + LOG(ERROR) << "fail to create serve-node : " << zk::ZkErrnoToString(zk_errno); + + if (++id >= FLAGS_rowlock_server_node_num) { + id = 0; + } + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + LOG(INFO) << "create serve-node success"; + + is_exist = false; + + // watch my node + while (!CheckAndWatchExist(id_lock_key, &is_exist, &zk_errno)) { + LOG(ERROR) << "fail to watch serve-node : " << zk::ZkErrnoToString(zk_errno); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + LOG(INFO) << "watch rowlock-node success"; + + if (!is_exist) { + OnLockChange(); + } +} + +void RowlockNodeZkAdapter::OnLockChange() { + _Exit(EXIT_FAILURE); +} + +} // namespace observer +} // namespace tera + diff --git a/src/observer/rowlocknode/rowlocknode_zk_adapter.h b/src/observer/rowlocknode/rowlocknode_zk_adapter.h new file mode 100644 index 000000000..67324f85f --- /dev/null +++ b/src/observer/rowlocknode/rowlocknode_zk_adapter.h @@ -0,0 +1,55 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_OBSERVER_ROWLOCKNODE_ROWLOCKNODE_ZK_ADAPTER_H_ +#define TERA_OBSERVER_ROWLOCKNODE_ROWLOCKNODE_ZK_ADAPTER_H_ + +#include +#include + +#include "observer/rowlocknode/rowlocknode_impl.h" +#include "observer/rowlocknode/rowlocknode_zk_adapter_base.h" +#include "zk/zk_adapter.h" + +namespace galaxy { +namespace ins { +namespace sdk { + class InsSDK; +} // namespace sdk +} // namespace ins +} // namespace galaxy + +namespace tera { +namespace observer { + +class RowlockNodeImpl; + +class RowlockNodeZkAdapter : public RowlockNodeZkAdapterBase { +public: + RowlockNodeZkAdapter(RowlockNodeImpl* rowlocknode_impl, const std::string& server_addr); + virtual ~RowlockNodeZkAdapter(); + virtual void Init(); + void OnLockChange(); + +private: + virtual void OnChildrenChanged(const std::string& path, + const std::vector& name_list, + const std::vector& data_list) {} + virtual void OnNodeValueChanged(const std::string& path, + const std::string& value) {} + virtual void OnNodeCreated(const std::string& path) {} + virtual void OnNodeDeleted(const std::string& path) {} + virtual void OnWatchFailed(const std::string& path, int watch_type, + int err) {} + virtual void OnSessionTimeout() {} + +private: + RowlockNodeImpl* rowlocknode_impl_; + std::string server_addr_; +}; + +} // namespace observer +} // namespace tera +#endif // TERA_OBSERVER_ROWLOCKNODE_ROWLOCKNODE_ZK_ADAPTER_H_ + diff --git a/src/observer/rowlocknode/rowlocknode_zk_adapter_base.h b/src/observer/rowlocknode/rowlocknode_zk_adapter_base.h new file mode 100644 index 000000000..1ef93ccfb --- /dev/null +++ b/src/observer/rowlocknode/rowlocknode_zk_adapter_base.h @@ -0,0 +1,21 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// +#ifndef TERA_OBSERVER_ROWLOCKNODE_ROWLOCKNODE_ZK_ADAPTER_BASE_H_ +#define TERA_OBSERVER_ROWLOCKNODE_ROWLOCKNODE_ZK_ADAPTER_BASE_H_ + +#include "zk/zk_adapter.h" + +namespace tera { +namespace observer { + +class RowlockNodeZkAdapterBase : public tera::zk::ZooKeeperAdapter { +public: + virtual ~RowlockNodeZkAdapterBase() {} + virtual void Init() = 0; +}; + +} // namespace observer +} // namespace tera +#endif // TERA_OBSERVER_ROWLOCKNODE_ROWLOCKNODE_ZK_ADAPTER_BASE_H_ diff --git a/src/observer/rowlocknode/zk_rowlock_client_zk_adapter.cc b/src/observer/rowlocknode/zk_rowlock_client_zk_adapter.cc new file mode 100644 index 000000000..cacd993fc --- /dev/null +++ b/src/observer/rowlocknode/zk_rowlock_client_zk_adapter.cc @@ -0,0 +1,58 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "observer/rowlocknode/zk_rowlock_client_zk_adapter.h" + +#include +#include + +#include "sdk/rowlock_client.h" +#include "types.h" + +DECLARE_string(rowlock_zk_root_path); +DECLARE_string(tera_zk_addr_list); +DECLARE_int32(rowlock_server_node_num); +DECLARE_int64(tera_zk_retry_period); +DECLARE_int32(tera_zk_timeout); +DECLARE_int32(tera_zk_retry_max_times); + +namespace tera { +namespace observer { + +ZkRowlockClientZkAdapter::ZkRowlockClientZkAdapter(RowlockClient* server_client, + const std::string& server_addr) + : client_(server_client), + server_addr_(server_addr) {} + +ZkRowlockClientZkAdapter::~ZkRowlockClientZkAdapter() { + ZooKeeperAdapter::Finalize(); +} + +bool ZkRowlockClientZkAdapter::Init() { + std::string root_path = FLAGS_rowlock_zk_root_path; + std::string proxy_path = root_path + kRowlockProxyPath; + + int zk_errno = zk::ZE_OK;; + // init zk client + while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, + FLAGS_rowlock_zk_root_path, FLAGS_tera_zk_timeout, + server_addr_, &zk_errno)) { + LOG(ERROR) << "fail to init zk : " << 
zk::ZkErrnoToString(zk_errno); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + LOG(INFO) << "init zk success"; + + std::vector child; + std::vector value; + + while (!ListChildren(proxy_path, &child, &value, &zk_errno)) { + LOG(ERROR) << "fail to get proxy addr : " << zk::ZkErrnoToString(zk_errno); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + client_->Update(value); + return true; +} + +} // namespace observer +} // namespace tera \ No newline at end of file diff --git a/src/observer/rowlocknode/zk_rowlock_client_zk_adapter.h b/src/observer/rowlocknode/zk_rowlock_client_zk_adapter.h new file mode 100644 index 000000000..76a388895 --- /dev/null +++ b/src/observer/rowlocknode/zk_rowlock_client_zk_adapter.h @@ -0,0 +1,29 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_OBSERVER_ROWLOCKNODE_ZK_ROWLOCK_CLIENT_ZK_ADAPTER_H_ +#define TERA_OBSERVER_ROWLOCKNODE_ZK_ROWLOCK_CLIENT_ZK_ADAPTER_H_ + +#include "zk/zk_adapter.h" + +namespace tera { +namespace observer { + +class RowlockClient; + +class ZkRowlockClientZkAdapter : public zk::ZooKeeperLightAdapter { +public: + ZkRowlockClientZkAdapter(RowlockClient* server_client, const std::string& server_addr); + virtual ~ZkRowlockClientZkAdapter(); + virtual bool Init(); + +private: + RowlockClient* client_; + std::string server_addr_; +}; + +} // namespace observer +} // namespace tera + +#endif // TERA_OBSERVER_ROWLOCKNODE_ZK_ROWLOCK_CLIENT_ZK_ADAPTER_H_ diff --git a/src/observer/rowlockproxy/remote_rowlock_proxy.cc b/src/observer/rowlockproxy/remote_rowlock_proxy.cc new file mode 100644 index 000000000..845d30fbe --- /dev/null +++ b/src/observer/rowlockproxy/remote_rowlock_proxy.cc @@ -0,0 +1,36 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "observer/rowlockproxy/remote_rowlock_proxy.h" + +#include "gflags/gflags.h" + +DECLARE_int32(rowlock_thread_max_num); + +namespace tera { +namespace observer { + +RemoteRowlockProxy::RemoteRowlockProxy(RowlockProxyImpl* rowlock_proxy_impl) : + rowlock_proxy_impl_(rowlock_proxy_impl) { +} + +RemoteRowlockProxy::~RemoteRowlockProxy() { +} + +void RemoteRowlockProxy::Lock(google::protobuf::RpcController* controller, + const RowlockRequest* request, + RowlockResponse* response, + google::protobuf::Closure* done) { + rowlock_proxy_impl_->TryLock(request, response, done); +} + +void RemoteRowlockProxy::UnLock(google::protobuf::RpcController* controller, + const RowlockRequest* request, + RowlockResponse* response, + google::protobuf::Closure* done) { + rowlock_proxy_impl_->UnLock(request, response, done); +} + +} // namespace observer +} // namespace tera diff --git a/src/observer/rowlockproxy/remote_rowlock_proxy.h b/src/observer/rowlockproxy/remote_rowlock_proxy.h new file mode 100644 index 000000000..df8e2c2b8 --- /dev/null +++ b/src/observer/rowlockproxy/remote_rowlock_proxy.h @@ -0,0 +1,38 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_OBSERVER_ROWLOCKPROXY_REMOTE_ROWLOCK_PROXY_H_ +#define TERA_OBSERVER_ROWLOCKPROXY_REMOTE_ROWLOCK_PROXY_H_ + +#include + +#include "common/base/scoped_ptr.h" +#include "common/thread_pool.h" +#include "observer/rowlockproxy/rowlock_proxy_impl.h" + +namespace tera { +namespace observer { + +class RemoteRowlockProxy : public RowlockService { +public: + explicit RemoteRowlockProxy(RowlockProxyImpl* rowlock_proxy_impl); + ~RemoteRowlockProxy(); + + void Lock(google::protobuf::RpcController* controller, + const RowlockRequest* request, + RowlockResponse* response, + google::protobuf::Closure* done); + + void UnLock(google::protobuf::RpcController* controller, + const RowlockRequest* request, + RowlockResponse* response, + google::protobuf::Closure* done); + +private: + RowlockProxyImpl* rowlock_proxy_impl_; +}; + +} // namespace observer +} // namespace tera +#endif // TERA_OBSERVER_ROWLOCKPROXY_REMOTE_ROWLOCK_PROXY_H_ diff --git a/src/observer/rowlockproxy/rowlock_proxy_entry.cc b/src/observer/rowlockproxy/rowlock_proxy_entry.cc new file mode 100644 index 000000000..e9f19faa0 --- /dev/null +++ b/src/observer/rowlockproxy/rowlock_proxy_entry.cc @@ -0,0 +1,79 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "observer/rowlockproxy/rowlock_proxy_entry.h" + +#include +#include + +#include "common/base/string_ext.h" +#include "common/base/string_number.h" +#include "common/net/ip_address.h" +#include "common/this_thread.h" +#include "common/thread_attributes.h" +#include "common/timer.h" +#include "common/counter.h" +#include "utils/rpc_timer_list.h" +#include "observer/rowlockproxy/remote_rowlock_proxy.h" + +DECLARE_string(rowlock_proxy_port); +DECLARE_int32(rowlock_io_service_pool_size); +DECLARE_int32(rowlock_rpc_work_thread_num); + +std::string GetTeraEntryName() { + return "rowlock_proxy"; +} + +tera::TeraEntry* GetTeraEntry() { + return new tera::observer::RowlockProxyEntry(); +} + +namespace tera { +namespace observer { + +RowlockProxyEntry::RowlockProxyEntry() { + sofa::pbrpc::RpcServerOptions rpc_options; + rpc_options.max_throughput_in = -1; + rpc_options.max_throughput_out = -1; + rpc_options.work_thread_num = FLAGS_rowlock_rpc_work_thread_num; + rpc_options.io_service_pool_size = FLAGS_rowlock_io_service_pool_size; + rpc_server_.reset(new sofa::pbrpc::RpcServer(rpc_options)); +} + +RowlockProxyEntry::~RowlockProxyEntry() {} + +bool RowlockProxyEntry::StartServer() { + IpAddress rowlock_proxy_addr("0.0.0.0", FLAGS_rowlock_proxy_port); + LOG(INFO) << "Start RPC server at: " << rowlock_proxy_addr.ToString(); + rowlock_proxy_impl_.reset(new RowlockProxyImpl()); + remote_rowlock_proxy_ = new RemoteRowlockProxy(rowlock_proxy_impl_.get()); + rpc_server_->RegisterService(remote_rowlock_proxy_); + if (!rpc_server_->Start(rowlock_proxy_addr.ToString())) { + LOG(ERROR) << "start RPC server error"; + return false; + } + if (!rowlock_proxy_impl_->Init()) { + LOG(ERROR) << "fail to init rowlocknode_impl"; + return false; + } + LOG(INFO) << "finish starting RPC server"; + + return true; +} + +void RowlockProxyEntry::ShutdownServer() { + LOG(INFO) << "shut down server"; + rpc_server_->Stop(); + + LOG(INFO) << "RowlockProxyEntry stop done!"; + _exit(0); +} + +bool 
RowlockProxyEntry::Run() { + ThisThread::Sleep(1000); + return true; +} + +} // namespace observer +} // namespace tera \ No newline at end of file diff --git a/src/observer/rowlockproxy/rowlock_proxy_entry.h b/src/observer/rowlockproxy/rowlock_proxy_entry.h new file mode 100644 index 000000000..547cf8d04 --- /dev/null +++ b/src/observer/rowlockproxy/rowlock_proxy_entry.h @@ -0,0 +1,37 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_OBSERVER_ROWLOCKPROXY_ROWLOCK_PROXY_ENTRY_H_ +#define TERA_OBSERVER_ROWLOCKPROXY_ROWLOCK_PROXY_ENTRY_H_ + +#include + +#include + +#include "observer/rowlockproxy/remote_rowlock_proxy.h" +#include "observer/rowlockproxy/rowlock_proxy_impl.h" +#include "tera_entry.h" + +namespace tera { +namespace observer { + +class RowlockProxyEntry : public tera::TeraEntry { +public: + RowlockProxyEntry(); + virtual ~RowlockProxyEntry(); + + virtual bool StartServer(); + virtual bool Run(); + virtual void ShutdownServer(); + +private: + std::unique_ptr rowlock_proxy_impl_; + RemoteRowlockProxy* remote_rowlock_proxy_; + std::unique_ptr rpc_server_; +}; + +} // namespace observer +} // namespace tera + +#endif // TERA_OBSERVER_ROWLOCKPROXY_ROWLOCK_PROXY_ENTRY_H_ \ No newline at end of file diff --git a/src/observer/rowlockproxy/rowlock_proxy_impl.cc b/src/observer/rowlockproxy/rowlock_proxy_impl.cc new file mode 100644 index 000000000..0a499dabb --- /dev/null +++ b/src/observer/rowlockproxy/rowlock_proxy_impl.cc @@ -0,0 +1,146 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "observer/rowlockproxy/rowlock_proxy_impl.h" + +#include + +#include "common/timer.h" +#include "utils/utils_cmd.h" + +DECLARE_string(rowlock_proxy_port); +DECLARE_string(tera_coord_type); +DECLARE_bool(rowlock_proxy_async_enable); + +namespace tera { +namespace observer { + +RowlockProxyImpl::RowlockProxyImpl() + : server_addrs_(new std::vector), + clients_(new std::map), + server_number_(1) {} + +RowlockProxyImpl::~RowlockProxyImpl() { + for (auto it = clients_->begin(); it != clients_->end(); ++it) { + delete it->second; + } +} + +bool RowlockProxyImpl::Init() { + if (FLAGS_tera_coord_type == "zk") { + zk_adapter_.reset(new RowlockProxyZkAdapter(this, + tera::utils::GetLocalHostName() + ":" + FLAGS_rowlock_proxy_port)); + } else { + zk_adapter_.reset(new InsRowlockProxyZkAdapter(this, + tera::utils::GetLocalHostName() + ":" + FLAGS_rowlock_proxy_port)); + } + + if (!zk_adapter_->Init()) { + LOG(ERROR) << "init zk adapter fail"; + return false; + } + + LOG(INFO) << "Rowlock node init finish"; + return true; +} + +void RowlockProxyImpl::TryLock(const RowlockRequest* request, + RowlockResponse* response, + google::protobuf::Closure* done) { + + uint64_t rowlock_key = GetRowKey(request->table_name(), request->row()); + std::string addr = ScheduleRowKey(rowlock_key); + + // read + std::shared_ptr> read_clients; + { + MutexLock locker(&client_mutex_); + // copy-on-write, ref +1 + read_clients = clients_; + } + + if (FLAGS_rowlock_proxy_async_enable == false) { + (*read_clients)[addr]->TryLock(request, response); + done->Run(); + } else { // capture done by value: the callback may run after this frame has returned + (*read_clients)[addr]->TryLock(request, response, [done] (RowlockRequest*, RowlockResponse*, bool, int) {done->Run();}); + } + +} + +void RowlockProxyImpl::UnLock(const RowlockRequest* request, + RowlockResponse* response, + google::protobuf::Closure* done) { + + uint64_t rowlock_key = GetRowKey(request->table_name(), request->row()); + std::string addr = ScheduleRowKey(rowlock_key); + + // read + std::shared_ptr> 
read_clients; + { + MutexLock locker(&client_mutex_); + // copy for copy-on-write, ref +1 + read_clients = clients_; + } + + if (FLAGS_rowlock_proxy_async_enable == false) { + (*read_clients)[addr]->UnLock(request, response); + done->Run(); + } else { // capture done by value: the callback may run after this frame has returned + (*read_clients)[addr]->UnLock(request, response, [done] (RowlockRequest*, RowlockResponse*, bool, int) {done->Run();}); + } +} + +uint64_t RowlockProxyImpl::GetRowKey(const std::string& table_name, + const std::string& row) const { + std::string rowkey_str = table_name + row; + return std::hash()(rowkey_str); +} + +std::string RowlockProxyImpl::ScheduleRowKey(uint64_t row_key) { + std::shared_ptr> server_addrs_copy; + + MutexLock locker(&server_addrs_mutex_); + // copy for copy-on-write, ref +1 + server_addrs_copy = server_addrs_; + + return (*server_addrs_copy)[row_key % server_number_]; +} + +void RowlockProxyImpl::SetServerNumber(uint32_t number) { + MutexLock locker(&server_addrs_mutex_); + + server_number_ = number; + + if (server_addrs_->size() < number) { + server_addrs_->resize(number); + } +} + +void RowlockProxyImpl::UpdateServers(uint32_t id, const std::string& addr) { + // update data first + { + MutexLock locker(&server_addrs_mutex_); + (*server_addrs_)[id] = addr; + } + + MutexLock locker(&client_mutex_); + if(!clients_.unique()) { + clients_.reset(new std::map(*clients_)); + } + + if (clients_->find(addr) == clients_->end()) { + clients_->insert(make_pair(addr, new RowlockStub(addr))); + } +} + +uint32_t RowlockProxyImpl::GetServerNumber() { + return server_number_; +} + +} // namespace observer +} // namespace tera + + + diff --git a/src/observer/rowlockproxy/rowlock_proxy_impl.h b/src/observer/rowlockproxy/rowlock_proxy_impl.h new file mode 100644 index 000000000..4417c3973 --- /dev/null +++ b/src/observer/rowlockproxy/rowlock_proxy_impl.h @@ -0,0 +1,68 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. 
All Rights Reserved
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef TERA_OBSERVER_ROWLOCKPROXY_ROWLOCK_PROXY_IMPL_H_
+#define TERA_OBSERVER_ROWLOCKPROXY_ROWLOCK_PROXY_IMPL_H_
+
+#include
+#include
+#include
+
+#include "common/counter.h"
+#include "common/mutex.h"
+#include "observer/rowlockproxy/rowlock_proxy_zk_adapter.h"
+#include "proto/rowlocknode_rpc.pb.h"
+#include "sdk/rowlock_client.h"
+#include "zk/zk_adapter.h"
+
+namespace tera {
+namespace observer {
+
+class RowlockProxyZkAdapterBase;
+class RowLockStub;  // NOTE(review): the .cc constructs "RowlockStub" - confirm spelling
+// Picks a rowlock server for each (table, row) and forwards lock calls to it.
+class RowlockProxyImpl {
+public:
+    RowlockProxyImpl();
+    ~RowlockProxyImpl();
+    // Creates and initializes the coordination adapter; false on failure.
+    bool Init();
+    // Sends a try-lock for request's table/row; `done` is run on completion.
+    void TryLock(const RowlockRequest* request,
+                 RowlockResponse* response,
+                 google::protobuf::Closure* done);
+    // Sends an unlock for request's table/row; `done` is run on completion.
+    void UnLock(const RowlockRequest* request,
+                RowlockResponse* response,
+                google::protobuf::Closure* done);
+
+    // called by the zk / ins adapter when the server set changes
+    void SetServerNumber(uint32_t number);
+    uint32_t GetServerNumber();
+    void UpdateServers(uint32_t id, const std::string& addr);
+private:
+    uint64_t GetRowKey(const std::string& table_name,
+                       const std::string& row) const;
+    // hashed row key -> server addr
+    std::string ScheduleRowKey(uint64_t row_key);
+
+private:
+    common::Mutex server_addrs_mutex_;
+    // a map from virtual node to server addr
+    // key: vector index, virtual node number
+    // value: vector value, server address
+    // shared_ptr: used for copy-on-write
+    std::shared_ptr> server_addrs_;
+    // server address -> stub; guarded by client_mutex_
+    common::Mutex client_mutex_;
+    std::shared_ptr> clients_;
+    // modulus used when mapping a row key to a slot of server_addrs_
+    uint32_t server_number_;
+    std::unique_ptr zk_adapter_;
+};
+
+} // namespace observer
+} // namespace tera
+
+#endif // TERA_OBSERVER_ROWLOCKPROXY_ROWLOCK_PROXY_IMPL_H_
diff --git a/src/observer/rowlockproxy/rowlock_proxy_zk_adapter.cc b/src/observer/rowlockproxy/rowlock_proxy_zk_adapter.cc
new file mode 100644
index 000000000..290c6815c
--- /dev/null
+++ b/src/observer/rowlockproxy/rowlock_proxy_zk_adapter.cc
@@ -0,0 +1,411 @@
+// Copyright
(c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "observer/rowlockproxy/rowlock_proxy_zk_adapter.h" + +#include +#include + +#include "common/base/string_number.h" +#include "observer/rowlockproxy/rowlock_proxy_impl.h" +#include "types.h" +#include "ins_sdk.h" + +DECLARE_string(rowlock_zk_root_path); +DECLARE_string(tera_zk_addr_list); +DECLARE_int32(rowlock_server_node_num); +DECLARE_int64(tera_zk_retry_period); +DECLARE_int32(tera_zk_timeout); +DECLARE_int32(tera_zk_retry_max_times); + +DECLARE_string(rowlock_ins_root_path); +DECLARE_string(tera_ins_addr_list); + +namespace tera { +namespace observer { + +RowlockProxyZkAdapter::RowlockProxyZkAdapter(RowlockProxyImpl* rowlock_proxy_impl, + const std::string& server_addr) + : rowlock_proxy_impl_(rowlock_proxy_impl), + server_addr_(server_addr) {} + +bool RowlockProxyZkAdapter::Init() { + std::string root_path = FLAGS_rowlock_zk_root_path; + std::string node_num_key = root_path + kRowlockNodeNumPath; + std::string id_lock_path; + std::string proxy_path = root_path + kRowlockProxyPath + "/" + server_addr_; + + int zk_errno = zk::ZE_OK; + int32_t retry_count = 0; + // init zk client + while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, + FLAGS_rowlock_zk_root_path, FLAGS_tera_zk_timeout, + server_addr_, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to init zk: " << zk::ZkErrnoToString(zk_errno); + return false; + } + + LOG(ERROR) << "init zk fail: " << zk::ZkErrnoToString(zk_errno) + << ". 
retry in " << FLAGS_tera_zk_retry_period << " ms, retry: " + << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "init zk success"; + + // get session id + int64_t session_id_int = 0; + if (!GetSessionId(&session_id_int, &zk_errno)) { + LOG(ERROR) << "get session id fail : " << zk::ZkErrnoToString(zk_errno); + return false; + } + + bool is_exist = false; + uint32_t node_num; + while(!is_exist) { + CheckExist(node_num_key, &is_exist, &zk_errno); + if (!is_exist) { + LOG(ERROR) << "rowlock service number node not found: " << node_num_key + << " make sure rowlock zk available"; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + } + std::string value; + ReadAndWatchNode(node_num_key, &value, &zk_errno); + + if (!StringToNumber(value, &node_num)) { + LOG(ERROR) << "read number node fail"; + return false; + } + + rowlock_proxy_impl_->SetServerNumber(node_num); + + retry_count = 0; + id_lock_path = root_path + kRowlockNodeIdListPath; + std::vector name_list; + std::vector data_list; + + while (!ListAndWatchChildren(id_lock_path, &name_list, &data_list, + &zk_errno) || name_list.size() != node_num) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to watch rowlock server list or lack rowlock server"; + return false; + } + LOG(ERROR) << "retry watch rowlock server list in " + << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count + << " node_num: " << node_num << " list size: " << name_list.size(); + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + size_t list_count = name_list.size(); + for (size_t i = 0; i < list_count; i++) { + const std::string& name = name_list[i]; + const std::string& data = data_list[i]; + + uint32_t id; + StringToNumber(name, &id); + rowlock_proxy_impl_->UpdateServers(id, data); + } + + // create proxy node + retry_count = 0; + while (!CreateEphemeralNode(proxy_path, server_addr_, &zk_errno)) { + if (retry_count++ >= 
FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to create proxy node"; + return false; + } + LOG(ERROR) << "retry create rowlock number node in " + << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + return true; +} +void RowlockProxyZkAdapter::OnNodeValueChanged(const std::string& path, + const std::string& value) { + std::string value_str; + int zk_errno = zk::ZE_OK; + std::string node_num_key = FLAGS_rowlock_zk_root_path + kRowlockNodeNumPath; + + if (path == node_num_key) { + LOG(WARNING) << "rowlock service server number changed to " << value; + uint32_t node_num; + StringToNumber(value, &node_num); + rowlock_proxy_impl_->SetServerNumber(node_num); + ReadAndWatchNode(node_num_key, &value_str, &zk_errno); + } +} + +void RowlockProxyZkAdapter::OnWatchFailed(const std::string& path, int watch_type, + int err) { + LOG(ERROR) << "watch failed ! " << path; + _Exit(EXIT_FAILURE); +} + +void RowlockProxyZkAdapter::OnSessionTimeout() { + LOG(ERROR) << "zk session timeout!"; + _Exit(EXIT_FAILURE); +} + +void RowlockProxyZkAdapter::OnNodeCreated(const std::string& path) { + std::string value; + int zk_errno = zk::ZE_OK; + + if (path == FLAGS_rowlock_zk_root_path + kRowlockNodeNumPath) { + LOG(WARNING) << "rowlock service number node create"; + ReadAndWatchNode(path, &value, &zk_errno); + uint32_t node_num; + StringToNumber(value, &node_num); + rowlock_proxy_impl_->SetServerNumber(node_num); + } else { + std::string id_str = path.substr(path.find_last_of("/"), + path.size() - path.find_last_of("/")); + uint32_t id; + StringToNumber(id_str, &id); + ReadAndWatchNode(path, &value, &zk_errno); + rowlock_proxy_impl_->UpdateServers(id, value); + } +} + +void RowlockProxyZkAdapter::OnNodeDeleted(const std::string& path) { + LOG(ERROR) << "node deleted: " << path; + + int zk_errno = zk::ZE_OK; + bool is_exist = false; + if (path == FLAGS_rowlock_zk_root_path + kRowlockNodeNumPath) { + 
while(!is_exist) { + CheckExist(path, &is_exist, &zk_errno); + if (!is_exist) { + LOG(ERROR) << "rowlock service number node not found: " << path + << " make sure rowlock zk available"; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + + std::string value; + ReadAndWatchNode(path, &value, &zk_errno); + uint32_t node_num; + if (!StringToNumber(value, &node_num)) { + LOG(ERROR) << "read number node fail"; + return; + } + + rowlock_proxy_impl_->SetServerNumber(node_num); + } + return; + } + // server node + std::string id_str = path.substr(path.find_last_of("/"), + path.size() - path.find_last_of("/")); + uint32_t id; + StringToNumber(id_str, &id); + + if (id >= rowlock_proxy_impl_->GetServerNumber()) { + return; + } + + while(!is_exist) { + CheckExist(path, &is_exist, &zk_errno); + if (!is_exist) { + LOG(ERROR) << "rowlock server node not found: " << path; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + + std::string value; + ReadAndWatchNode(path, &value, &zk_errno); + uint32_t node_num; + if (!StringToNumber(value, &node_num)) { + LOG(ERROR) << "read number node fail"; + return; + } + + rowlock_proxy_impl_->UpdateServers(node_num, value); + } +} + +void RowlockProxyZkAdapter::OnChildrenChanged(const std::string& path, + const std::vector& name_list, + const std::vector& data_list) { + std::string root_path = FLAGS_rowlock_ins_root_path; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + std::string id_lock_path = root_path + kRowlockNodeIdListPath; + std::vector names; + std::vector datum; + + while (!ListAndWatchChildren(id_lock_path, &names, &datum, + &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to watch rowlock server list or lack rowlock server"; + _Exit(EXIT_FAILURE); + } + LOG(ERROR) << "retry watch rowlock server list in " + << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + size_t list_count = name_list.size(); + for (size_t i 
= 0; i < list_count; i++) { + const std::string& name = names[i]; + const std::string& data = datum[i]; + + uint32_t id; + StringToNumber(name, &id); + rowlock_proxy_impl_->UpdateServers(id, data); + } +} + +// ins + +InsRowlockProxyZkAdapter::InsRowlockProxyZkAdapter(RowlockProxyImpl* rowlock_proxy_impl, + const std::string& server_addr) + : rowlock_proxy_impl_(rowlock_proxy_impl), + server_addr_(server_addr) {} + +static void InsOnNumberChange(const galaxy::ins::sdk::WatchParam& param, + galaxy::ins::sdk::SDKError error) { + InsRowlockProxyZkAdapter* ins_adp = static_cast(param.context); + ins_adp->OnValueChange(param.key, param.value); +} + +static void InsOnServerChange(const galaxy::ins::sdk::WatchParam& param, + galaxy::ins::sdk::SDKError error) { + InsRowlockProxyZkAdapter* ins_adp = static_cast(param.context); + ins_adp->OnServerChange(); +} + +bool InsRowlockProxyZkAdapter::Init() { + std::string root_path = FLAGS_rowlock_ins_root_path; + std::string node_num_key = root_path + kRowlockNodeNumPath; + std::string proxy_path = root_path + kRowlockProxyPath + "/" + server_addr_; + std::string value; + galaxy::ins::sdk::SDKError err; + + ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); + + LOG(INFO) << "init ins success"; + + if (!ins_sdk_->Get(node_num_key, &value, &err)) { + LOG(ERROR) << "ins rowlock service number node not found: " << node_num_key + << " make sure rowlock ins available"; + return false; + } + + uint32_t node_num; + if (!StringToNumber(value, &node_num)) { + LOG(ERROR) << "read number node fail"; + return false; + } + rowlock_proxy_impl_->SetServerNumber(node_num); + + if (!ins_sdk_->Watch(node_num_key, InsOnNumberChange, this, &err)) { + LOG(ERROR) << "try to watch number node ,path=" << node_num_key << " failed," + << ins_sdk_->ErrorToString(err); + return false; + } + + + // read server addr + int32_t retry_count = 0; + std::string id_lock_path = root_path + kRowlockNodeIdListPath; + + while 
(!ins_sdk_->Watch(id_lock_path, InsOnServerChange, this, &err)) { + LOG(ERROR) << "try to watch server node ,path=" << id_lock_path << " failed," + << ins_sdk_->ErrorToString(err); + if (retry_count++ > FLAGS_tera_zk_retry_max_times) { + return false; + } + } + + galaxy::ins::sdk::ScanResult* result = ins_sdk_->Scan(id_lock_path+"/!", + id_lock_path+"/~"); + while (!result->Done()) { + CHECK_EQ(result->Error(), galaxy::ins::sdk::kOK); + std::string value = result->Value(); + std::string key = result->Key(); + VLOG(12) << "Key: " << key << " value: " << value; + + uint32_t node_num; + uint32_t pos = key.find_last_of("/") + 1; + key = key.substr(pos, key.length() - pos); + VLOG(12) << "key: " << key; + if (!StringToNumber(key, &node_num)) { + LOG(ERROR) << "read number node fail"; + _Exit(EXIT_FAILURE); + } + + rowlock_proxy_impl_->UpdateServers(node_num, value); + result->Next(); + } + delete result; + + // create proxy node + retry_count = 0; + while (!ins_sdk_->Put(proxy_path, server_addr_, &err)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to create proxy node"; + return false; + } + LOG(ERROR) << "retry create rowlock number node in " + << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + } + return true; +} + +void InsRowlockProxyZkAdapter::OnValueChange(const std::string& path, const std::string& value) { + uint32_t node_num; + galaxy::ins::sdk::SDKError err; + + if (!StringToNumber(value, &node_num)) { + LOG(ERROR) << "read number node fail"; + return; + } + rowlock_proxy_impl_->SetServerNumber(node_num); + + if (!ins_sdk_->Watch(path, InsOnNumberChange, this, &err)) { + LOG(ERROR) << "try to watch number node ,path=" << path << " failed," + << ins_sdk_->ErrorToString(err); + return; + } +} + +void InsRowlockProxyZkAdapter::OnServerChange() { + galaxy::ins::sdk::SDKError err; + std::string root_path = FLAGS_rowlock_ins_root_path; + + int32_t retry_count = 0; + 
std::string id_lock_path = root_path + kRowlockNodeIdListPath; + + while (!ins_sdk_->Watch(id_lock_path, InsOnServerChange, this, &err)) { + LOG(ERROR) << "try to watch server node ,path=" << id_lock_path << " failed," + << ins_sdk_->ErrorToString(err); + if (retry_count++ > FLAGS_tera_zk_retry_max_times) { + _Exit(EXIT_FAILURE); + } + } + + galaxy::ins::sdk::ScanResult* result = ins_sdk_->Scan(id_lock_path+"/!", + id_lock_path+"/~"); + while (!result->Done()) { + CHECK_EQ(result->Error(), galaxy::ins::sdk::kOK); + std::string value = result->Value(); + std::string key = result->Key(); + + uint32_t node_num; + uint32_t pos = key.find_last_of("/") + 1; + key = key.substr(pos, key.length() - pos); + VLOG(12) << "key: " << key; + if (!StringToNumber(key, &node_num)) { + LOG(ERROR) << "read number node fail"; + _Exit(EXIT_FAILURE); + } + + rowlock_proxy_impl_->UpdateServers(node_num, value); + result->Next(); + } + delete result; +} + +} // namespace observer +} // namespace tera \ No newline at end of file diff --git a/src/observer/rowlockproxy/rowlock_proxy_zk_adapter.h b/src/observer/rowlockproxy/rowlock_proxy_zk_adapter.h new file mode 100644 index 000000000..02125135c --- /dev/null +++ b/src/observer/rowlockproxy/rowlock_proxy_zk_adapter.h @@ -0,0 +1,83 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+
+#ifndef TERA_OBSERVER_ROWLOCKPROXY_ROWLOCK_PROXY_ZK_ADAPTER_H_
+#define TERA_OBSERVER_ROWLOCKPROXY_ROWLOCK_PROXY_ZK_ADAPTER_H_
+
+#include "zk/zk_adapter.h"
+
+namespace galaxy {
+namespace ins {
+namespace sdk {
+    class InsSDK;
+} // namespace sdk
+} // namespace ins
+} // namespace galaxy
+
+namespace tera {
+namespace observer {
+
+class RowlockProxyImpl;
+// Common interface of the coordination adapters used by the rowlock proxy.
+class RowlockProxyZkAdapterBase : public zk::ZooKeeperAdapter {
+public:
+    virtual ~RowlockProxyZkAdapterBase() {}
+    virtual bool Init() = 0;
+};
+// Adapter built on zk::ZooKeeperAdapter; overrides its On* event hooks.
+class RowlockProxyZkAdapter : public RowlockProxyZkAdapterBase {
+public:
+    RowlockProxyZkAdapter(RowlockProxyImpl* rowlock_proxy_impl, const std::string& server_addr);
+    virtual ~RowlockProxyZkAdapter() {}
+    virtual bool Init();
+
+protected:
+    virtual void OnNodeValueChanged(const std::string& path,
+                                    const std::string& value);
+    virtual void OnWatchFailed(const std::string& path, int watch_type,
+                               int err);
+    virtual void OnNodeDeleted(const std::string& path);
+    virtual void OnSessionTimeout();
+    virtual void OnNodeCreated(const std::string& path);
+    virtual void OnChildrenChanged(const std::string& path,
+                                   const std::vector& name_list,
+                                   const std::vector& data_list);
+
+private:
+    RowlockProxyImpl* rowlock_proxy_impl_;  // not deleted by this class
+    std::string server_addr_;
+
+};
+// Adapter that talks to ins through InsSDK; the zk On* hooks are empty here.
+class InsRowlockProxyZkAdapter : public RowlockProxyZkAdapterBase {
+public:
+    InsRowlockProxyZkAdapter(RowlockProxyImpl* rowlock_proxy_impl, const std::string& server_addr);
+    virtual ~InsRowlockProxyZkAdapter() {}
+    virtual bool Init();
+    // Public so they can be called from outside the class (presumably by ins watch callbacks).
+    void OnValueChange(const std::string& path, const std::string& value);
+    void OnServerChange();
+
+protected:
+    virtual void OnNodeValueChanged(const std::string& path,
+                                    const std::string& value) {}
+    virtual void OnWatchFailed(const std::string& path, int watch_type,
+                               int err) {}
+    virtual void OnNodeDeleted(const std::string& path) {}
+    virtual void OnSessionTimeout() {}
+    virtual void OnNodeCreated(const std::string& path) {}
+    virtual void OnChildrenChanged(const std::string& path,
+                                   const std::vector& name_list,
+                                   const std::vector& data_list) {}
+
+private:
+    RowlockProxyImpl* rowlock_proxy_impl_;
+    std::string server_addr_;
+    galaxy::ins::sdk::InsSDK* ins_sdk_;  // NOTE(review): the destructor above does not delete it
+};
+
+} // namespace observer
+} // namespace tera
+
+#endif // TERA_OBSERVER_ROWLOCKPROXY_ROWLOCK_PROXY_ZK_ADAPTER_H_
\ No newline at end of file
diff --git a/src/observer/test/observer_test.cc b/src/observer/test/observer_test.cc
new file mode 100644
index 000000000..299ec4581
--- /dev/null
+++ b/src/observer/test/observer_test.cc
@@ -0,0 +1,587 @@
+// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include
+#include
+
+#include
+#include
+#include
+
+#include "common/thread_pool.h"
+#include "observer/executor/observer.h"
+#include "observer/executor/random_key_selector.h"
+#include "observer/executor/scanner.h"
+#include "observer/executor/scanner_impl.h"
+#include "observer/executor/notification_impl.h"
+#include "sdk/client_impl.h"
+#include "sdk/sdk_utils.h"
+#include "tera.h"
+#include "types.h"
+
+DECLARE_bool(tera_gtxn_test_opened);
+DECLARE_int64(start_ts);
+DECLARE_int64(begin_commit_ts);
+DECLARE_int64(begin_prewrite_ts);
+DECLARE_int64(end_prewrite_ts);
+DECLARE_int64(commit_ts);
+DECLARE_string(flagfile);
+DECLARE_string(tera_coord_type);
+DECLARE_bool(tera_sdk_client_for_gtxn);
+DECLARE_bool(mock_rowlock_enable);
+
+namespace tera {
+namespace observer {
+// Global-transaction observer: records the notified cell, then acks it.
+class TestWorker : public Observer {
+public:
+    TestWorker(): counter_(0), notified_(false) {}
+    virtual ~TestWorker() {}
+    virtual ErrorCode OnNotify(tera::Transaction* t,
+                               tera::Client* client,
+                               const std::string& table_name,
+                               const std::string& family,
+                               const std::string& qualifier,
+                               const std::string& row,
+                               const std::string& value,
+                               int64_t timestamp,
+                               Notification* notification) {
+        LOG(INFO) << "[Notify DemoObserver] table:family:qualifer=" <<
+            table_name << ":" << family << ":" <<
+            qualifier << " row=" << row <<
+            " value=" << value << " timestamp=" << timestamp;
+        // remember what was notified so the test body can assert on it
+        table_name_ = table_name;
+        family_ = family;
+        qualifier_ = qualifier;
+        row_ = row;
+        value_ = value;
+
+        tera::ErrorCode err;
+        notified_ = true;
+        ++counter_;
+
+        tera::Table* table = client->OpenTable(table_name, &err);
+        notification->Ack(table, row, family, qualifier);
+
+        return err;
+    }
+
+    virtual std::string GetObserverName() const {
+        return "DemoObserver";
+    }
+
+    virtual TransactionType GetTransactionType() const {
+        return kGlobalTransaction;
+    }
+private:
+    std::atomic counter_;
+    std::atomic notified_;
+    // NOTE(review): the tests below read these private members directly - confirm how access is granted
+    std::string table_name_;
+    std::string family_;
+    std::string qualifier_;
+    std::string row_;
+    std::string value_;
+};
+// Single-row-transaction observer: writes a derived cell, acks, and commits.
+class TestWorkerGTX : public Observer {
+public:
+    TestWorkerGTX(): counter_(0), notified_(false) {}
+    virtual ~TestWorkerGTX() {}
+    virtual ErrorCode OnNotify(tera::Transaction* t,
+                               tera::Client* client,
+                               const std::string& table_name,
+                               const std::string& family,
+                               const std::string& qualifier,
+                               const std::string& row,
+                               const std::string& value,
+                               int64_t timestamp,
+                               Notification* notification) {
+        LOG(INFO) << "[Notify TestWorkerGTX] table:family:qualifer=" <<
+            table_name << ":" << family << ":" <<
+            qualifier << " row=" << row <<
+            " value=" << value << " timestamp=" << timestamp;
+        // remember what was notified so the test body can assert on it
+        table_name_ = table_name;
+        family_ = family;
+        qualifier_ = qualifier;
+        row_ = row;
+        value_ = value;
+
+        tera::ErrorCode err;
+        notified_ = true;
+        ++counter_;
+
+        tera::Table* table = client->OpenTable(table_name, &err);
+
+        // write ForwordIndex column
+        tera::RowMutation* mutation = table->NewRowMutation(row);
+        mutation->Put(family, qualifier + "_test", row + "_");
+        t->ApplyMutation(mutation);
+
+        tera::ErrorCode error;
+        t->Ack(table, row, family, qualifier);
+        table->CommitRowTransaction(t);
+        delete mutation;
+        return error;
+
+        return err;  // NOTE(review): unreachable - the return above always runs
+    }
+
+    virtual std::string GetObserverName() const {
+        return "DemoObserver";
+    }
+
+    virtual TransactionType GetTransactionType() const {
+        return kSingleRowTransaction;
+    }
+private:
+    std::atomic counter_;
+    std::atomic notified_;
+
+    std::string table_name_;
+    std::string family_;
+    std::string qualifier_;
+    std::string row_;
+    std::string value_;
+};
+// Observer that only logs the notification and returns a default ErrorCode.
+class DemoObserver : public tera::observer::Observer {
+public:
+    DemoObserver() {}
+    virtual ~DemoObserver() {}
+    virtual ErrorCode OnNotify(tera::Transaction* t,
+                               tera::Client* client,
+                               const std::string& table_name,
+                               const std::string& family,
+                               const std::string& qualifier,
+                               const std::string& row,
+                               const std::string& value,
+                               int64_t timestamp,
+                               Notification* notification) {
+        LOG(INFO) << "[Notify ParseObserver] table:family:qualifer=" <<
+            table_name << ":" << family << ":" <<
+            qualifier << " row=" << row <<
+            " value=" << value << " timestamp=" << timestamp;
+
+        tera::ErrorCode err;
+        // do nothing
+        return err;
+    }
+    virtual std::string GetObserverName() const {
+        return "DemoObserver";
+    }
+    virtual TransactionType GetTransactionType() const {
+        return kGlobalTransaction;
+    }
+};
+// Non-transactional observer: records the notified cell and does nothing else.
+class TestWorkerNTX : public Observer {
+public:
+    TestWorkerNTX(): counter_(0), notified_(false) {}
+    virtual ~TestWorkerNTX() {}
+    virtual ErrorCode OnNotify(tera::Transaction* t,
+                               tera::Client* client,
+                               const std::string& table_name,
+                               const std::string& family,
+                               const std::string& qualifier,
+                               const std::string& row,
+                               const std::string& value,
+                               int64_t timestamp,
+                               Notification* notification) {
+        LOG(INFO) << "[Notify TestWorkerNTX] table:family:qualifer=" <<
+            table_name << ":" << family << ":" <<
+            qualifier << " row=" << row <<
+            " value=" << value << " timestamp=" << timestamp;
+        // remember what was notified so the test body can assert on it
+        table_name_ = table_name;
+        family_ = family;
+        qualifier_ = qualifier;
+        row_ = row;
+        value_ = value;
+
+        tera::ErrorCode err;
+        notified_ = true;
+        ++counter_;
+
+        // do something without transaction
+
+        return err;
+    }
+
+    virtual std::string GetObserverName() const {
+        return "DemoObserver";
+    }
+
+    virtual TransactionType GetTransactionType() const {
+        return kNoneTransaction;
+    }
+private:
+    std::atomic counter_;
+    std::atomic notified_;
+
+    std::string table_name_;
+    std::string family_;
+    std::string qualifier_;
+    std::string row_;
+    std::string value_;
+};
+// Fixture whose methods run the scenarios against the client from --flagfile.
+class ObserverImplTest : public ::testing::Test {
+public:
+    void OnNotifyTest() {
+        tera::ErrorCode err;
+        tera::Client* client = tera::Client::NewClient(FLAGS_flagfile, &err);
+        // for ut test
+        EXPECT_EQ(tera::ErrorCode::kOK, err.GetType());
+        // for no core
+        if (tera::ErrorCode::kOK != err.GetType()) {
+            LOG(ERROR) << "new client failed";
+            return;
+        }
+
+        // create table
+        tera::TableDescriptor table_desc("observer_test_table");
+        table_desc.EnableTxn();
+
+        table_desc.AddLocalityGroup("lg1");
+        tera::ColumnFamilyDescriptor* cf1 = table_desc.AddColumnFamily("cf", "lg1");
+        cf1->EnableGlobalTransaction();
+        cf1->EnableNotify();
+        ExtendNotifyLgToDescriptor(&table_desc);
+
+        client->CreateTable(table_desc, &err);
+        if (err.GetType() != tera::ErrorCode::kOK) {
+            LOG(ERROR) << "Create table fail";
+        }
+
+        tera::Table* table = client->OpenTable("observer_test_table", &err);
+        EXPECT_EQ(tera::ErrorCode::kOK, err.GetType());
+        if (tera::ErrorCode::kOK != err.GetType()) {
+            LOG(ERROR) << "open table failed";
+            return;
+        }
+
+        std::unique_ptr t(table->StartRowTransaction("www.baidu.com"));
+
+        assert(t != NULL);
+        std::unique_ptr mu0(table->NewRowMutation("www.baidu.com"));
+        mu0->Put("_N_", "cf:Page", "I am not important");
+        t->ApplyMutation(mu0.get());
+        t->Commit();
+
+        std::unique_ptr g_txn(client->NewGlobalTransaction());
+        assert(g_txn != NULL);
+        std::unique_ptr mu1(table->NewRowMutation("www.baidu.com"));
+
+        mu1->Put("cf", "Page", "hello world", -1);
+        g_txn->ApplyMutation(mu1.get());
+        g_txn->Commit();
+
+        if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) {
+            std::cout << g_txn->GetError().ToString() << std::endl;
+        } else {
+            std::cout << "commit success" << std::endl;
+        }
+
+        // variables for fake timeoracle
+        FLAGS_start_ts = 10;
+        FLAGS_begin_commit_ts = 1;
+        FLAGS_begin_prewrite_ts = 1;
+        FLAGS_end_prewrite_ts = 1;
+        FLAGS_commit_ts = 13;
+
+        Observer* observer = new TestWorker();
+        Observer* demo = new DemoObserver();
+
+        Scanner* scanner = new ScannerImpl();
+        bool ret = scanner->Init();
+        EXPECT_EQ(true, ret);
+        if(!ret) {
+            LOG(ERROR) << "fail to init scanner_impl";
+            return;
+        }
+
+        err = scanner->Observe("observer_test_table", "cf", "Page", observer);
+        EXPECT_EQ(err.GetType(), tera::ErrorCode::kOK);
+
+        err = scanner->Observe("observer_test_table", "cf", "Page", demo);
+        EXPECT_EQ(err.GetType(), tera::ErrorCode::kOK);
+
+        if(!scanner->Start()) {
+            LOG(ERROR) << "fail to start scanner_impl";
+            return;
+        }
+        // NOTE(review): this loop has no timeout; it spins until notified_ is set
+        while (!static_cast(observer)->notified_) {
+            sleep(1);
+        }
+
+
+        EXPECT_EQ("www.baidu.com", static_cast(observer)->row_);
+        EXPECT_EQ("observer_test_table", static_cast(observer)->table_name_);
+        EXPECT_EQ("cf", static_cast(observer)->family_);
+        EXPECT_EQ("Page", static_cast(observer)->qualifier_);
+        EXPECT_EQ("hello world", static_cast(observer)->value_);
+
+        scanner->Exit();
+        delete scanner;
+    }
+    // Same flow with a single-row transaction and the TestWorkerGTX observer.
+    void SingleRowTransactionTest() {
+        tera::ErrorCode err;
+        tera::Client* client = tera::Client::NewClient(FLAGS_flagfile, &err);
+        // for ut test
+        EXPECT_EQ(tera::ErrorCode::kOK, err.GetType());
+        // for no core
+        if (tera::ErrorCode::kOK != err.GetType()) {
+            LOG(ERROR) << "new client failed";
+            return;
+        }
+
+        // create table
+        tera::TableDescriptor table_desc("observer_table_gtx");
+        table_desc.EnableTxn();
+
+        table_desc.AddLocalityGroup("lg1");
+        tera::ColumnFamilyDescriptor* cf1 = table_desc.AddColumnFamily("cf", "lg1");
+        cf1->EnableNotify();
+        ExtendNotifyLgToDescriptor(&table_desc);
+
+        client->CreateTable(table_desc, &err);
+        if (err.GetType() != tera::ErrorCode::kOK) {
+            LOG(ERROR) << "Create table fail";
+        }
+
+        tera::Table* table = client->OpenTable("observer_table_gtx", &err);
+        EXPECT_EQ(tera::ErrorCode::kOK, err.GetType());
+        if (tera::ErrorCode::kOK != err.GetType()) {
+            LOG(ERROR) << "open table failed";
+            return;
+        }
+
+        std::unique_ptr t(table->StartRowTransaction("www.baidu.com"));
+
+        assert(t != NULL);
+        std::unique_ptr mu0(table->NewRowMutation("www.baidu.com"));
+        mu0->Put("_N_", "cf:Page", "I am not important");
+        mu0->Put("cf", "Page", "hello world", -1);
+        t->ApplyMutation(mu0.get());
+        t->Commit();
+
+        if (t->GetError().GetType() != tera::ErrorCode::kOK) {
+            std::cout << t->GetError().ToString() << std::endl;
+        } else {
+            std::cout << "commit success" << std::endl;
+        }
+
+        Observer* observer = new TestWorkerGTX();
+
+        Scanner* scanner = new ScannerImpl();
+        bool ret = scanner->Init();
+
+        EXPECT_EQ(true, ret);
+        if(!ret) {
+            LOG(ERROR) << "fail to init scanner_impl";
+            return;
+        }
+
+        err = scanner->Observe("observer_table_gtx", "cf", "Page", observer);
+        EXPECT_EQ(err.GetType(), tera::ErrorCode::kOK);
+
+        if(!scanner->Start()) {
+            LOG(ERROR) << "fail to start scanner_impl";
+            return;
+        }
+        // NOTE(review): this loop has no timeout; it spins until notified_ is set
+        while (!static_cast(observer)->notified_) {
+            sleep(1);
+        }
+
+        EXPECT_EQ("www.baidu.com", static_cast(observer)->row_);
+        EXPECT_EQ("observer_table_gtx", static_cast(observer)->table_name_);
+        EXPECT_EQ("cf", static_cast(observer)->family_);
+        EXPECT_EQ("Page", static_cast(observer)->qualifier_);
+        EXPECT_EQ("hello world", static_cast(observer)->value_);
+        scanner->Exit();
+        delete scanner;
+    }
+    // Same flow on a table without EnableTxn(), written with plain Put calls.
+    void NonTransactionTest() {
+        tera::ErrorCode err;
+        tera::Client* client = tera::Client::NewClient(FLAGS_flagfile, &err);
+        // for ut test
+        EXPECT_EQ(tera::ErrorCode::kOK, err.GetType());
+        // for no core
+        if (tera::ErrorCode::kOK != err.GetType()) {
+            LOG(ERROR) << "new client failed";
+            return;
+        }
+
+        // create table
+        tera::TableDescriptor table_desc("observer_table_ntx");
+
+        table_desc.AddLocalityGroup("lg1");
+        tera::ColumnFamilyDescriptor* cf1 = table_desc.AddColumnFamily("cf", "lg1");
+        cf1->EnableNotify();
+        ExtendNotifyLgToDescriptor(&table_desc);
+
+        client->CreateTable(table_desc, &err);
+        if (err.GetType() != tera::ErrorCode::kOK) {
+            LOG(ERROR) << "Create table fail";
+        }
+
+        tera::Table* table = client->OpenTable("observer_table_ntx", &err);
+        EXPECT_EQ(tera::ErrorCode::kOK, err.GetType());
+        if (tera::ErrorCode::kOK != err.GetType()) {
+            LOG(ERROR) << "open table failed";
+            return;
+        }
+
+        table->Put("www.baidu.com", "_N_", "cf:Page", "I am not important", &err);
+        table->Put("www.baidu.com", "cf", "Page", "hello world", -1, &err);
+
+        Observer* observer = new TestWorkerNTX();
+
+        Scanner* scanner = new ScannerImpl();
+        bool ret = scanner->Init();
+
+        EXPECT_EQ(true, ret);
+        if(!ret) {
+            LOG(ERROR) << "fail to init scanner_impl";
+            return;
+        }
+
+        err = scanner->Observe("observer_table_ntx", "cf", "Page", observer);
+        EXPECT_EQ(err.GetType(), tera::ErrorCode::kOK);
+
+        if(!scanner->Start()) {
+            LOG(ERROR) << "fail to start scanner_impl";
+            return;
+        }
+        // NOTE(review): this loop has no timeout; it spins until notified_ is set
+        while (!static_cast(observer)->notified_) {
+            sleep(1);
+        }
+
+        EXPECT_EQ("www.baidu.com", static_cast(observer)->row_);
+        EXPECT_EQ("observer_table_ntx", static_cast(observer)->table_name_);
+        EXPECT_EQ("cf", static_cast(observer)->family_);
+        EXPECT_EQ("Page", static_cast(observer)->qualifier_);
+        EXPECT_EQ("hello world", static_cast(observer)->value_);
+        scanner->Exit();
+        delete scanner;
+    }
+    // Exercises ScannerImpl::Observe(): no client, duplicates, concurrent calls.
+    void ObserveTest() {
+        tera::ErrorCode err;
+        tera::Client* client = tera::Client::NewClient(FLAGS_flagfile, &err);
+        // for ut test
+        EXPECT_EQ(tera::ErrorCode::kOK, err.GetType());
+        // for no core
+        if (tera::ErrorCode::kOK != err.GetType()) {
+            LOG(ERROR) << "new client failed";
+            return;
+        }
+
+        // create table
+        tera::TableDescriptor table_desc("observer_table");
+        table_desc.EnableTxn();
+        table_desc.AddLocalityGroup("notify");
+        tera::ColumnFamilyDescriptor* cf_t = table_desc.AddColumnFamily(kNotifyColumnFamily, "notify");
+        cf_t->EnableGlobalTransaction();
+
+        table_desc.AddLocalityGroup("lg1");
+        tera::ColumnFamilyDescriptor* cf1 = table_desc.AddColumnFamily("cf", "lg1");
+        cf1->EnableGlobalTransaction();
+        cf1->EnableNotify();
+        tera::ColumnFamilyDescriptor* cf2 = table_desc.AddColumnFamily("cf_1", "lg1");
+        cf2->EnableGlobalTransaction();
+        cf2->EnableNotify();
+
+        ExtendNotifyLgToDescriptor(&table_desc);
+
+        client->CreateTable(table_desc, &err);
+        if (err.GetType() != tera::ErrorCode::kOK) {
+            LOG(ERROR) << "Create table fail";
+        }
+
+        FLAGS_tera_sdk_client_for_gtxn = true;
+        FLAGS_tera_coord_type = "ins";
+        common::ThreadPool thread_pool(5);
+        ScannerImpl* scanner = new ScannerImpl();
+        Observer* observer = new DemoObserver();
+        scanner->key_selector_.reset(new RandomKeySelector());
+
+        // single thread
+        // tera_client_ has not been assigned yet, so this Observe must fail
+        err = scanner->Observe("observer_table", "cf", "qualifier", observer);
+        EXPECT_TRUE(err.GetType() != tera::ErrorCode::kOK);
+
+        scanner->tera_client_ = tera::Client::NewClient(FLAGS_flagfile, &err);
+        EXPECT_EQ(scanner->table_observe_info_->size(), 0);
+
+        err = scanner->Observe("observer_table", "cf", "qualifier", observer);
+        EXPECT_TRUE(err.GetType() == tera::ErrorCode::kOK);
+        // the same observer on the same column a second time is rejected
+        err = scanner->Observe("observer_table", "cf", "qualifier", observer);
+        EXPECT_FALSE(err.GetType() == tera::ErrorCode::kOK);
+
+        err = scanner->Observe("observer_table", "cf_1", "qualifier", observer);
+        EXPECT_TRUE(err.GetType() == tera::ErrorCode::kOK);
+
+        // multi thread
+        std::string qualifier;
+        // ten distinct qualifiers: "a", "aa", ... registered from pool threads
+        for (uint32_t i = 0; i < 10; ++i) {
+            qualifier += 'a';
+            thread_pool.AddTask(std::bind(&ScannerImpl::Observe, scanner, "observer_table", "cf", qualifier, observer));
+        }
+        thread_pool.Stop(true);
+        EXPECT_EQ(1, scanner->observers_.size());
+        EXPECT_EQ(10 + 2, (*(scanner->table_observe_info_))["observer_table"].observe_columns.size());
+        scanner->Exit();
+        delete scanner;
+    }
+};
+// Each TEST_F sets the same three flags before running one scenario.
+TEST_F(ObserverImplTest, OnNotifyTest) {
+    FLAGS_tera_gtxn_test_opened = true;
+    FLAGS_tera_coord_type = "ins";
+    FLAGS_mock_rowlock_enable = true;
+    OnNotifyTest();
+}
+
+TEST_F(ObserverImplTest, SingleRowTransactionTest) {
+    FLAGS_tera_gtxn_test_opened = true;
+    FLAGS_tera_coord_type = "ins";
+    FLAGS_mock_rowlock_enable = true;
+    SingleRowTransactionTest();
+}
+
+TEST_F(ObserverImplTest, NoneTransactionTest) {
+    FLAGS_tera_gtxn_test_opened = true;
+    FLAGS_tera_coord_type = "ins";
+    FLAGS_mock_rowlock_enable = true;
+    NonTransactionTest();
+}
+
+TEST_F(ObserverImplTest, ObserveTest) {
+    FLAGS_tera_gtxn_test_opened = true;
+    FLAGS_tera_coord_type = "ins";
+    FLAGS_mock_rowlock_enable = true;
+    ObserveTest();
+}
+
+} // namespace observer
+} // namespace tera
+// Sets the gtxn client flag before the command line is parsed, then runs all tests.
+int main(int argc, char** argv) {
+    FLAGS_tera_sdk_client_for_gtxn = true;
+    ::google::ParseCommandLineFlags(&argc, &argv, true);
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
+
diff --git a/src/observer/test/rowlock_proxy_test.cc b/src/observer/test/rowlock_proxy_test.cc
new file mode 100644
index 000000000..3b690686b
--- /dev/null
+++ b/src/observer/test/rowlock_proxy_test.cc
@@ -0,0 +1,107 @@
+// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+ +#include +#include +#include + +#include +#include +#include +#include + +#include "observer/rowlockproxy/remote_rowlock_proxy.h" +#include "observer/rowlockproxy/rowlock_proxy_impl.h" +#include "proto/rpc_client.h" +#include "sdk/rowlock_client.h" +#include "utils/utils_cmd.h" + +class TestClosure : public google::protobuf::Closure { +public: + TestClosure() {} + virtual void Run() {} +}; + +namespace tera { +namespace observer { + +class TestClient : public RowlockStub { +public: + TestClient() : RowlockStub("127.0.0.1:22222") {}; + ~TestClient() {} + + virtual bool TryLock(const RowlockRequest* request, + RowlockResponse* response, + std::function done = NULL) { + response->set_lock_status(kLockSucc); + return true; + } + + virtual bool UnLock(const RowlockRequest* request, + RowlockResponse* response, + std::function done = NULL) { + response->set_lock_status(kLockSucc); + return true; + } +}; + +TEST(RowlockProxyTest, ValueTest) { + RowlockProxyImpl rowlock_proxy_impl; + + rowlock_proxy_impl.SetServerNumber(100); + EXPECT_EQ(100, rowlock_proxy_impl.server_number_); + EXPECT_EQ(100, rowlock_proxy_impl.GetServerNumber()); + + rowlock_proxy_impl.SetServerNumber(1000); + EXPECT_EQ(1000, rowlock_proxy_impl.server_number_); + EXPECT_EQ(1000, rowlock_proxy_impl.GetServerNumber()); + + rowlock_proxy_impl.SetServerNumber(2); + EXPECT_EQ(1000, rowlock_proxy_impl.server_addrs_->size()); + EXPECT_EQ(0, rowlock_proxy_impl.clients_->size()); + rowlock_proxy_impl.UpdateServers(0, "0.0.0.0:9999"); + + EXPECT_EQ(1, rowlock_proxy_impl.clients_->size()); + rowlock_proxy_impl.UpdateServers(0, "0.0.1.1:9999"); + + EXPECT_EQ(2, rowlock_proxy_impl.clients_->size()); + + EXPECT_EQ(std::hash()("tablerow"), + rowlock_proxy_impl.GetRowKey("table", "row")); + + EXPECT_EQ((*rowlock_proxy_impl.server_addrs_)[0], rowlock_proxy_impl.ScheduleRowKey(0)); + EXPECT_EQ((*rowlock_proxy_impl.server_addrs_)[1], rowlock_proxy_impl.ScheduleRowKey(1)); +} + +TEST(RowlockProxyTest, LockTest) { + 
RowlockProxyImpl rowlock_proxy_impl; + + rowlock_proxy_impl.SetServerNumber(1); + rowlock_proxy_impl.UpdateServers(0, "0.0.0.0:9999"); + EXPECT_EQ(1, rowlock_proxy_impl.server_addrs_->size()); + EXPECT_EQ(1, rowlock_proxy_impl.clients_->size()); + + EXPECT_TRUE(rowlock_proxy_impl.clients_->find("0.0.0.0:9999") != + rowlock_proxy_impl.clients_->end()); + delete (*rowlock_proxy_impl.clients_)["0.0.0.0:9999"]; + (*rowlock_proxy_impl.clients_)["0.0.0.0:9999"] = new TestClient(); + + RowlockRequest request; + RowlockResponse response; + request.set_table_name("table"); + request.set_row("row"); + + google::protobuf::Closure* closure = new TestClosure(); + + rowlock_proxy_impl.TryLock(&request, &response, closure); + EXPECT_EQ(response.lock_status(), kLockSucc); + + google::protobuf::Closure* unlock_closure = new TestClosure(); + rowlock_proxy_impl.UnLock(&request, &response, unlock_closure); + EXPECT_EQ(response.lock_status(), kLockSucc); +} + +} // namespace observer +} // namespace tera + diff --git a/src/observer/test/rowlock_test.cc b/src/observer/test/rowlock_test.cc new file mode 100644 index 000000000..611cf195c --- /dev/null +++ b/src/observer/test/rowlock_test.cc @@ -0,0 +1,184 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include +#include +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "observer/rowlocknode/rowlock_db.h" +#include "common/counter.h" + +DECLARE_int32(rowlock_timing_wheel_patch_num); + +namespace tera { +namespace observer { + +class LockTest { +public: + void Lock(tera::observer::ShardedRowlockDB* db, Counter* succeed) { + for (uint32_t i = 0; i < 10; ++i) { + uint64_t key = 1; + + if (db->TryLock(key) == true) { + succeed->Inc(); + } + } + } +}; + +TEST(ShardedRowlockDB, LockTest) { + ShardedRowlockDB db; + + // test for lock + EXPECT_EQ(0, db.Size()); + + // different keys + EXPECT_TRUE(db.TryLock(0)); + EXPECT_TRUE(db.TryLock(1)); + EXPECT_TRUE(db.TryLock(2)); + + // same key that has been locked + EXPECT_FALSE(db.TryLock(0)); + EXPECT_FALSE(db.TryLock(1)); + EXPECT_FALSE(db.TryLock(2)); + + // test for unlock + db.UnLock(0); + EXPECT_TRUE(db.TryLock(0)); + + // unlock for other locked keys + EXPECT_FALSE(db.TryLock(1)); + EXPECT_FALSE(db.TryLock(2)); + + // double unlock + db.UnLock(0); + db.UnLock(0); + EXPECT_TRUE(db.TryLock(0)); + + // unlock size + EXPECT_EQ(3, db.Size()); + db.UnLock(0); + EXPECT_EQ(2, db.Size()); + db.UnLock(0); + EXPECT_EQ(2, db.Size()); + db.UnLock(1); + EXPECT_EQ(1, db.Size()); + db.UnLock(2); + EXPECT_EQ(0, db.Size()); + + // test for ClearTimeout + for (int32_t i = 0; i < FLAGS_rowlock_timing_wheel_patch_num; ++i) { + // all keys will not be unlocked until timeing wheel works + EXPECT_TRUE(db.TryLock(i)); + EXPECT_EQ(i + 1, db.Size()); + db.ClearTimeout(); + } + + // timing wheel has run a circle, oldest key will be unlocked + EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num - 1, db.Size()); + + // unlock the second oldest key + db.ClearTimeout(); + EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num - 2, db.Size()); + + // test for ClearTimeout multi keys + for (int32_t i = 0; i < FLAGS_rowlock_timing_wheel_patch_num; ++i) { + // all keys will not be unlocked until timeing wheel 
works + EXPECT_TRUE(db.TryLock(i * 10 + 1000000)); + EXPECT_TRUE(db.TryLock(i * 10 + 1000001)); + EXPECT_TRUE(db.TryLock(i * 10 + 1000002)); + db.ClearTimeout(); + } + + // timing wheel has run a circle, oldest 3 keys will be unlocked + EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num * 3 - 3, db.Size()); + + // unlock the oldest 3 keys + db.ClearTimeout(); + EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num * 3 - 6, db.Size()); +} + +TEST(RowlockDB, LockTest) { + RowlockDB db; + + // test for lock + EXPECT_EQ(0, db.Size()); + + // different keys + EXPECT_TRUE(db.TryLock(0)); + EXPECT_TRUE(db.TryLock(1)); + EXPECT_TRUE(db.TryLock(2)); + + // same key that has been locked + EXPECT_FALSE(db.TryLock(0)); + EXPECT_FALSE(db.TryLock(1)); + EXPECT_FALSE(db.TryLock(2)); + + // test for unlock + db.UnLock(0); + EXPECT_TRUE(db.TryLock(0)); + + // unlock for other locked keys + EXPECT_FALSE(db.TryLock(1)); + EXPECT_FALSE(db.TryLock(2)); + + // double unlock + db.UnLock(0); + db.UnLock(0); + EXPECT_TRUE(db.TryLock(0)); + + // unlock size + EXPECT_EQ(3, db.Size()); + db.UnLock(0); + EXPECT_EQ(2, db.Size()); + db.UnLock(0); + EXPECT_EQ(2, db.Size()); + db.UnLock(1); + EXPECT_EQ(1, db.Size()); + db.UnLock(2); + EXPECT_EQ(0, db.Size()); + + // test for ClearTimeout + for (int32_t i = 0; i < FLAGS_rowlock_timing_wheel_patch_num; ++i) { + // all keys will not be unlocked until timeing wheel works + EXPECT_TRUE(db.TryLock(i)); + EXPECT_EQ(i + 1, db.Size()); + db.ClearTimeout(); + } + + // timing wheel has run a circle, oldest key will be unlocked + EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num - 1, db.Size()); + + // unlock the second oldest key + db.ClearTimeout(); + EXPECT_EQ(FLAGS_rowlock_timing_wheel_patch_num - 2, db.Size()); +} + +TEST(ShardedRowlockDB, ParaTest) { + Counter counter; + ShardedRowlockDB db; + LockTest test; + + // 10 threads to lock the same key + ThreadPool thread_pool(10); + for (uint32_t i = 0; i < 10; ++i) { + ThreadPool::Task task = std::bind(&LockTest::Lock, 
&test, &db, &counter); + thread_pool.AddTask(task); + } + thread_pool.Stop(true); // wait until every queued Lock task has run; a fixed sleep(1) only guessed at that + EXPECT_EQ(1, db.Size()); + EXPECT_EQ(1, counter.Get()); + + for (int32_t i = 0; i < FLAGS_rowlock_timing_wheel_patch_num; ++i) { + db.ClearTimeout(); + } + EXPECT_EQ(0, db.Size()); +} + +} // namespace observer +} // namespace tera diff --git a/src/observer/test/scanner_test.cc b/src/observer/test/scanner_test.cc new file mode 100644 index 000000000..fc1b91c05 --- /dev/null +++ b/src/observer/test/scanner_test.cc @@ -0,0 +1,495 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include +#include +#include +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "observer/executor/random_key_selector.h" +#include "observer/executor/scanner_impl.h" +#include "observer/observer_demo/demo_observer.h" +#include "sdk/client_impl.h" +#include "sdk/global_txn.h" +#include "sdk/mutate_impl.h" +#include "sdk/read_impl.h" +#include "sdk/table_impl.h" +#include "sdk/sdk_utils.h" +#include "tera.h" + +DECLARE_bool(tera_sdk_client_for_gtxn); +DECLARE_bool(tera_sdk_tso_client_enabled); +DECLARE_string(tera_coord_type); +DECLARE_bool(rowlock_test); + +namespace tera { +namespace observer { + + +class TestRowReader : public RowReaderImpl { +public: + TestRowReader(TableImpl* table, const std::string& row_key) + : RowReaderImpl(table, row_key), seq_(0) { + if (row_key == "empty") { + // empty case + } else if (row_key == "900") { + value_.push_back("900"); + value_.push_back("900"); + value_.push_back("901"); + value_.push_back("920"); + } else if (row_key == "1100") { + value_.push_back("1000"); + value_.push_back("1000"); + value_.push_back("1100"); + value_.push_back("1100"); + } else if (row_key == "1hour") { + value_.push_back("810"); + value_.push_back("820"); + value_.push_back("830"); + value_.push_back("840"); + } else if (row_key == 
"collision_mix") { + value_.push_back("100"); + value_.push_back("1000"); + value_.push_back("4700"); + value_.push_back("1100"); + } else if (row_key == "error_ts") { + value_.push_back("100:sffaeeew"); + } else if (row_key == "some_error_ts") { + value_.push_back("wrong_string"); + value_.push_back("900"); + value_.push_back("900"); + value_.push_back("900"); + } else { + value_.push_back("1010"); + value_.push_back("1012"); + value_.push_back("1013"); + value_.push_back("1014"); + value_.push_back("1015"); + value_.push_back("1016"); + value_.push_back("1017"); + } + } + virtual std::string Value() { + return value_[seq_]; + + } + virtual int64_t Timestamp() { + return 9999999; + } + virtual void AddColumn(const std::string& family, const std::string& qualifier) {} + virtual bool Done() { + return seq_ == value_.size(); + } + virtual void Next() { + seq_++; + } +private: + std::vector value_; + uint32_t seq_; +}; + +class TestTransaction : public GlobalTxn { +public: + TestTransaction(int64_t start_ts, common::ThreadPool* thread_pool, bool error = false) + : GlobalTxn(NULL, thread_pool, NULL), + start_timestamp_(1000), error_(error) {} + + virtual ~TestTransaction() {} + virtual ErrorCode Get(RowReader* row_reader) { + ErrorCode err; + return err; + } + virtual int64_t GetStartTimestamp() { + return start_timestamp_; + } + virtual const ErrorCode& GetError() { + if (error_ == true) { + err_.SetFailed(ErrorCode::kSystem, ""); + } + return err_; + } +private: + int64_t start_timestamp_; + ErrorCode err_; + bool error_; +}; + +class TestRowMutationImpl : public RowMutationImpl { +public: + TestRowMutationImpl(Table* table, const std::string& row_key) + : RowMutationImpl(table, row_key) {} + virtual void Put(const std::string& value, int32_t ttl = -1) {} + virtual void ApplyMutation(RowMutation* row_mu) {} +}; + +class TestTable : public TableImpl { +public: + TestTable(const std::string& table_name, + ThreadPool* thread_pool, + sdk::ClusterFinder* cluster) + : 
TableImpl(table_name, thread_pool, cluster), + global_txn_(true), + thread_pool_(thread_pool) {} + virtual RowReader* NewRowReader(const std::string& row_key) { + return new TestRowReader(this, row_key); + } + virtual Transaction* StartRowTransaction(const std::string& row_key) { + return new TestTransaction(1, thread_pool_); + } + virtual RowMutation* NewRowMutation(const std::string& row_key) { + return new TestRowMutationImpl(this, row_key); + } + virtual void CommitRowTransaction(Transaction* transaction) {} + virtual bool GetDescriptor(TableDescriptor* schema, ErrorCode* err) { + schema->AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema->AddColumnFamily("cf1"); + cfd1->EnableNotify(); + ExtendNotifyLgToDescriptor(schema); + if (!global_txn_) { + cfd1->DisableGlobalTransaction(); + } + return true; + } +private: + bool global_txn_; + common::ThreadPool* thread_pool_; +}; + +class TestResultStream : public tera::ResultStream{ +public: + virtual bool Done(ErrorCode* err) { + if (next_number_ < row_name_.size()) { + return false; + } else { + return true; + } + } + virtual void Next() { + next_number_++; + } + + virtual std::string RowName() const { + return row_name_[next_number_]; + } + virtual std::string Qualifier() const { + return qualifier_[next_number_]; + } + + + virtual std::string Family() const { + return ""; + } + + virtual int64_t Timestamp() const { + return 0; + } + virtual std::string Value() const { + return ""; + } + + virtual int64_t ValueInt64() const { + return 0; + } + + virtual bool LookUp(const std::string& row_key) { + return true; + } + + virtual std::string ColumnName() const { + return ""; + } +private: + uint32_t next_number_ = 0; // cursor into row_name_/qualifier_; must start at 0 however the object is created + std::vector row_name_; + std::vector qualifier_; + bool done_ = false; // not read by any override in this class +}; + +class TestObserver : public tera::observer::Observer { +public: + TestObserver() : count_(0) {} + virtual ~TestObserver() {} + virtual ErrorCode OnNotify(tera::Transaction* t, + tera::Client* client, + const std::string& 
table_name, + const std::string& family, + const std::string& qualifier, + const std::string& row, + const std::string& value, + int64_t timestamp, + Notification* notification) { + LOG(INFO) << "[Notify TestObserver] table:family:qualifer=" << + table_name << ":" << family << ":" << + qualifier << " row=" << row << + " value=" << value << " timestamp=" << timestamp; + + count_++; + + tera::ErrorCode err; + // do nothing + return err; + } + virtual std::string GetObserverName() const { + return "TestObserver"; + } + + virtual TransactionType GetTransactionType() const { + return kGlobalTransaction; + } +private: + std::atomic count_; +}; + +class TestClient : public ClientImpl { +public: + TestClient() : ClientImpl("", "") {} + ~TestClient() {} + virtual Table* OpenTable(const std::string& table_name, ErrorCode* err) { + return static_cast(new TestTable(table_name, &thread_pool_, NULL)); + } +}; + +class TestKeySelector : public RandomKeySelector { +public: + TestKeySelector() {} + virtual ErrorCode Observe(const std::string& table_name) { + tera::ErrorCode err; + return err; + } +}; + +TEST(ScannerImpl, ParseNotifyQualifier) { + FLAGS_tera_sdk_client_for_gtxn = true; + FLAGS_tera_coord_type = "mock_zk"; + ScannerImpl scanner; + + std::string data_family; + std::string data_qualfier; + + EXPECT_TRUE(scanner.ParseNotifyQualifier("C:url", &data_family, &data_qualfier)); + EXPECT_EQ(data_family, "C"); + EXPECT_EQ(data_qualfier, "url"); + + EXPECT_TRUE(scanner.ParseNotifyQualifier("cf:page", &data_family, &data_qualfier)); + EXPECT_EQ(data_family, "cf"); + EXPECT_EQ(data_qualfier, "page"); + + EXPECT_TRUE(scanner.ParseNotifyQualifier("cf::::::", &data_family, &data_qualfier)); + EXPECT_EQ(data_family, "cf"); + EXPECT_EQ(data_qualfier, ":::::"); + + EXPECT_TRUE(scanner.ParseNotifyQualifier("cf:___", &data_family, &data_qualfier)); + EXPECT_EQ(data_family, "cf"); + EXPECT_EQ(data_qualfier, "___"); + + EXPECT_FALSE(scanner.ParseNotifyQualifier("Curl", &data_family, 
&data_qualfier)); + EXPECT_FALSE(scanner.ParseNotifyQualifier("C_url", &data_family, &data_qualfier)); + EXPECT_FALSE(scanner.ParseNotifyQualifier("C.urlN_", &data_family, &data_qualfier)); + EXPECT_FALSE(scanner.ParseNotifyQualifier("++page", &data_family, &data_qualfier)); + +} + +TEST(ScannerImpl, DoReadValue) { + FLAGS_tera_sdk_client_for_gtxn = true; + FLAGS_mock_rowlock_enable = true; + FLAGS_tera_coord_type = "mock_zk"; + common::ThreadPool thread_pool(2); + ScannerImpl scanner; + TestTable table("test_table", &thread_pool, NULL); + + std::shared_ptr notify_cell(new NotifyCell(new TestTransaction(1, &thread_pool))); + Column column = {"test_table", "family", "qualifier"}; + + notify_cell->row = "row"; + notify_cell->value = "value"; + notify_cell->timestamp = 999999999; + notify_cell->observed_column = column; + notify_cell->table = &table; + + // no table name + EXPECT_FALSE(scanner.DoReadValue(notify_cell)); + // no column + ScannerImpl::TableObserveInfo cell; + (*scanner.table_observe_info_)["test_table"] = cell; + EXPECT_FALSE(scanner.DoReadValue(notify_cell)); + // size 0 + (*scanner.table_observe_info_)["test_table"].observe_columns[column].clear(); + EXPECT_FALSE(scanner.DoReadValue(notify_cell)); + + Observer* observer = new TestObserver(); + // normal + (*scanner.table_observe_info_)["test_table"].observe_columns[column].insert(observer); + EXPECT_TRUE(scanner.DoReadValue(notify_cell)); + + // multi observer + Observer* parse = new TestObserver(); + (*scanner.table_observe_info_)["test_table"].observe_columns[column].insert(parse); + EXPECT_TRUE(scanner.DoReadValue(notify_cell)); +} + +TEST(ScannerImpl, MultiThreadDoReadValue) { + FLAGS_tera_sdk_client_for_gtxn = true; + FLAGS_mock_rowlock_enable = true; + FLAGS_tera_coord_type = "mock_zk"; + common::ThreadPool thread_pool(2); + ScannerImpl scanner; + TestTable table("test_table", &thread_pool, NULL); + + std::shared_ptr notify_cell(new NotifyCell(new TestTransaction(1, &thread_pool))); + Column 
column = {"test_table", "family", "qualifier"}; + + notify_cell->row = "row"; + notify_cell->value = "value"; + notify_cell->timestamp = 100; + notify_cell->observed_column = column; + notify_cell->table = &table; + + Observer* observer = new TestObserver(); + (*scanner.table_observe_info_)["test_table"].observe_columns[column].insert(observer); + + common::ThreadPool worker_thread(10); + for (uint32_t i = 0; i < 10; ++i) { + worker_thread.AddTask(std::bind(&ScannerImpl::DoReadValue, &scanner, notify_cell)); + } + worker_thread.Stop(true); + scanner.transaction_threads_->Stop(true); + EXPECT_EQ(((TestObserver*)observer)->count_, 10); +} + +TEST(ScannerImpl, NextRow) { + FLAGS_tera_sdk_client_for_gtxn = true; + FLAGS_tera_coord_type = "mock_zk"; + std::unique_ptr result_stream(new TestResultStream()); + ScannerImpl scanner; + std::set columns; + bool finished = false; + std::string vec_rowkey; + std::vector vec_col; + + // stream done + EXPECT_FALSE(scanner.NextRow(columns, result_stream.get(), "table_name", &finished, &vec_rowkey, &vec_col)); + EXPECT_EQ(true, finished); + + finished = false; + static_cast(result_stream.get())->row_name_.push_back("row1"); + static_cast(result_stream.get())->qualifier_.push_back("cf:page1"); + static_cast(result_stream.get())->row_name_.push_back("row1"); + static_cast(result_stream.get())->qualifier_.push_back("cf:page2"); + static_cast(result_stream.get())->row_name_.push_back("row2"); + static_cast(result_stream.get())->qualifier_.push_back("cf:page3"); + static_cast(result_stream.get())->row_name_.push_back("row2"); + static_cast(result_stream.get())->qualifier_.push_back("cf:page4"); + + Column colum_1 = {"table_name", "cf", "page1"}; + Column colum_2 = {"table_name", "cf", "page2"}; + Column colum_3 = {"table_name", "cf", "page3"}; + Column colum_4 = {"table_name", "cf", "page4"}; + columns.insert(colum_1); + columns.insert(colum_2); + columns.insert(colum_3); + columns.insert(colum_4); + + // row 1 + 
EXPECT_TRUE(scanner.NextRow(columns, result_stream.get(), "table_name", &finished, &vec_rowkey, &vec_col)); + EXPECT_FALSE(finished); + + // row 1 data + EXPECT_EQ(vec_col.size(), 2); + EXPECT_EQ(vec_rowkey, "row1"); + EXPECT_EQ(vec_col[0].qualifier, "page1"); + EXPECT_EQ(vec_col[1].qualifier, "page2"); + + // row 2 + EXPECT_TRUE(scanner.NextRow(columns, result_stream.get(), "table_name", &finished, &vec_rowkey, &vec_col)); + EXPECT_FALSE(finished); + + // row 2 data + EXPECT_EQ(vec_col.size(), 2); + EXPECT_EQ(vec_rowkey, "row2"); + EXPECT_EQ(vec_col[0].qualifier, "page3"); + EXPECT_EQ(vec_col[1].qualifier, "page4"); + + // scan finish + EXPECT_FALSE(scanner.NextRow(columns, result_stream.get(), "table_name", &finished, &vec_rowkey, &vec_col)); + EXPECT_TRUE(finished); +} + + + +TEST(ScannerImpl, CheckConflictOnAckColumn) { + FLAGS_tera_sdk_client_for_gtxn = true; + FLAGS_tera_coord_type = "mock_zk"; + common::ThreadPool thread_pool(2); + ScannerImpl scanner; + TestTable table("test_table", &thread_pool, NULL); + + std::shared_ptr notify_cell(new NotifyCell(new TestTransaction(1, &thread_pool))); + Column column = {"test_table", "family", "qualifier"}; + + notify_cell->row = "row"; + notify_cell->value = "value"; + notify_cell->timestamp = 1000; + notify_cell->observed_column = column; + notify_cell->table = &table; + + std::set observers; + + TestObserver observer; + observers.insert(&observer); + + // empty case + notify_cell->row = "empty"; + EXPECT_TRUE(scanner.CheckConflictOnAckColumn(notify_cell, observers)); + + // row reader ts < transaction(notify) ts + notify_cell->row = "900"; + EXPECT_TRUE(scanner.CheckConflictOnAckColumn(notify_cell, observers)); + + // row reader ts > transaction(notify) ts + notify_cell->row = "1100"; + EXPECT_FALSE(scanner.CheckConflictOnAckColumn(notify_cell, observers)); + + // transaction ts - row reader ts < 600 + notify_cell->timestamp = 700; + notify_cell->row = "1hour"; + 
EXPECT_FALSE(scanner.CheckConflictOnAckColumn(notify_cell, observers)); + + // collision_mix: some legal, some illegal + notify_cell->row = "collision_mix"; + EXPECT_FALSE(scanner.CheckConflictOnAckColumn(notify_cell, observers)); + + // ack parse fail + notify_cell->timestamp = 1000; + notify_cell->row = "error_ts"; + EXPECT_FALSE(scanner.CheckConflictOnAckColumn(notify_cell, observers)); + + // some ack parse fail + notify_cell->row = "some_error_ts"; + EXPECT_FALSE(scanner.CheckConflictOnAckColumn(notify_cell, observers)); + + // mutation fail + std::shared_ptr notify_cell_fail(new NotifyCell(new TestTransaction(1, &thread_pool, true))); + + notify_cell_fail->row = "row"; + notify_cell_fail->value = "value"; + notify_cell_fail->timestamp = 1000; + notify_cell_fail->observed_column = column; + notify_cell_fail->table = &table; + + // empty case + notify_cell->row = "empty"; + EXPECT_FALSE(scanner.CheckConflictOnAckColumn(notify_cell_fail, observers)); + + // row reader ts < transaction(notify) ts + notify_cell->row = "900"; + EXPECT_FALSE(scanner.CheckConflictOnAckColumn(notify_cell_fail, observers)); +} + +} // namespace observer +} // namespace tera + diff --git a/src/proto/lb_client.cc b/src/proto/lb_client.cc new file mode 100644 index 000000000..0b70af707 --- /dev/null +++ b/src/proto/lb_client.cc @@ -0,0 +1,37 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include + +#include "gflags/gflags.h" + +#include "proto/lb_client.h" + +DECLARE_int32(tera_master_connect_retry_times); +DECLARE_int32(tera_master_connect_retry_period); +DECLARE_int32(tera_master_connect_timeout_period); + +namespace tera { +namespace load_balancer { + +LBClient::LBClient(const std::string& server_addr, + int32_t rpc_timeout) + : RpcClient(server_addr), + rpc_timeout_(rpc_timeout) { +} + +LBClient::~LBClient() { +} + +bool LBClient::CmdCtrl(const CmdCtrlRequest* request, + CmdCtrlResponse* response) { + return SendMessageWithRetry(&LoadBalancerService::Stub::CmdCtrl, + request, response, + (std::function)NULL, + "CmdCtrl", rpc_timeout_); +} + +} // namespace load_balancer +} // namespace tera + diff --git a/src/proto/lb_client.h b/src/proto/lb_client.h new file mode 100644 index 000000000..faf47b59a --- /dev/null +++ b/src/proto/lb_client.h @@ -0,0 +1,35 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_LOAD_BALANCER_LB_CLIENT_H_ +#define TERA_LOAD_BALANCER_LB_CLIENT_H_ + +#include + +#include "proto/load_balancer_rpc.pb.h" +#include "proto/rpc_client.h" + +DECLARE_int32(tera_rpc_timeout_period); + +namespace tera { +namespace load_balancer { + +class LBClient : public RpcClient { +public: + LBClient(const std::string& server_addr = "", + int32_t rpc_timeout = FLAGS_tera_rpc_timeout_period); + virtual ~LBClient(); + + virtual bool CmdCtrl(const CmdCtrlRequest* request, + CmdCtrlResponse* response); + +private: + int32_t rpc_timeout_; +}; + +} // namespace load_balancer +} // namespace tera + +#endif // TERA_LOAD_BALANCER_LB_CLIENT_H_ + diff --git a/src/proto/load_balancer_rpc.proto b/src/proto/load_balancer_rpc.proto new file mode 100644 index 000000000..d7b077fa1 --- /dev/null +++ b/src/proto/load_balancer_rpc.proto @@ -0,0 +1,11 @@ +import "sofa/pbrpc/rpc_option.proto"; +import "master_rpc.proto"; + +package tera; + +service LoadBalancerService { + rpc CmdCtrl(CmdCtrlRequest) returns(CmdCtrlResponse); +} + +option cc_generic_services = true; + diff --git a/src/proto/rowlocknode_rpc.proto b/src/proto/rowlocknode_rpc.proto new file mode 100644 index 000000000..6e8107710 --- /dev/null +++ b/src/proto/rowlocknode_rpc.proto @@ -0,0 +1,19 @@ +import "sofa/pbrpc/rpc_option.proto"; +import "status_code.proto"; + +package tera; + +message RowlockRequest { + required string table_name = 1; + required string row = 2; +} + +message RowlockResponse { + required StatusCode lock_status = 1; +} + +service RowlockService { + rpc Lock(RowlockRequest) returns(RowlockResponse); + rpc UnLock(RowlockRequest) returns(RowlockResponse); +} +option cc_generic_services = true; diff --git a/src/proto/rpc_client.h b/src/proto/rpc_client.h index 74ded0212..9067cc96d 100644 --- a/src/proto/rpc_client.h +++ b/src/proto/rpc_client.h @@ -144,7 +144,7 @@ class RpcClient : public RpcClientBase { int32_t rpc_timeout, ThreadPool* thread_pool = 0) { if (NULL == server_client_.get()) { 
// sync call - if (closure == NULL) { + if (!closure) { return false; } @@ -168,7 +168,7 @@ class RpcClient : public RpcClientBase { (server_client_.get()->*func)(rpc_controller, request, response, done); // sync call - if (closure == NULL) { + if (!closure) { sync_call_event.Wait(); return (!sync_call_failed); } @@ -196,7 +196,7 @@ class RpcClient : public RpcClientBase { delete param; // sync call - if (closure == NULL) { + if (!closure) { client->sync_call_failed = failed; client->sync_call_event.Set(); return; diff --git a/src/proto/status_code.proto b/src/proto/status_code.proto index 24b0ff595..cb99c7235 100644 --- a/src/proto/status_code.proto +++ b/src/proto/status_code.proto @@ -96,6 +96,18 @@ enum StatusCode { kTableStatusEnable = 1000; kTableStatusDisable = 1001; + + // Timeoracle + kTimeoracleOk = 2000; + kTimeoracleBusy = 2001; + + // rowlock service + kLockSucc = 2100; + kLockFail = 2101; + + // LoadBalancer + kLoadBalancerOk = 2200; + kLoadBalancerError = 2201; } enum TabletStatus { @@ -118,6 +130,9 @@ enum TabletStatus { kTabletPending = 65; kTabletOnSnapshot = 66; kTabletDelSnapshot = 67; + + // runtime status + kTabletCorruption = 90; } enum TableStatus { diff --git a/src/proto/table_meta.proto b/src/proto/table_meta.proto index c0df47e63..cdf18b689 100644 --- a/src/proto/table_meta.proto +++ b/src/proto/table_meta.proto @@ -64,6 +64,7 @@ message TabletCounter { optional double write_workload = 11 [default = 0.0]; optional bool is_on_busy = 15 [default = false]; + optional TabletStatus db_status = 16; } message TableCounter { @@ -107,6 +108,7 @@ message TabletMeta { repeated uint64 parent_tablets = 12; repeated int64 lg_size = 13; repeated Rollback rollbacks = 14; + optional int64 last_move_time_us = 15; } message TableMetaList { @@ -130,3 +132,12 @@ message SdkCookie { required string table_name = 1; repeated SdkTabletCookie tablets = 2; } + +message PrimaryInfo { + optional string table_name = 1; + optional bytes row_key = 2; + optional bytes 
column_family = 3; + optional bytes qualifier = 4; + optional int64 gtxn_start_ts = 5; + optional string client_session = 6; +} diff --git a/src/proto/table_schema.proto b/src/proto/table_schema.proto index 9f6c8727d..62c716c53 100644 --- a/src/proto/table_schema.proto +++ b/src/proto/table_schema.proto @@ -39,6 +39,8 @@ message ColumnFamilySchema { optional int32 time_to_live = 8 [default = 0]; // 单位:秒(0:不过期, <0:提前过期, >0:延后过期) optional int64 disk_quota = 9; optional string type = 10; + optional bool gtxn = 11 [default = false]; // 'gtxn=on' for global transaction feature availability + optional bool notify = 12 [default = false]; // 'notify=on' for notify feature availability } message TableSchema { diff --git a/src/proto/tabletnode.proto b/src/proto/tabletnode.proto index fff28caa5..d36f5e0f2 100644 --- a/src/proto/tabletnode.proto +++ b/src/proto/tabletnode.proto @@ -14,6 +14,7 @@ message TabletNodeInfo { optional uint64 timestamp = 4; optional uint32 tablet_total = 5; optional uint32 tablet_onbusy = 6; + optional uint32 tablet_corruption = 7; optional uint32 low_read_cell = 11; optional uint32 scan_rows = 12; diff --git a/src/proto/tabletnode_client.cc b/src/proto/tabletnode_client.cc index b6b347d2d..e57a5e8a8 100644 --- a/src/proto/tabletnode_client.cc +++ b/src/proto/tabletnode_client.cc @@ -105,6 +105,14 @@ bool TabletNodeClient::SplitTablet(const SplitTabletRequest* request, request, response, done, "SplitTablet", rpc_timeout_, thread_pool_); } +bool TabletNodeClient::ComputeSplitKey(const SplitTabletRequest* request, + SplitTabletResponse* response, + std::function done) { + return SendMessageWithRetry(&TabletNodeServer::Stub::ComputeSplitKey, + request, response, done, "ComputeSplitKey", + rpc_timeout_, thread_pool_); +} + bool TabletNodeClient::CompactTablet(const CompactTabletRequest* request, CompactTabletResponse* response, diff --git a/src/proto/tabletnode_client.h b/src/proto/tabletnode_client.h index c56e0d7c0..1033841d0 100644 --- 
a/src/proto/tabletnode_client.h +++ b/src/proto/tabletnode_client.h @@ -69,6 +69,9 @@ class TabletNodeClient : public RpcClient { bool SplitTablet(const SplitTabletRequest* request, SplitTabletResponse* response, std::function done = NULL); + bool ComputeSplitKey(const SplitTabletRequest* request, SplitTabletResponse* response, + std::function done = NULL); + bool CompactTablet(const CompactTabletRequest* request, CompactTabletResponse* response, diff --git a/src/proto/tabletnode_rpc.proto b/src/proto/tabletnode_rpc.proto index 0d79ce0c7..45651203e 100644 --- a/src/proto/tabletnode_rpc.proto +++ b/src/proto/tabletnode_rpc.proto @@ -90,6 +90,7 @@ message LoadTabletRequest { repeated uint64 snapshots_sequence = 10; repeated uint64 parent_tablets = 11; repeated Rollback rollbacks = 12; + repeated string ignore_err_lgs = 13; } message LoadTabletResponse { @@ -263,6 +264,7 @@ message ScanTabletRequest { optional int64 timestamp = 18 [default = 0]; optional int64 timeout = 19; optional int64 number_limit = 21; + optional uint64 max_qualifiers = 22; } message ScanTabletResponse { @@ -282,6 +284,7 @@ message RowReaderInfo { optional TimeRange time_range = 3; optional FilterList filter_list = 4; optional uint32 max_version = 5; + optional uint64 max_qualifiers = 6; } message ReadTabletRequest { @@ -309,11 +312,13 @@ message SplitTabletRequest { optional TabletMeta tablet_meta = 4; repeated uint64 child_tablets = 5; optional bytes split_key = 6; + optional bool master_update_meta = 7; } message SplitTabletResponse { required StatusCode status = 1 [default = kTableMergeError]; required uint64 sequence_id = 2; + repeated string split_keys = 3; } message MergeTabletRequest { @@ -367,6 +372,7 @@ service TabletNodeServer { rpc Rollback(SnapshotRollbackRequest) returns(SnapshotRollbackResponse); rpc SplitTablet(SplitTabletRequest) returns(SplitTabletResponse); + rpc ComputeSplitKey(SplitTabletRequest) returns (SplitTabletResponse); rpc CmdCtrl(TsCmdCtrlRequest) 
returns(TsCmdCtrlResponse); rpc Update(UpdateRequest) returns(UpdateResponse); diff --git a/src/proto/timeoracle_rpc.proto b/src/proto/timeoracle_rpc.proto new file mode 100644 index 000000000..f96661b9f --- /dev/null +++ b/src/proto/timeoracle_rpc.proto @@ -0,0 +1,20 @@ +import "sofa/pbrpc/rpc_option.proto"; +import "status_code.proto"; + +package tera; + +message GetTimestampRequest { + optional uint64 count = 1; +} + +message GetTimestampResponse { + optional StatusCode status = 1; + optional int64 start_timestamp = 2; + optional uint64 count = 3; +} + +service TimeoracleServer { + rpc GetTimestamp(GetTimestampRequest) returns(GetTimestampResponse); +} + +option cc_generic_services = true; diff --git a/src/sample/Makefile b/src/sample/Makefile index 81698c729..02268f2ff 100644 --- a/src/sample/Makefile +++ b/src/sample/Makefile @@ -10,15 +10,15 @@ SHARED_LDFLAGS = -shared -Wl,-soname -Wl, INCPATH += -I../../include $(DEPS_INCPATH) CFLAGS += $(OPT) $(SHARED_CFLAGS) $(INCPATH) -CXXFLAGS += $(OPT) $(SHARED_CFLAGS) $(INCPATH) +CXXFLAGS += -std=gnu++11 $(OPT) $(SHARED_CFLAGS) $(INCPATH) LDFLAGS += ../../build/lib/libtera.a $(DEPS_LDPATH) $(DEPS_LDFLAGS) -lpthread -lz -SAMPLE_SRC := ./tera_sample.cc tera_row_txn_sample.cc atomic_sample.cc +SAMPLE_SRC := ./tera_sample.cc tera_row_txn_sample.cc atomic_sample.cc global_txn_async_sample.cc SAMPLE_OBJ := $(SAMPLE_SRC:.cc=.o) .PHONY: clean -all: sample_demo tera_row_txn_sample atomic_sample +all: sample_demo tera_row_txn_sample atomic_sample global_txn_async_sample global_txn_sync_sample sample_demo: ./tera_sample.o $(CXX) -o $@ $^ $(LDFLAGS) @@ -26,6 +26,12 @@ sample_demo: ./tera_sample.o tera_row_txn_sample: tera_row_txn_sample.o $(CXX) -o $@ $^ $(LDFLAGS) +global_txn_async_sample: global_txn_async_sample.o + $(CXX) -o $@ $^ $(LDFLAGS) + +global_txn_sync_sample: global_txn_sync_sample.o + $(CXX) -o $@ $^ $(LDFLAGS) + atomic_sample: atomic_sample.o $(CXX) -o $@ $^ $(LDFLAGS) @@ -36,5 +42,7 @@ clean: rm -f *.o rm -f 
./sample_demo rm -f ./tera_row_txn_sample + rm -f ./global_txn_async_sample + rm -f ./global_txn_sync_sample rm -f ./atomic_sample diff --git a/src/sample/atomic_sample.cc b/src/sample/atomic_sample.cc index ce35fbe6b..3053ec8b4 100644 --- a/src/sample/atomic_sample.cc +++ b/src/sample/atomic_sample.cc @@ -1,4 +1,5 @@ #include +#include #include "tera.h" int main() { diff --git a/src/sample/global_txn_async_sample.cc b/src/sample/global_txn_async_sample.cc new file mode 100644 index 000000000..a2f77896e --- /dev/null +++ b/src/sample/global_txn_async_sample.cc @@ -0,0 +1,143 @@ +#include +#include +#include +#include + +#include +#include + +#include "tera.h" + +std::string read_result = ""; +std::atomic all_gtxn_thread_done(false); +std::atomic finish_cnt(0); + +struct RowReaderContext { + tera::Transaction* gtxn; + tera::Table* t1; + tera::Table* t2; +}; + +tera::Table* InitTable(tera::Client* client, const std::string& tablename) { + tera::ErrorCode error_code; + if (!client->IsTableExist(tablename, &error_code)) { + tera::TableDescriptor schema(tablename); + schema.EnableTxn(); // every table schema that takes part in a global transaction must set txn=true + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + cfd2->DisableGlobalTransaction(); + client->CreateTable(schema, &error_code); + assert(error_code.GetType() == tera::ErrorCode::kOK); + } + + tera::Table* table = client->OpenTable(tablename, &error_code); + assert(table && error_code.GetType() == tera::ErrorCode::kOK); + return table; +} + +void TxnCallBack(tera::Transaction* txn) { + if (txn->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << "txn failed, start_ts= " << txn->GetStartTimestamp() + << ", reason= " << txn->GetError().ToString() + << std::endl; + } else { + std::cout << "gtxn success" << std::endl; + } + delete txn; + all_gtxn_thread_done.store(true); +} + +void 
ReadRowCallBack(tera::RowReader* row_reader) { + RowReaderContext* ctx = (RowReaderContext*)row_reader->GetContext(); + while (!row_reader->Done()) { + printf("Row: %s\%s\%ld\%s\n", + row_reader->RowName().c_str(), row_reader->ColumnName().c_str(), + row_reader->Timestamp(), row_reader->Value().c_str()); + read_result += row_reader->Value(); // append the current cell before advancing, never after the last Next() + row_reader->Next(); + } + delete row_reader; + const auto finished = ++finish_cnt; // single atomic increment-and-fetch: exactly one callback observes 2 + // mutations begin once all reader callbacks are done + if (finished == 2) { + // write to other columns + tera::Transaction* g_txn = ctx->gtxn; + tera::RowMutation* m1 = ctx->t1->NewRowMutation("r1"); + tera::RowMutation* m2 = ctx->t2->NewRowMutation("r1"); + m1->Put( "cf1", "q1", read_result); + m2->Put( "cf1", "q1", read_result); + + // ApplyMutation only modifies local memory, so it does not need to be asynchronous + // an asynchronous interface for RowMutation is also supported, if you prefer + g_txn->ApplyMutation(m1); + g_txn->ApplyMutation(m2); + g_txn->SetCommitCallback(TxnCallBack); + delete m1; + delete m2; + // no need to check ApplyMutation here; the Transaction is checked before commit. 
+ g_txn->Commit(); + } +} + +void DoTxn(tera::Client* client, tera::Table* t1, tera::Table* t2) { + + // begin global transaction + tera::Transaction* g_txn = client->NewGlobalTransaction(); + if (g_txn == NULL) { + all_gtxn_thread_done.store(true); return; // no callback will ever fire, so release main's wait loop + } + + // read from different tables + tera::RowReader* r1 = t1->NewRowReader("r1"); + tera::RowReader* r2 = t2->NewRowReader("r1"); + r1->AddColumn("cf1", "q2"); + r2->AddColumn("cf1", "q2"); + r1->SetCallBack(ReadRowCallBack); + r2->SetCallBack(ReadRowCallBack); + static RowReaderContext ctx; // static storage: the reader callbacks dereference it after DoTxn has returned + ctx.gtxn = g_txn; + ctx.t1 = t1; + ctx.t2 = t2; + r1->SetContext(&ctx); + r2->SetContext(&ctx); + // read from t1:r1:cf1:q2 and check + g_txn->Get(r1); + // read from t2:r1:cf1:q2 and check + g_txn->Get(r2); +} + +int main(int argc, char *argv[]) { + + tera::ErrorCode error_code; + + tera::Client* client = tera::Client::NewClient("../conf/tera.flag", "global_txn_sample_async", &error_code); + if (client == NULL) { + return -1; + } + + // create or open tables + // before starting a global transaction you should + // (1) OpenTable every table you will r/w + // (2) check that OpenTable succeeded + tera::Table* t1 = InitTable(client, "t1"); + tera::Table* t2 = InitTable(client, "t2"); + + // the global transaction may be submitted to a thread pool that you implement yourself. + // + // In this example, + // + // first, read two cell values from different tables, + // next, concatenate all values in the reader callback, + // last, put the concatenated result into different tables. + DoTxn(client, t1, t2); + + // the global transaction thread always finishes before the callback, + // so wait for the callback thread to finish in the main thread; + // if you know the program can't exit before the callback is done, this is not necessary. 
+ while (!all_gtxn_thread_done.load()) { + usleep(100); + } + return 0; +} diff --git a/src/sample/global_txn_sync_sample.cc b/src/sample/global_txn_sync_sample.cc new file mode 100644 index 000000000..66bb94b7d --- /dev/null +++ b/src/sample/global_txn_sync_sample.cc @@ -0,0 +1,107 @@ +#include +#include + +#include +#include "tera.h" + +int main(int argc, char *argv[]) { + + tera::ErrorCode error_code; + + tera::Client* client = tera::Client::NewClient("../conf/tera.flag", "global_txn_sample", &error_code); + assert(client); + // create or open tables + tera::Table* t1 = nullptr; + tera::Table* t2 = nullptr; + if (!client->IsTableExist("t1", &error_code)) { + tera::TableDescriptor schema("t1"); + schema.EnableTxn(); // every table schema that takes part in a global transaction must set txn=true + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + cfd2->EnableGlobalTransaction(); + client->CreateTable(schema, &error_code); + assert(error_code.GetType() == tera::ErrorCode::kOK); + } + + if (!client->IsTableExist("t2", &error_code)) { + tera::TableDescriptor schema("t2"); + schema.EnableTxn(); // every table schema that takes part in a global transaction must set txn=true + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + cfd2->EnableGlobalTransaction(); + client->CreateTable(schema, &error_code); + assert(error_code.GetType() == tera::ErrorCode::kOK); + } + // before starting a global transaction you should + // (1) OpenTable every table you will r/w + // (2) check that OpenTable succeeded + t1 = client->OpenTable("t1", &error_code); + assert(t1 && error_code.GetType() == tera::ErrorCode::kOK); + + t2 = client->OpenTable("t2", &error_code); + assert(t2 && error_code.GetType() == tera::ErrorCode::kOK); + + // begin global transaction + tera::Transaction* g_txn = 
client->NewGlobalTransaction(); + if (g_txn == NULL) { + return -1; + } + if (error_code.GetType()!=tera::ErrorCode::kOK) { + std::cout << error_code.ToString() << std::endl; + return -1; + } + // read from different tables + std::unique_ptr r1(t1->NewRowReader("r1")); + std::unique_ptr r2(t2->NewRowReader("r1")); + r1->AddColumn("cf1", "q2"); + r2->AddColumn("cf1", "q2"); + // read from t1:r1:cf1:q2 and check + g_txn->Get(r1.get()); + if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << g_txn->GetError().ToString() << std::endl; + return -1; + } + std::string r1_v = ""; + while(!r1->Done()) { + std::cout << r1->Value() << std::endl; + r1_v = r1->Value(); + r1->Next(); + } + + // read from t2:r1:cf1:q2 and check + g_txn->Get(r2.get()); + if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << g_txn->GetError().ToString() << std::endl; + return -1; + } + std::string r2_v = ""; + while(!r2->Done()) { + std::cout << r2->Value() << std::endl; + r2_v = r2->Value(); + r2->Next(); + } + + // write to other columns + std::unique_ptr m1(t1->NewRowMutation("r1")); + std::unique_ptr m2(t2->NewRowMutation("r1")); + m1->Put( "cf1", "q1", r2_v); + m2->Put( "cf1", "q1", r1_v); + + g_txn->ApplyMutation(m1.get()); + g_txn->ApplyMutation(m2.get()); + // need not check ApplyMutation, Transaction will be check before commit. 
+ g_txn->Commit(); + if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << g_txn->GetError().ToString() << std::endl; + } else { + std::cout << "commit success" << std::endl; + } + + delete g_txn; + // end global transaction + return 0; +} diff --git a/src/sample/tera_row_txn_sample.cc b/src/sample/tera_row_txn_sample.cc index 4c9897708..879652dfc 100644 --- a/src/sample/tera_row_txn_sample.cc +++ b/src/sample/tera_row_txn_sample.cc @@ -1,3 +1,6 @@ +#include +#include + #include "tera.h" int main() { diff --git a/src/sdk/client_impl.cc b/src/sdk/client_impl.cc index bc9eb1998..3599b2e9e 100644 --- a/src/sdk/client_impl.cc +++ b/src/sdk/client_impl.cc @@ -10,6 +10,7 @@ #include "gflags/gflags.h" #include "common/file/file_path.h" +#include "common/log/log_cleaner.h" #include "common/mutex.h" #include "proto/kv_helper.h" #include "proto/master_client.h" @@ -17,6 +18,8 @@ #include "proto/table_meta.pb.h" #include "proto/tabletnode_client.h" #include "sdk/table_impl.h" +#include "sdk/global_txn.h" +#include "sdk/sdk_perf.h" #include "sdk/sdk_utils.h" #include "sdk/sdk_zk.h" #include "utils/config_utils.h" @@ -43,6 +46,12 @@ DECLARE_int32(tera_sdk_rpc_max_pending_buffer_size); DECLARE_int32(tera_sdk_rpc_work_thread_num); DECLARE_int32(tera_sdk_show_max_num); DECLARE_bool(tera_online_schema_update_enabled); +DECLARE_string(tera_log_prefix); +DECLARE_bool(tera_info_log_clean_enable); +DECLARE_bool(tera_sdk_perf_collect_enabled); +DECLARE_int32(tera_gtxn_thread_max_num); +DECLARE_bool(tera_sdk_client_for_gtxn); +DECLARE_bool(tera_sdk_tso_client_enabled); namespace tera { @@ -55,14 +64,40 @@ void LogSdkVersionInfo() { ClientImpl::ClientImpl(const std::string& user_identity, const std::string& user_passcode) : thread_pool_(FLAGS_tera_sdk_thread_max_num), + gtxn_thread_pool_(NULL), user_identity_(user_identity), - user_passcode_(user_passcode) { + user_passcode_(user_passcode), + client_zk_adapter_(NULL), + tso_cluster_(NULL), + collecter_(NULL), + 
session_str_("") { tabletnode::TabletNodeClient::SetThreadPool(&thread_pool_); tabletnode::TabletNodeClient::SetRpcOption( FLAGS_tera_sdk_rpc_limit_enabled ? FLAGS_tera_sdk_rpc_limit_max_inflow : -1, FLAGS_tera_sdk_rpc_limit_enabled ? FLAGS_tera_sdk_rpc_limit_max_outflow : -1, FLAGS_tera_sdk_rpc_max_pending_buffer_size, FLAGS_tera_sdk_rpc_work_thread_num); - cluster_ = sdk::NewClusterFinder(); + + if (FLAGS_tera_sdk_client_for_gtxn) { + client_zk_adapter_ = sdk::NewClientZkAdapter(); + client_zk_adapter_->Init(); + cluster_ = sdk::NewClusterFinder(client_zk_adapter_); + if (FLAGS_tera_sdk_tso_client_enabled) { + tso_cluster_ = sdk::NewTimeoracleClusterFinder(); + } + gtxn_thread_pool_ = new ThreadPool(FLAGS_tera_gtxn_thread_max_num); + RegisterSelf(); + } else { + cluster_ = sdk::NewClusterFinder(); + } + + if (FLAGS_tera_sdk_perf_collect_enabled) { + collecter_ = new sdk::PerfCollecter(); + collecter_->Run(); + LOG(INFO) << "start perf collect"; + } else { + LOG(INFO) << "perf collect disable"; + } + pthread_once(&sdk_client_once_control, LogSdkVersionInfo); } @@ -77,6 +112,17 @@ ClientImpl::~ClientImpl() { } } delete cluster_; + if (FLAGS_tera_sdk_perf_collect_enabled) { + collecter_->Stop(); + delete collecter_; + } + if (FLAGS_tera_sdk_client_for_gtxn) { + delete gtxn_thread_pool_; + if (FLAGS_tera_sdk_tso_client_enabled) { + delete tso_cluster_; + } + delete client_zk_adapter_; + } } bool ClientImpl::CreateTable(const TableDescriptor& desc, ErrorCode* err) { @@ -1173,6 +1219,29 @@ bool ClientImpl::ParseTabletEntry(const TabletMeta& meta, std::vectorIsClientAlive(path); + } + return true; +} + +std::string ClientImpl::ClientSession() { + return session_str_; +} + +bool ClientImpl::RegisterSelf() { + if (client_zk_adapter_ != NULL) { + return client_zk_adapter_->RegisterClient(&session_str_); + } else { + return false; + } +} + static Mutex g_mutex; static bool g_is_glog_init = false; @@ -1223,6 +1292,14 @@ static int InitFlags(const std::string& confpath, const 
std::string& log_prefix) if (!g_is_glog_init) { ::google::InitGoogleLogging(log_prefix.c_str()); utils::SetupLog(log_prefix); + FLAGS_tera_log_prefix = log_prefix; + // start log cleaner + if (FLAGS_tera_info_log_clean_enable) { + common::LogCleaner::StartCleaner(); + LOG(INFO) << "start log cleaner"; + } else { + LOG(INFO) << "log cleaner is disable"; + } g_is_glog_init = true; } diff --git a/src/sdk/client_impl.h b/src/sdk/client_impl.h index f401111f3..246e7608d 100644 --- a/src/sdk/client_impl.h +++ b/src/sdk/client_impl.h @@ -8,9 +8,11 @@ #include "common/thread_pool.h" #include "proto/master_rpc.pb.h" #include "proto/tabletnode_client.h" +#include "sdk/sdk_perf.h" #include "sdk/sdk_zk.h" +#include "sdk/timeoracle_client_impl.h" #include "tera.h" -#include "utils/timer.h" +#include "common/timer.h" using std::string; @@ -97,6 +99,8 @@ class ClientImpl : public Client { string* str_result, ErrorCode* err); + virtual Transaction* NewGlobalTransaction(); + bool ShowTableSchema(const string& name, TableSchema* meta, ErrorCode* err); bool ShowTablesInfo(const string& name, TableMeta* meta, @@ -117,6 +121,10 @@ class ClientImpl : public Client { void CloseTable(const string& table_name); TableImpl* OpenTableInternal(const string& table_name, ErrorCode* err); + bool IsClientAlive(const string& path); + + string ClientSession(); + private: bool ListInternal(std::vector* table_list, std::vector* tablet_list, @@ -147,10 +155,13 @@ class ClientImpl : public Client { bool is_brief, ErrorCode* err); + bool RegisterSelf(); + private: ClientImpl(const ClientImpl&); void operator=(const ClientImpl&); ThreadPool thread_pool_; + ThreadPool* gtxn_thread_pool_; std::string user_identity_; std::string user_passcode_; @@ -160,7 +171,11 @@ class ClientImpl : public Client { /// we have to access zookeeper whenever we need master_addr or root_table_addr. /// if there is cluster_, /// we save master_addr & root_table_addr in cluster_, access zookeeper only once. 
+ sdk::ClientZkAdapterBase* client_zk_adapter_; sdk::ClusterFinder* cluster_; + sdk::ClusterFinder* tso_cluster_; + sdk::PerfCollecter* collecter_; + std::string session_str_; Mutex open_table_mutex_; struct TableHandle { diff --git a/src/sdk/global_txn.cc b/src/sdk/global_txn.cc new file mode 100644 index 000000000..a003cbd64 --- /dev/null +++ b/src/sdk/global_txn.cc @@ -0,0 +1,1142 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com + +#include +#include + +#include "common/metric/metric_counter.h" +#include "common/this_thread.h" +#include "common/thread.h" +#include "proto/table_meta.pb.h" +#include "proto/tabletnode_rpc.pb.h" +#include "sdk/global_txn.h" +#include "sdk/read_impl.h" +#include "sdk/timeoracle_client_impl.h" + +DECLARE_bool(tera_gtxn_test_opened); +DECLARE_string(tera_gtxn_test_flagfile); +DECLARE_int32(tera_gtxn_get_waited_times_limit); +DECLARE_int32(tera_gtxn_timeout_ms); +DECLARE_bool(tera_sdk_tso_client_enabled); + +namespace tera { + +extern tera::MetricCounter gtxn_read_cnt; +extern tera::MetricCounter gtxn_read_fail_cnt; +extern tera::MetricCounter gtxn_read_retry_cnt; +extern tera::MetricCounter gtxn_read_rollback_cnt; +extern tera::MetricCounter gtxn_read_rollforward_cnt; +extern tera::MetricCounter gtxn_commit_cnt; +extern tera::MetricCounter gtxn_commit_fail_cnt; +extern tera::MetricCounter gtxn_prewrite_cnt; +extern tera::MetricCounter gtxn_prewrite_fail_cnt; +extern tera::MetricCounter gtxn_primary_cnt; +extern tera::MetricCounter gtxn_primary_fail_cnt; +extern tera::MetricCounter gtxn_secondaries_cnt; +extern tera::MetricCounter gtxn_secondaries_fail_cnt; +extern tera::MetricCounter gtxn_acks_cnt; +extern tera::MetricCounter gtxn_acks_fail_cnt; +extern tera::MetricCounter gtxn_notifies_cnt; +extern tera::MetricCounter gtxn_notifies_fail_cnt; + +Transaction* 
GlobalTxn::NewGlobalTxn(tera::Client* client, + common::ThreadPool* thread_pool, + sdk::ClusterFinder* tso_cluster) { + if (client != NULL) { + return new GlobalTxn(client, thread_pool, tso_cluster); + } + LOG(ERROR) << "client or tso_cluster is NULL"; + return NULL; +} + +GlobalTxn::GlobalTxn(tera::Client* client, + common::ThreadPool* thread_pool, + sdk::ClusterFinder* tso_cluster) : + gtxn_internal_(new GlobalTxnInternal(client)), + status_returned_(false), + primary_write_(NULL), + writes_size_(0), + commit_ts_(0), + isolation_level_(IsolationLevel::kSnapshot), + serialized_primary_(""), + finish_(false), + finish_cond_(&finish_mutex_), + has_commited_(false), + user_commit_callback_(NULL), + user_commit_context_(NULL), + thread_pool_(thread_pool), + tso_cluster_(tso_cluster), + timeout_ms_(FLAGS_tera_gtxn_timeout_ms), + all_task_pushed_(false) { + if (FLAGS_tera_gtxn_test_opened) { + VLOG(12) << "conf_file = " << FLAGS_tera_gtxn_test_flagfile; + start_ts_ = gtxn_internal_->TEST_Init(FLAGS_tera_gtxn_test_flagfile); + } else if (!FLAGS_tera_sdk_tso_client_enabled) { + start_ts_ = get_micros(); + } else { + timeoracle::TimeoracleClientImpl tsoc(thread_pool_, tso_cluster_); + start_ts_ = tsoc.GetTimestamp(1); + if (start_ts_ == 0) { + status_.SetFailed(ErrorCode::kGTxnTimestampLost); + status_returned_ = true; + } + } + prewrite_start_ts_ = start_ts_; + gtxn_internal_->SetStartTimestamp(start_ts_); +} + +GlobalTxn::~GlobalTxn() { +} + +void GlobalTxn::SetIsolation(const IsolationLevel& isolation_level) { + assert(has_commited_ == false); + isolation_level_ = isolation_level; +} + +void GlobalTxn::SetTimeout(int64_t timeout_ms) { + timeout_ms_ = timeout_ms; +} + +int64_t GlobalTxn::Timeout() { + return timeout_ms_; +} + +void GlobalTxn::SetReaderStatusAndRunCallback(RowReaderImpl* reader_impl, + ErrorCode* status) { + gtxn_read_cnt.Inc(); + gtxn_internal_->PerfReadDelay(0, get_micros()); // finish_time + VLOG(12) << "[gtxn][get][" << start_ts_ << "][status] :" << 
status->ToString(); + reader_impl->SetError(status->GetType(), status->GetReason()); + thread_pool_->AddTask(std::bind(&RowReaderImpl::RunCallback, reader_impl)); +} + +ErrorCode GlobalTxn::Get(RowReader* row_reader) { + assert(row_reader != NULL); + gtxn_internal_->PerfReadDelay(get_micros(), 0); // begin_time + gtxn_internal_->TEST_GetSleep(); + + RowReaderImpl* reader_impl = static_cast(row_reader); + reader_impl->SetTransaction(this); + + // Pre Check can read + ErrorCode status; + status.SetFailed(ErrorCode::kOK); + if (has_commited_.load()) { + std::string reason = "get failed, txn has commited @ [" + + std::to_string(start_ts_) + "," + std::to_string(commit_ts_); + LOG(ERROR) << "[gtxn][get][" << start_ts_ <<"] " << reason; + status.SetFailed(ErrorCode::kGTxnOpAfterCommit, reason); + SetReaderStatusAndRunCallback(reader_impl, &status); + return status; + } + + Table* table = row_reader->GetTable(); + const std::string& row_key = row_reader->RowKey(); + // Check UserReader and Build cells + if (!gtxn_internal_->VerifyUserRowReader(row_reader)) { + status = reader_impl->GetError(); + SetReaderStatusAndRunCallback(reader_impl, &status); + return status; + } + + std::vector cells; + for (auto it : row_reader->GetReadColumnList()) { + const std::string& column_family = it.first; + const std::set& qualifier_set = it.second; + + for (auto q_it = qualifier_set.begin(); q_it != qualifier_set.end(); ++q_it) { + const std::string& qualifier = *q_it; + cells.push_back(new Cell(table, row_key, column_family, qualifier)); + } + } + int expected_cells_cnt = cells.size(); + + InternalReaderContext* ctx = new InternalReaderContext(expected_cells_cnt, reader_impl, this); + for(auto& cell : cells) { + ctx->cell_map[cell] = 0; // cell* -> try_time, default = 0 + AsyncGetCell(cell, reader_impl, ctx); + } + + // sync wait and set status + if(!reader_impl->IsAsync()) { + reader_impl->Wait(); + status = reader_impl->GetError(); + return status; + } + return status; +} + +void 
GlobalTxn::AsyncGetCell(Cell* cell, + RowReaderImpl* user_reader_impl, + InternalReaderContext* ctx) { + VLOG(12) << "[gtxn][get][" << start_ts_ << "] " + << gtxn_internal_->DebugString(*cell, "TryGet times(" + std::to_string(ctx->cell_map[cell]) + ")"); + + Table* table = cell->Table(); + RowReader* reader = table->NewRowReader(cell->RowKey()); + reader->AddColumn(cell->ColFamily(), cell->LockName()); + reader->AddColumn(cell->ColFamily(), cell->WriteName()); + reader->AddColumn(cell->ColFamily(), cell->Qualifier()); + reader->SetTimeRange(0, kMaxTimeStamp); + reader->SetMaxVersions(UINT32_MAX); + reader->SetCallBack([] (RowReader* r) { + CellReaderContext* ctx = (CellReaderContext*)r->GetContext(); + GlobalTxn* gtxn = static_cast(ctx->internal_reader_ctx->gtxn); + gtxn->thread_pool_->AddTask(std::bind(&GlobalTxn::DoGetCellReaderCallback, + gtxn, static_cast(r))); + }); + reader->SetContext(new CellReaderContext(cell, ctx)); + table->Get(reader); +} + +void GlobalTxn::DoGetCellReaderCallback(RowReader* reader) { + ErrorCode status = reader->GetError(); + if (status.GetType() != ErrorCode::kOK) { + MergeCellToRow(reader, status); + return; + } + + RowReader::TRow row; + reader->ToMap(&row); + CellReaderContext* ctx = (CellReaderContext*)reader->GetContext(); + Cell* cell = ctx->cell; + if (row.find(cell->ColFamily()) == row.end()) { + status.SetFailed(ErrorCode::kNotFound, "columnfamily not found"); + MergeCellToRow(reader, status); + return; + } + // local check lock + if (gtxn_internal_->IsLockedByOthers(row, *cell)) { + // sync operate + status.SetFailed(ErrorCode::kOK); + InternalReaderContext* internal_reader_ctx = ctx->internal_reader_ctx; + bool do_clean = false; + // check clean lock before read cell next time, + // when read times >= limit - 1 do clean lock opreations + if (internal_reader_ctx->cell_map[cell] >= FLAGS_tera_gtxn_get_waited_times_limit - 1) { + do_clean = true; + } + BackoffAndMaybeCleanupLock(row, *cell, do_clean, &status); + if 
(status.GetType() == ErrorCode::kOK) { + // call Next time to async GetCell + // don't merge until next time ok or failed + ++ internal_reader_ctx->cell_map[cell]; + gtxn_read_retry_cnt.Inc(); + AsyncGetCell(cell, + static_cast(internal_reader_ctx->user_reader), + internal_reader_ctx); + return; + } + } else if (!FindValueFromResultRow(row, cell)) { + status.SetFailed(ErrorCode::kNotFound, "build data col from write col failed"); + } + MergeCellToRow(reader, status); +} + +void GlobalTxn::MergeCellToRow(RowReader* internal_reader, + const ErrorCode& status) { + CellReaderContext* ctx = (CellReaderContext*)internal_reader->GetContext(); + ctx->status = status; + VLOG(12) << "[gtxn][get][" << start_ts_ << "] " + << gtxn_internal_->DebugString(*(ctx->cell), status.ToString()); + GetCellCallback(ctx); + // a retry allocates a fresh RowReader, so this one is no longer needed + delete internal_reader; +} + +void GlobalTxn::GetCellCallback(CellReaderContext* ctx) { + InternalReaderContext* internal_reader_ctx = ctx->internal_reader_ctx; + Cell* cell = ctx->cell; + bool last_cell = false; + { + MutexLock lock(&mu_); + ++internal_reader_ctx->active_cell_cnt; + if (internal_reader_ctx->fail_cell_cnt == 0 && ctx->status.GetType() == ErrorCode::kOK) { + KeyValuePair* kv = internal_reader_ctx->results.add_key_values(); + kv->set_key(cell->RowKey()); + kv->set_column_family(cell->ColFamily()); + kv->set_qualifier(cell->Qualifier()); + kv->set_timestamp(cell->Timestamp()); + kv->set_value(cell->Value()); + } else if (ctx->status.GetType() != ErrorCode::kNotFound) { + ++internal_reader_ctx->fail_cell_cnt; + internal_reader_ctx->results.clear_key_values(); + if (ctx->status.GetType() != ErrorCode::kOK) internal_reader_ctx->last_err = ctx->status; // a late successful cell must not overwrite the recorded failure + } else { + ++internal_reader_ctx->not_found_cnt; + } + last_cell = (internal_reader_ctx->active_cell_cnt == internal_reader_ctx->expected_cell_cnt); + } + if (last_cell) { + ErrorCode last_err = internal_reader_ctx->last_err; + RowReaderImpl* reader_impl = static_cast(internal_reader_ctx->user_reader); 
+ if (internal_reader_ctx->fail_cell_cnt > 0) { + gtxn_read_fail_cnt.Inc(); + } else if (internal_reader_ctx->not_found_cnt == internal_reader_ctx->expected_cell_cnt) { + // all cell not found + last_err.SetFailed(ErrorCode::kNotFound); + } else { + reader_impl->SetResult(internal_reader_ctx->results); + last_err.SetFailed(ErrorCode::kOK); + } + delete internal_reader_ctx; + SetReaderStatusAndRunCallback(reader_impl, &last_err); + } +} + +bool GlobalTxn::FindValueFromResultRow(RowReader::TRow& result_row, Cell* target_cell) { + + auto write_col_it = result_row[target_cell->ColFamily()].find(target_cell->WriteName()); + auto data_col_it = result_row[target_cell->ColFamily()].find(target_cell->Qualifier()); + + // check write col and data col exsit + if (write_col_it == result_row[target_cell->ColFamily()].end() + || data_col_it == result_row[target_cell->ColFamily()].end()) { + return false; + } + auto write_col = result_row[target_cell->ColFamily()][target_cell->WriteName()]; + auto data_col = result_row[target_cell->ColFamily()][target_cell->Qualifier()]; + + for (auto k1 = write_col.rbegin(); k1 != write_col.rend(); ++k1) { + int64_t write_ts = k1->first; + std::string write_value = k1->second; + VLOG(12) << "[gtxn][get][" << start_ts_ << "] found write col, ts=" + << write_ts << ", internal val = " << write_value; + int write_type; + int64_t data_ts; + // skip new version value or skip error write format version + if (write_ts > start_ts_ || !DecodeWriteValue(write_value, &write_type, &data_ts)) { + continue; + } + VLOG(12) << "[gtxn][get][" << start_ts_ << "] decode write col, ts=" + << write_ts << ", type=" << write_type << ", value=" << data_ts; + // get data col , ts == data_ts + for (auto k2 = data_col.rbegin(); k2 != data_col.rend(); ++k2) { + VLOG(12) << "[gtxn][get][" << start_ts_ << "] found data col, ts=" + << k2->first << ", internal val = " << k2->second; + if (k2->first == data_ts && write_type == RowMutation::kPut) { + 
target_cell->SetTimestamp(data_ts); + target_cell->SetValue(k2->second); + return true; + } else if (k2->first < data_ts) { + VLOG(12) << "[gtxn][get][" << start_ts_ + << "] data cell version not found, v=" << k2->first; + break; + } + } + VLOG(12) << "[gtxn][get][" << start_ts_ << "] check data col failed, no data"; + break; + } + VLOG(12) << "[gtxn][get][" << start_ts_ + << "] write col versions count" << write_col.size(); + return false; +} + +void GlobalTxn::BackoffAndMaybeCleanupLock(RowReader::TRow& row, const Cell& cell, + const bool try_clean, ErrorCode* status) { + VLOG(12) << gtxn_internal_->DebugString(cell, "[gtxn][get][" + + std::to_string(start_ts_) + " backoff or cleanup lock"); + // get lock ts + int64_t lock_ts = -1; + int lock_type = -1; + tera::PrimaryInfo primary_info; + for (auto k = row[cell.ColFamily()][cell.LockName()].rbegin(); + k != row[cell.ColFamily()][cell.LockName()].rend(); ++k) { + if (k->first < start_ts_) { + lock_ts = k->first; + VLOG(12) << "lock_ts=" << lock_ts << ", primary_str=" << k->second; + if (!DecodeLockValue(k->second, &lock_type, &primary_info)) { + status->SetFailed(ErrorCode::kGTxnPrimaryLost, "can't found primary"); + return; + } + break; + } + } + // get primary lock + const std::string& process = "[gtxn][get][" + std::to_string(start_ts_) + + "][check locked and writed]"; + bool ret = gtxn_internal_->PrimaryIsLocked(primary_info, lock_ts, status); + if (status->GetType() != ErrorCode::kOK && status->GetType() != ErrorCode::kNotFound) { + LOG(ERROR) << gtxn_internal_->DebugString(cell, process + " failed," + status->ToString()); + return; + } else if (ret) { + // NotFound means : other txn on prewrite process + // and this cell locked but primary unlocked(failed) + VLOG(12) << gtxn_internal_->DebugString(cell, process + " succeed"); + // primary at prewrite do (1) clean or (2) wait + if (try_clean) { + CleanLock(cell, primary_info, status); + } else if (gtxn_internal_->SuspectLive(primary_info)) { + // TODO add a 
better sleep strategy + ThisThread::Sleep(100); + } else { + CleanLock(cell, primary_info, status); + } + } else { + if (!gtxn_internal_->IsPrimary(cell, primary_info)) { + VLOG(12) << gtxn_internal_->DebugString(cell, process + ", will do rollforward"); + // primary maybe at commited do roll_forward + RollForward(cell, primary_info, lock_type, status); + if (status->GetType() == ErrorCode::kGTxnPrimaryLost) { + VLOG(12) << gtxn_internal_->DebugString(cell, process + ", rollforward failed, try clean lock"); + // primary prewrite failed + status->SetFailed(ErrorCode::kOK); + if (try_clean) { + CleanLock(cell, primary_info, status); + } else if (gtxn_internal_->SuspectLive(primary_info)) { + ThisThread::Sleep(100); + } else { + CleanLock(cell, primary_info, status); + } + } + } else { + VLOG(12) << gtxn_internal_->DebugString(cell, process + ", ignore(primary)"); + } + } +} + +void GlobalTxn::CleanLock(const Cell& cell, const tera::PrimaryInfo& primary, ErrorCode* status) { + gtxn_read_rollback_cnt.Inc(); + Table* primary_table = gtxn_internal_->FindTable(primary.table_name()); + assert(primary_table != NULL); + const Cell& primary_cell = Cell(primary_table, primary.row_key(), + primary.column_family(), primary.qualifier()); + // if now cell is primary + bool is_same = cell.Table()->GetName() == primary_table->GetName() + && cell.RowKey() == primary_cell.RowKey() + && cell.ColFamily() == primary_cell.ColFamily() + && cell.LockName() == primary_cell.LockName(); + if (!is_same) { + VLOG(12) << "[gtxn][get][" << start_ts_ << "] " + << gtxn_internal_->DebugString(primary_cell, "clean lock primary"); + RowMutation* pri_mu = primary_table->NewRowMutation(primary_cell.RowKey()); + // delete all info between [0, start_ts_] at lock col + pri_mu->DeleteColumns(primary_cell.ColFamily(), primary_cell.LockName(), start_ts_); + primary_table->ApplyMutation(pri_mu); + if (pri_mu->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << pri_mu->GetError().ToString(); + 
*status = pri_mu->GetError(); + } + delete pri_mu; + } + VLOG(12) << "[gtxn][get][" << start_ts_ << "] " + << gtxn_internal_->DebugString(cell, "clean lock this cell"); + RowMutation* this_mu = (cell.Table())->NewRowMutation(cell.RowKey()); + // delete all info between [0, start_ts_] at lock col + this_mu->DeleteColumns(cell.ColFamily(), cell.LockName(), start_ts_); + (cell.Table())->ApplyMutation(this_mu); + if (this_mu->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << "[gtxn][get][" << start_ts_ << "] clean lock failed :" + << this_mu->GetError().ToString(); + *status = this_mu->GetError(); + } + delete this_mu; +} + +void GlobalTxn::RollForward(const Cell& cell, const tera::PrimaryInfo& primary, + int lock_type, ErrorCode* status) { + gtxn_read_rollforward_cnt.Inc(); + // find primary write col start_ts + Table* pri_table = gtxn_internal_->FindTable(primary.table_name()); + assert(pri_table != NULL); + std::unique_ptr primary_cell(new Cell(pri_table, primary.row_key(), + primary.column_family(), + primary.qualifier())); + RowReader* reader = pri_table->NewRowReader(primary_cell->RowKey()); + reader->AddColumn(primary_cell->ColFamily(), primary_cell->WriteName()); + reader->SetTimeRange(0, kMaxTimeStamp); + reader->SetMaxVersions(UINT32_MAX); + pri_table->Get(reader); + if (reader->GetError().GetType() != ErrorCode::kOK) { + if (reader->GetError().GetType() == ErrorCode::kNotFound) { + status->SetFailed(ErrorCode::kGTxnPrimaryLost, "primary lost, not 'lock' and 'write'"); + } else { + LOG(WARNING) << status->GetReason(); + *status = reader->GetError(); + } + delete reader; + return; + } + int64_t commit_ts = -1; + int write_type; + int64_t data_ts = -1; + while (!reader->Done()) { + // decode primary cell write col value + std::string reader_value = reader->Value(); + DecodeWriteValue(reader_value, &write_type, &data_ts); + VLOG(12) << "[gtxn][get][ " << start_ts_ << "] decode primary 'write', ts=" << reader->Timestamp() + << ", type=" << 
write_type << ", value=" << data_ts; + VLOG(12) << "[gtxn][get][ " << start_ts_ << "] primary start_ts=" << primary.gtxn_start_ts(); + if (data_ts > 0 && data_ts < primary.gtxn_start_ts()) { + status->SetFailed(ErrorCode::kGTxnPrimaryLost, "primary lost, not 'lock' and 'write'"); + delete reader; + return; + } else if (data_ts == primary.gtxn_start_ts()) { + commit_ts = reader->Timestamp(); + break; + } + reader->Next(); + } + delete reader; + + if (commit_ts > 0) { + RowMutation* this_mu = cell.Table()->NewRowMutation(cell.RowKey()); + this_mu->Put(cell.ColFamily(), + cell.WriteName(), + EncodeWriteValue(lock_type, data_ts), + commit_ts); + this_mu->DeleteColumns(cell.ColFamily(), cell.LockName(), commit_ts); + cell.Table()->ApplyMutation(this_mu); + if (this_mu->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << this_mu->GetError().GetReason(); + *status = this_mu->GetError(); + } + delete this_mu; + } else { + status->SetFailed(ErrorCode::kGTxnPrimaryLost, "not found primary cell"); + } +} + +void GlobalTxn::SaveWrite(const std::string& tablename, const std::string& row_key, + tera::Write& w) { + MutexLock lock(&mu_); + TableWithRowkey twr(tablename, row_key); + auto it = writes_.find(twr); + if (it != writes_.end()) { + std::vector* ws_ptr = &(writes_[twr]); + ws_ptr->push_back(w); + } else { + std::vector ws; + ws.push_back(w); + writes_[twr] = ws; + writes_cnt_.Inc(); + } +} + +void GlobalTxn::SetLastStatus(ErrorCode* status) { + MutexLock lock(&mu_); + if (!status_returned_) { + VLOG(12) << "[gtxn][commit][status][" << start_ts_ << "]" << status->ToString(); + status_.SetFailed(status->GetType(), status->GetReason()); + status_returned_ = true; + } +} + +void GlobalTxn::RunUserCallback() { + if (status_.GetType() == ErrorCode::kOK) { + gtxn_commit_cnt.Inc(); + } else { + gtxn_commit_fail_cnt.Inc(); + } + gtxn_internal_->PerfCommitDelay(0, get_micros()); // finish_time + if (user_commit_callback_ != NULL) { + VLOG(12) << 
"[gtxn][commit][callback][" << start_ts_ << "]" << status_.ToString(); + user_commit_callback_(this); + } else { + MutexLock lock(&finish_mutex_); + VLOG(12) << "[gtxn][commit][finish][" << start_ts_ << "]" << status_.ToString(); + finish_ = true; + finish_cond_.Signal(); + } +} + +ErrorCode GlobalTxn::Commit() { + /// begin commit + gtxn_internal_->TEST_Sleep(); + gtxn_internal_->PerfCommitDelay(get_micros(), 0); // begin_time + ErrorCode status; + if (put_fail_cnt_.Get() > 0 || has_commited_) { + std::string reason("commit failed, has_commited[" + + std::to_string(has_commited_.load()) + + "], put_fail_cnt[" + std::to_string(put_fail_cnt_.Get()) + "]"); + VLOG(12) << reason; + status.SetFailed(ErrorCode::kGTxnOpAfterCommit, reason); + SetLastStatus(&status); + // Callback Point : put applyMutation failed or has commited + RunUserCallback(); + return status; + } + has_commited_ = true; + // don't have any writes + if (writes_cnt_.Get() == 0) { + status.SetFailed(ErrorCode::kOK, "No modification exists"); + SetLastStatus(&status); + // Callback Point + RunUserCallback(); + return status; + } + thread_pool_->AddTask(std::bind(&GlobalTxn::InternalCommit, this)); + + if (user_commit_callback_ == NULL) { + WaitForComplete(); + } + return status_; +} + +void GlobalTxn::InternalCommit() { + gtxn_internal_->SetCommitDuration(timeout_ms_); + + /// begin prewrite + gtxn_internal_->TEST_Sleep(); + + // on ReadCommitedSnapshot level will get new timestamp before prewrite + if (isolation_level_ == IsolationLevel::kReadCommitedSnapshot) { + if (FLAGS_tera_gtxn_test_opened) { + prewrite_start_ts_ = gtxn_internal_->TEST_GetPrewriteStartTimestamp(); + } else if (!FLAGS_tera_sdk_tso_client_enabled) { + start_ts_ = get_micros(); + } else { + timeoracle::TimeoracleClientImpl tsoc(thread_pool_, tso_cluster_); + prewrite_start_ts_ = tsoc.GetTimestamp(1); + } + if (prewrite_start_ts_ < start_ts_) { + ErrorCode status; + LOG(ERROR) << "[gtxn][prewrite][" << start_ts_ <<"] get prewrite 
new ts failed"; + status.SetFailed(ErrorCode::kGTxnTimestampLost, "get prewrite new ts failed"); + SetLastStatus(&status); + RunUserCallback(); + return; + } + gtxn_internal_->SetPrewriteStartTimestamp(prewrite_start_ts_); + } + VLOG(12) << "[gtxn][prewrite][" << start_ts_ << "]"; + gtxn_internal_->PerfPrewriteDelay(get_micros(), 0); // begin_time + gtxn_prewrite_cnt.Inc(); + + prewrite_iterator_ = writes_.begin(); + primary_write_ = &(prewrite_iterator_->second[0]); + primary_write_->Serialize(prewrite_start_ts_, + gtxn_internal_->GetClientSession(), + &serialized_primary_); + AsyncPrewrite(&prewrite_iterator_->second); +} + +// [prewrite] Step(1): +// read "lock", "write" column from tera +// +// aysnc prewrite one row use single_row_txn +// +void GlobalTxn::AsyncPrewrite(std::vector* ws) { + assert(ws->size() > 0); + // find table and rowkey to new reader and single row txn + Write w = *(ws->begin()); + Table* table = w.Table(); + Transaction* single_row_txn = table->StartRowTransaction(w.RowKey()); + RowReader* reader = table->NewRowReader(w.RowKey()); + // set internal reader timeout + gtxn_internal_->SetInternalSdkTaskTimeout(reader); + // set cf qu and timerange for reader + gtxn_internal_->BuildRowReaderForPrewrite(*ws, reader); + // set callback, context, single row txn for reader + reader->SetCallBack([](RowReader* r){ + GlobalTxn* gtxn = static_cast(((PrewriteContext*)r->GetContext())->gtxn); + gtxn->thread_pool_->AddTask(std::bind(&GlobalTxn::DoPrewriteReaderCallback, gtxn, r)); + }); + PrewriteContext* ctx = new PrewriteContext(ws, this, w.TableName(), w.RowKey()); + if (gtxn_internal_->IsTimeOut()) { + ctx->status.SetFailed(ErrorCode::kGTxnPrewriteTimeout, "global transaction prewrite timeout"); + VLOG(12) << "[gtxn][prewrite][stxn_read] ignored : " << ctx->DebugString(); + RunAfterPrewriteFailed(ctx); + } else { + reader->SetContext(ctx); + // get async + VLOG(12) << "[gtxn][prewrite][stxn_read] invoked : " << ctx->DebugString(); + 
single_row_txn->Get(reader); + } +} + +// [prewrite] Step(2): +// a) verify [prewrite] step(1) read result status and no conflict +// b) write "lock" and "data" column to tera, through same single_row_txn in step(1) +// +// call by [prewrite] step(1),through reader callback +// +void GlobalTxn::DoPrewriteReaderCallback(RowReader* reader) { + PrewriteContext* ctx = (PrewriteContext*)reader->GetContext(); + if (reader->GetError().GetType() != ErrorCode::kNotFound + && reader->GetError().GetType() != ErrorCode::kOK) { + ctx->status = reader->GetError(); + VLOG(12) << "[gtxn][prewrite][stxn_read] failed : " << ctx->status.ToString(); + if (gtxn_internal_->IsTimeOut() || reader->GetError().GetType() == ErrorCode::kTimeout) { + ctx->status.SetFailed(ErrorCode::kGTxnPrewriteTimeout, ctx->status.ToString()); + } + delete reader; + RunAfterPrewriteFailed(ctx); + } else if (gtxn_internal_->ConflictWithOtherWrite(ctx->ws, reader, &(ctx->status))) { + VLOG(12) << "[gtxn][prewrite][stxn_read] failed : " << ctx->status.ToString(); + delete reader; + RunAfterPrewriteFailed(ctx); + } else { + VLOG(12) << "[gtxn][prewrite][stxn_read] succeed, table=" << ctx->DebugString(); + Table* t = reader->GetTable(); + RowMutation* prewrite_mu = t->NewRowMutation(reader->RowKey()); + // set internal task timeout + gtxn_internal_->SetInternalSdkTaskTimeout(prewrite_mu); + gtxn_internal_->BuildRowMutationForPrewrite(ctx->ws, prewrite_mu, + serialized_primary_); + + // commit single_row_txn + SingleRowTxn* single_row_txn = static_cast(reader->GetTransaction()); + delete reader; + single_row_txn->SetContext(ctx); + single_row_txn->SetCommitCallback([](Transaction* single_txn) { + GlobalTxn* gtxn = static_cast(((PrewriteContext*)single_txn->GetContext())->gtxn); + SingleRowTxn* stxn = static_cast(single_txn); + gtxn->thread_pool_->AddTask(std::bind(&GlobalTxn::DoPrewriteCallback, gtxn, stxn)); + }); + if (gtxn_internal_->IsTimeOut()) { + ctx->status.SetFailed(ErrorCode::kGTxnPrewriteTimeout, 
"global transaction prewrite timeout"); + VLOG(12) << "[gtxn][prewrite][stxn_commit] ignored : " << ctx->DebugString(); + delete single_row_txn; + delete prewrite_mu; + RunAfterPrewriteFailed(ctx); + } else { + single_row_txn->ApplyMutation(prewrite_mu); + VLOG(12) << "[gtxn][prewrite][stxn_commit] invoked : " << ctx->DebugString(); + t->CommitRowTransaction(single_row_txn); + delete prewrite_mu; + } + } +} + +// prewrite Step(3): +// verify [prewrite] step(2) single_row_txn commit status, +// if the last prewrite callback and status ok, will call [commit] +// +// call by [prewrite] step(2), through single_row_txn commit callback +// +void GlobalTxn::DoPrewriteCallback(SingleRowTxn* single_row_txn) { + ErrorCode status = single_row_txn->GetError(); + PrewriteContext* ctx = (PrewriteContext*)single_row_txn->GetContext(); + delete single_row_txn; + if (gtxn_internal_->IsTimeOut() || status.GetType() != ErrorCode::kOK) { + // wapper timeout status for global transaction + if (gtxn_internal_->IsTimeOut() || status.GetType() == ErrorCode::kTimeout) { + ctx->status.SetFailed(ErrorCode::kGTxnPrewriteTimeout, status.ToString()); + } else { + ctx->status.SetFailed(status.GetType(), status.ToString()); + } + VLOG(12) << "[gtxn][prewrite][stxn_commit] failed : " << ctx->DebugString(); + RunAfterPrewriteFailed(ctx); + } else if (++prewrite_iterator_ != writes_.end()) { + thread_pool_->AddTask(std::bind(&GlobalTxn::AsyncPrewrite, this, &(prewrite_iterator_->second))); + } else { + gtxn_internal_->PerfPrewriteDelay(0, get_micros()); // finish_time + VLOG(12) << "prewrite done, next step"; + InternalCommitPhase2(); + } +} + +void GlobalTxn::RunAfterPrewriteFailed(PrewriteContext* ctx) { + gtxn_internal_->PerfPrewriteDelay(0, get_micros()); // finish_time + gtxn_prewrite_fail_cnt.Inc(); + if (gtxn_internal_->IsTimeOut() || ctx->status.GetType() == ErrorCode::kTimeout) { + ctx->status.SetFailed(ErrorCode::kGTxnPrewriteTimeout, ctx->status.ToString()); + } + 
SetLastStatus(&ctx->status); + delete ctx; + RunUserCallback(); +} + +// commit phase2 Step(1): +// a) get timestamp from timeoracle for commit_ts +// b) sync commit primary write through single_row_txn +// (for this gtxn, on this step only one thread can work) +// c) loop call [commit phase2] step(2) +// +// call by [prewrite] step(3) +void GlobalTxn::InternalCommitPhase2() { + gtxn_internal_->PerfPrimaryCommitDelay(get_micros(), 0); // begin_time + gtxn_primary_cnt.Inc(); + gtxn_internal_->TEST_Sleep(); // end prewrite + ErrorCode status; + status.SetFailed(ErrorCode::kOK); + gtxn_internal_->TEST_Sleep(); // wait to begin commit + + if (FLAGS_tera_gtxn_test_opened) { + commit_ts_ = gtxn_internal_->TEST_GetCommitTimestamp(); + } else if (!FLAGS_tera_sdk_tso_client_enabled) { + start_ts_ = get_micros(); + } else { + timeoracle::TimeoracleClientImpl tsoc(thread_pool_, tso_cluster_); + commit_ts_ = tsoc.GetTimestamp(1); + } + if (commit_ts_ < prewrite_start_ts_) { + LOG(ERROR) << "[gtxn][commit] get commit ts failed"; + status.SetFailed(ErrorCode::kGTxnTimestampLost, "get commit ts failed"); + SetLastStatus(&status); + gtxn_internal_->PerfPrimaryCommitDelay(0, get_micros()); + gtxn_primary_fail_cnt.Inc(); + RunUserCallback(); + return; + } + + VLOG(12) << "[gtxn][commit] commit_ts:" << commit_ts_; + gtxn_internal_->TEST_Sleep(); // wait to begin primary commit + + /// begin to commit primary + VerifyPrimaryLocked(); +} + +void GlobalTxn::VerifyPrimaryLocked() { + Table* pri_t = primary_write_->Table(); + tera::Transaction* pri_txn = pri_t->StartRowTransaction(primary_write_->RowKey()); + RowReader* reader = pri_t->NewRowReader(primary_write_->RowKey()); + // set internal task timeout + gtxn_internal_->SetInternalSdkTaskTimeout(reader); + reader->AddColumn(primary_write_->ColFamily(), primary_write_->LockName()); + reader->SetTimeRange(prewrite_start_ts_, prewrite_start_ts_); + reader->SetCallBack([](RowReader* r) { + 
((GlobalTxn*)r->GetContext())->DoVerifyPrimaryLockedCallback(r);}); + reader->SetContext(this); + pri_txn->Get(reader); +} + +void GlobalTxn::DoVerifyPrimaryLockedCallback(RowReader* reader) { + ErrorCode status = reader->GetError(); + SingleRowTxn* pri_txn = static_cast(reader->GetTransaction()); + delete reader; + + if (status.GetType() == ErrorCode::kOK) { + CommitPrimary(pri_txn); + } else { + delete pri_txn; + if (status.GetType() == ErrorCode::kNotFound) { + status.SetFailed(ErrorCode::kGTxnPrimaryLost, "primary 'lock' lost before commit"); + } else if (status.GetType() == ErrorCode::kTimeout) { + status.SetFailed(ErrorCode::kGTxnPrimaryCommitTimeout, status.ToString()); + } + SetLastStatus(&status); + gtxn_primary_fail_cnt.Inc(); + gtxn_internal_->PerfPrimaryCommitDelay(0, get_micros()); // finish_time + RunUserCallback(); + } +} + +void GlobalTxn::CommitPrimary(SingleRowTxn* pri_txn) { + Table* pri_t = primary_write_->Table(); + RowMutation* primary_mu = pri_t->NewRowMutation(primary_write_->RowKey()); + // set internal task timeout + gtxn_internal_->SetInternalSdkTaskTimeout(primary_mu); + primary_mu->Put(primary_write_->ColFamily(), primary_write_->WriteName(), + EncodeWriteValue(primary_write_->WriteType(), prewrite_start_ts_), commit_ts_); + primary_mu->DeleteColumns(primary_write_->ColFamily(), primary_write_->LockName(), commit_ts_); + pri_txn->ApplyMutation(primary_mu); + pri_txn->SetCommitCallback([] (Transaction* txn) { + ((GlobalTxn*)txn->GetContext())->CheckPrimaryStatusAndCommmitSecondaries(txn); + }); + pri_txn->SetContext(this); + pri_txn->Commit(); + delete primary_mu; +} + +void GlobalTxn::CheckPrimaryStatusAndCommmitSecondaries(Transaction* pri_txn) { + ErrorCode status = pri_txn->GetError(); + delete pri_txn; + gtxn_internal_->TEST_Sleep(); + // primary commit failed callback and return + if (status.GetType() != tera::ErrorCode::kOK) { + VLOG(12) << "[gtxn][commit] primary failed :[" << status.ToString() << "]"; + // Callback Point : 
primary commit failed + if (status.GetType() == ErrorCode::kTimeout) { + status.SetFailed(ErrorCode::kGTxnPrimaryCommitTimeout, status.ToString()); + } + SetLastStatus(&status); + gtxn_primary_fail_cnt.Inc(); + gtxn_internal_->PerfPrimaryCommitDelay(0, get_micros()); // finish_time + RunUserCallback(); + return; + } + gtxn_internal_->PerfPrimaryCommitDelay(0, get_micros()); // finish_time + if (acks_cnt_.Get() == 0 && notifies_cnt_.Get() == 0) { + SetLastStatus(&status); + } + // wait primary commit done + VLOG(12) << "[gtxn][commit] succeed :[" << start_ts_ + << "," << prewrite_start_ts_ << "," << commit_ts_ << "]"; + + std::vector* ws = &(writes_.begin()->second); + if (ws->size() == 1) { + writes_.erase(writes_.begin()); + writes_cnt_.Dec(); + } else { + ws->erase(ws->begin()); + } + + all_task_pushed_ = false; + /// begin commit secondaries + for (auto &same_row_writes : writes_) { + thread_pool_->AddTask(std::bind(&GlobalTxn::AsyncCommitSecondaries, + this, &(same_row_writes.second))); + } + + /// begin ack + for (auto &same_row_acks : acks_) { + thread_pool_->AddTask(std::bind(&GlobalTxn::AsyncAck, + this, &(same_row_acks.second))); + } + /// begin notify + for (auto &same_row_notifies : notifies_) { + thread_pool_->AddTask(std::bind(&GlobalTxn::AsyncNotify, + this, &(same_row_notifies.second))); + } + bool should_callback = false; + { + MutexLock lock(&mu_); + all_task_pushed_ = true; + should_callback = commit_secondaries_done_cnt_.Get() == writes_cnt_.Get() + && acks_cnt_.Get() == ack_done_cnt_.Get() + && notifies_cnt_.Get() == notify_done_cnt_.Get() + && all_task_pushed_ == true; + } + if (should_callback) { + RunUserCallback(); + } + +} + +void GlobalTxn::AsyncAck(std::vector* ws) { + gtxn_internal_->PerfAckDelay(get_micros(), 0); + gtxn_acks_cnt.Inc(); + assert(ws->size() > 0); + Write w = *(ws->begin()); + Table* table = w.Table(); + RowMutation* mu = table->NewRowMutation(w.RowKey()); + gtxn_internal_->SetInternalSdkTaskTimeout(mu); + 
gtxn_internal_->BuildRowMutationForAck(ws, mu); + mu->SetCallBack([](RowMutation* row_mu) { + ((GlobalTxn*)row_mu->GetContext())->DoAckCallback(row_mu);}); + mu->SetContext(this); + table->ApplyMutation(mu); +} + +void GlobalTxn::DoAckCallback(RowMutation* mutation) { + if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << "[gtxn][commit][ack], failed" + << mutation->GetError().GetReason(); + ErrorCode status; + status.SetFailed(ErrorCode::kGTxnOKButAckFailed, mutation->GetError().ToString()); + SetLastStatus(&status); + gtxn_acks_fail_cnt.Inc(); + } + delete mutation; + bool should_callback = false; + { + MutexLock lock(&mu_); + ack_done_cnt_.Inc(); + gtxn_internal_->PerfAckDelay(0, get_micros()); + should_callback = commit_secondaries_done_cnt_.Get() == writes_cnt_.Get() + && acks_cnt_.Get() == ack_done_cnt_.Get() + && notifies_cnt_.Get() == notify_done_cnt_.Get(); + } + + if (should_callback) { + RunUserCallback(); + } +} + +void GlobalTxn::AsyncNotify(std::vector* ws) { + gtxn_internal_->PerfNotifyDelay(get_micros(), 0); + gtxn_notifies_cnt.Inc(); + assert(ws->size() > 0); + Write w = *(ws->begin()); + Table* table = w.Table(); + RowMutation* mu = table->NewRowMutation(w.RowKey()); + gtxn_internal_->SetInternalSdkTaskTimeout(mu); + gtxn_internal_->BuildRowMutationForNotify(ws, mu, commit_ts_); + mu->SetCallBack([](RowMutation* row_mu) { + ((GlobalTxn*)row_mu->GetContext())->DoNotifyCallback(row_mu);}); + mu->SetContext(this); + table->ApplyMutation(mu); +} + +void GlobalTxn::DoNotifyCallback(RowMutation* mutation) { + if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << "[gtxn][commit][notify], failed" + << mutation->GetError().GetReason(); + ErrorCode status; + status.SetFailed(ErrorCode::kGTxnOKButNotifyFailed, mutation->GetError().ToString()); + gtxn_notifies_fail_cnt.Inc(); + SetLastStatus(&status); + } + delete mutation; + + bool should_callback = false; + { + MutexLock lock(&mu_); + 
notify_done_cnt_.Inc(); + gtxn_internal_->PerfNotifyDelay(0, get_micros()); + should_callback = commit_secondaries_done_cnt_.Get() == writes_cnt_.Get() + && acks_cnt_.Get() == ack_done_cnt_.Get() + && notifies_cnt_.Get() == notify_done_cnt_.Get() + && all_task_pushed_ == true; + } + + if (should_callback) { + RunUserCallback(); + } +} + +void GlobalTxn::AsyncCommitSecondaries(std::vector* ws) { + gtxn_internal_->PerfSecondariesCommitDelay(get_micros(), 0); // begin time + gtxn_secondaries_cnt.Inc(); + assert(ws->size() > 0); + Write w = *(ws->begin()); + Table* table = w.Table(); + RowMutation* mu = table->NewRowMutation(w.RowKey()); + gtxn_internal_->SetInternalSdkTaskTimeout(mu); + gtxn_internal_->BuildRowMutationForCommit(ws, mu, commit_ts_); + mu->SetCallBack([](RowMutation* row_mu) { + ((GlobalTxn*)row_mu->GetContext())->DoCommitSecondariesCallback(row_mu);}); + mu->SetContext(this); + table->ApplyMutation(mu); +} + +void GlobalTxn::DoCommitSecondariesCallback(RowMutation* mutation) { + if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) << "[gtxn][commit][secondaries], failed" + << mutation->GetError().GetReason(); + gtxn_secondaries_fail_cnt.Inc(); + } + delete mutation; + + bool should_callback = false; + { + MutexLock lock(&mu_); + commit_secondaries_done_cnt_.Inc(); + gtxn_internal_->PerfSecondariesCommitDelay(0, get_micros()); // finish time + should_callback = commit_secondaries_done_cnt_.Get() == writes_cnt_.Get() + && acks_cnt_.Get() == ack_done_cnt_.Get() + && notifies_cnt_.Get() == notify_done_cnt_.Get() + && all_task_pushed_ == true; + } + + if (should_callback) { + RunUserCallback(); + } +} + +void GlobalTxn::ApplyMutation(RowMutation* row_mu) { + assert(row_mu != NULL); + + RowMutationImpl* row_mu_impl = static_cast(row_mu); + row_mu_impl->SetTransaction(this); + row_mu_impl->SetError(ErrorCode::kOK); + + bool can_apply = false; + if (!has_commited_.load()) { + assert(put_fail_cnt_.Get() > -1); + put_fail_cnt_.Inc(); + // 
check writes_size_ over limit + MutexLock lock(&mu_); + can_apply = gtxn_internal_->VerifyWritesSize(row_mu, &writes_size_); + } else { + std::string reason = "ApplyMutation failed, txn has committed at [" + + std::to_string(commit_ts_) + "]"; + LOG(ERROR) << "[gtxn][apply_mutation][" << start_ts_ << "]" << reason; + row_mu_impl->SetError(ErrorCode::kGTxnOpAfterCommit, reason); + } + + size_t writes_cnt = 0; + + if (can_apply && gtxn_internal_->VerifyUserRowMutation(row_mu)) { + Table* table = row_mu->GetTable(); + const std::string& tablename = table->GetName(); + const std::string& row_key = row_mu->RowKey(); + for (size_t i = 0; i < row_mu->MutationNum(); ++i) { + const RowMutation::Mutation& mu = row_mu->GetMutation(i); + Cell cell(table, row_key, mu.family, mu.qualifier, start_ts_, mu.value); + Write w(cell, mu.type); + ++writes_cnt; + SaveWrite(tablename, row_key, w); + } + } + + bool is_async = row_mu_impl->IsAsync(); + ErrorCode mu_err = row_mu_impl->GetError(); + + if (mu_err.GetType() != ErrorCode::kOK || writes_cnt == 0) { + if (!status_returned_) { + status_.SetFailed(mu_err.GetType(), mu_err.GetReason()); + status_returned_ = true; + } + if (is_async) { + thread_pool_->AddTask(std::bind(&RowMutationImpl::RunCallback, row_mu_impl)); + } else { + // nothing to do + // sync mu_err != ok will return before put_fail_cnt -1 + } + return; + } + if (is_async) { + thread_pool_->AddTask(std::bind(&RowMutationImpl::RunCallback, row_mu_impl)); + } + // only succes put will -1 + assert(put_fail_cnt_.Get() > 0); + put_fail_cnt_.Dec(); +} + +// for wait commit +void GlobalTxn::WaitForComplete() { + MutexLock lock(&finish_mutex_); + while(!finish_) { + finish_cond_.Wait(); + } +} + +void GlobalTxn::Ack(Table* t, + const std::string& row_key, + const std::string& column_family, + const std::string& qualifier) { + if (t == NULL) { + LOG(ERROR) << "set ack cell failed"; + return; + } + const std::string& tablename = t->GetName(); + Cell cell(t, row_key, column_family, 
qualifier); + Write w(cell); + TableWithRowkey twr(tablename, row_key); + MutexLock lock(&mu_); + auto it = acks_.find(twr); + if (it != acks_.end()) { + std::vector* acks_ptr = &(acks_[twr]); + acks_ptr->push_back(w); + } else { + std::vector acks; + acks.push_back(w); + acks_[twr] = acks; + acks_cnt_.Inc(); + } +} + +void GlobalTxn::Notify(Table* t, + const std::string& row_key, + const std::string& column_family, + const std::string& qualifier) { + if (t == NULL) { + LOG(ERROR) << "set ack cell failed"; + return; + } + const std::string& tablename = t->GetName(); + Cell cell(t, row_key, column_family, qualifier); + Write w(cell); + TableWithRowkey twr(tablename, row_key); + MutexLock lock(&mu_); + auto it = notifies_.find(twr); + if (it != notifies_.end()) { + std::vector* notifies_ptr = &(notifies_[twr]); + notifies_ptr->push_back(w); + } else { + std::vector notifies; + notifies.push_back(w); + notifies_[twr] = notifies; + notifies_cnt_.Inc(); + } +} + +} // namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/sdk/global_txn.h b/src/sdk/global_txn.h new file mode 100644 index 000000000..de5832166 --- /dev/null +++ b/src/sdk/global_txn.h @@ -0,0 +1,273 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// +// Author: baorenyi@baidu.com + +#ifndef TERA_SDK_GLOBAL_TXN_H_ +#define TERA_SDK_GLOBAL_TXN_H_ + +#include +#include +#include +#include + +#include "common/mutex.h" +#include "io/coding.h" +#include "proto/table_meta.pb.h" +#include "sdk/global_txn_internal.h" +#include "sdk/single_row_txn.h" +#include "sdk/sdk_utils.h" +#include "sdk/table_impl.h" +#include "sdk/sdk_zk.h" +#include "tera.h" +#include "common/counter.h" +#include "common/timer.h" + +namespace tera { + +class Cell; +class Write; +class GlobalTxnInternal; +class CellReaderContext; +class InternalReaderContext; +class PrewriteContext; + +class GlobalTxn : public Transaction { +public: + static Transaction* NewGlobalTxn(tera::Client* client, + common::ThreadPool* thread_pool, + sdk::ClusterFinder* tso_cluster); + + virtual ~GlobalTxn(); + + virtual void ApplyMutation(RowMutation* row_mu); + virtual ErrorCode Get(RowReader* row_reader); + virtual ErrorCode Commit(); + + virtual int64_t GetStartTimestamp() { return start_ts_; } + virtual int64_t GetCommitTimestamp() { return commit_ts_; } + + virtual const ErrorCode& GetError() { return status_; } + + typedef void (*Callback)(Transaction* transaction); + + virtual void SetCommitCallback(Callback callback) { + user_commit_callback_ = callback; + } + + virtual Callback GetCommitCallback() { + return user_commit_callback_; + } + + virtual void SetContext(void* context) { + user_commit_context_ = context; + } + + virtual void* GetContext() { + return user_commit_context_; + } + + virtual void Ack(Table* t, + const std::string& row_key, + const std::string& column_family, + const std::string& qualifier); + + virtual void Notify(Table* t, + const std::string& row_key, + const std::string& column_family, + const std::string& qualifier); + + virtual void SetIsolation(const IsolationLevel& isolation_level); + + virtual IsolationLevel Isolation() { return isolation_level_; } + + virtual void SetTimeout(int64_t timeout_ms); + + virtual int64_t Timeout(); + 
+private: + // ----------------------- begin get process --------------------------- // + // read one cell from db + // + // read "lock", "write", "data" columns result from db, + // use async interface of tera [RowReader] + void AsyncGetCell(Cell* cell, RowReaderImpl* user_reader_impl, InternalReaderContext* ctx); + + // check lock write and build cell result + // (1) check read result, if failed will call [MergeCellToRow] + // (2) maybe call [BackoffAndMaybeCleanupLock] and call [AsyncGetCell] retry + // (3) maybe call [FindValueFromResultRow] and call [MergeCellToRow] + void DoGetCellReaderCallback(RowReader* reader); + + // check "lock" and "write" columns, do like percolator + // maybe call CleanLock, RollForward or wait some times + // + // if try_clean == true will be CleanLock not wait + void BackoffAndMaybeCleanupLock(RowReader::TRow& row, + const Cell& cell, + const bool try_clean, + ErrorCode* status); + void CleanLock(const Cell& cell, const tera::PrimaryInfo& primary, + ErrorCode* status); + + void RollForward(const Cell& cell, + const tera::PrimaryInfo& primary, + int lock_type, + ErrorCode* status); + + // get result form "result_row" and set into "target_cell" + bool FindValueFromResultRow(RowReader::TRow& result_row, Cell* target_cell); + + // call GetCellCallback function @ other thread + void MergeCellToRow(RowReader* internal_reader, const ErrorCode& status); + + // set cell result, merge to value_list and call user_reader_callback + void GetCellCallback(CellReaderContext* ctx); + + void SetReaderStatusAndRunCallback(RowReaderImpl* reader_impl, ErrorCode* status); + + // ------------- begin commit prewrite (commit phase1) ----------------- // + void SaveWrite(const std::string& tablename, + const std::string& row_key, + tera::Write& w); + + // commit entry + // + // do [commit phase1], [commit phase2] will begin at callback + void InternalCommit(); + + // [prewrite] Step(1): + // read "data", "lock", "write" column from tera + // + // aysnc 
prewrite one row use single_row_txn + void AsyncPrewrite(std::vector* same_row_writes); + + // [prewrite] Step(2): + // a) verify [prewrite] step(1) read result status and no conflict + // b) write "lock" and "data" column to tera, + // through same single_row_txn in step(1) + // + // call by [prewrite] step(1),through reader callback + void DoPrewriteReaderCallback(RowReader* reader); + + // prewrite Step(3): + // verify [prewrite] step(2) single_row_txn commit status, + // if the last prewrite callback and status ok, will call [commit] + // + // call by [prewrite] step(2), through single_row_txn commit callback + void DoPrewriteCallback(SingleRowTxn* single_row_txn); + void RunAfterPrewriteFailed(PrewriteContext* ctx); + + // --------------------- begin commit phase2 ---------------------- // + + // commit phase2 Step(1): + // a) get timestamp from timeoracle for commit_ts + // b) sync commit primary write through single_row_txn + // (for this gtxn, on this step only one thread can work) + // c) call [commit phase2] step(2) in a loop + // + // call by [prewrite] step(3) + void InternalCommitPhase2(); + + void VerifyPrimaryLocked(); + + void DoVerifyPrimaryLockedCallback(RowReader* reader); + + void CommitPrimary(SingleRowTxn* primary_single_txn); + + void CheckPrimaryStatusAndCommmitSecondaries(Transaction* primary_single_txn); + + // commit phase2 Step(2): + // async commit secondaries writes through RowMutaion + // + // call by [commit phase2] step(1) + void AsyncCommitSecondaries(std::vector* same_row_writes); + + void DoCommitSecondariesCallback(RowMutation* mutation); + + // commit phase2 Step(3): + // async do ack through RowMutaion + // + // call by [commit phase2] step(1) + void AsyncAck(std::vector* same_row_acks); + + void DoAckCallback(RowMutation* mutation); + + // commit phase2 Step(4): + // async do notify through RowMutaion + // + // call by [commit phase2] step(1) + void AsyncNotify(std::vector* same_row_notifies); + + void 
DoNotifyCallback(RowMutation* mutation); + + /// if user want to delete this transaction, + /// before any async tasks of this transaction finished for failed + void WaitForComplete(); + + void SetLastStatus(ErrorCode* status); + + void RunUserCallback(); + + // -------------------- end commit phase1 and phase2 ------------------- // +private: + GlobalTxn(tera::Client* client, + common::ThreadPool* thread_pool, + sdk::ClusterFinder* tso_cluster); + + GlobalTxn(const GlobalTxn&) = delete; + void operator=(const GlobalTxn&) = delete; + + // + typedef std::pair TableWithRowkey; + // tableWithRowkey -> set(write) + typedef std::map> WriterMap; + + std::unique_ptr gtxn_internal_; + ErrorCode status_; + bool status_returned_; // if true gtxn will not change "status_" + + Write* primary_write_; + WriterMap writes_; + WriterMap::iterator prewrite_iterator_; + int64_t writes_size_; + + int64_t start_ts_; + int64_t prewrite_start_ts_; + int64_t commit_ts_; + IsolationLevel isolation_level_; + std::string serialized_primary_; + + WriterMap acks_; + WriterMap notifies_; + + mutable Mutex mu_; + std::atomic finish_; + mutable Mutex finish_mutex_; + common::CondVar finish_cond_; + + std::atomic has_commited_; + + Callback user_commit_callback_; + void* user_commit_context_; + + common::ThreadPool* thread_pool_; + sdk::ClusterFinder* tso_cluster_; + + int64_t timeout_ms_; + + Counter put_fail_cnt_; // put begin +1, done -1 + Counter commit_secondaries_done_cnt_; + Counter ack_done_cnt_; + Counter notify_done_cnt_; + + Counter writes_cnt_; + Counter acks_cnt_; + Counter notifies_cnt_; + std::atomic all_task_pushed_; +}; + +} // namespace tera + +#endif // TERA_SDK_GLOBAL_TXN_H_ diff --git a/src/sdk/global_txn_internal.cc b/src/sdk/global_txn_internal.cc new file mode 100644 index 000000000..8c69651ed --- /dev/null +++ b/src/sdk/global_txn_internal.cc @@ -0,0 +1,559 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. 
All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com + +#include "sdk/global_txn_internal.h" + +#include "common/metric/metric_counter.h" +#include "common/this_thread.h" +#include "proto/table_meta.pb.h" +#include "proto/tabletnode_rpc.pb.h" +#include "sdk/global_txn.h" +#include "sdk/read_impl.h" +#include "sdk/sdk_metric_name.h" + +DECLARE_bool(tera_gtxn_test_opened); +DECLARE_string(tera_gtxn_test_flagfile); +DECLARE_int32(tera_gtxn_all_puts_size_limit); +DECLARE_int32(tera_sdk_timeout); + +namespace tera { + +// for record sdk all transactions perf +tera::MetricCounter gtxn_read_delay_us(kGTxnReadDelayMetric, kGTxnLabelRead); +tera::MetricCounter gtxn_read_cnt(kGTxnReadCountMetric, kGTxnLabelRead); +tera::MetricCounter gtxn_read_fail_cnt(kGTxnReadFailCountMetric, kGTxnLabelRead); +tera::MetricCounter gtxn_read_retry_cnt(kGTxnReadRetryCountMetric, kGTxnLabelRead); +tera::MetricCounter gtxn_read_rollback_cnt(kGTxnReadRollBackCountMetric, kGTxnLabelRead); +tera::MetricCounter gtxn_read_rollforward_cnt(kGTxnReadRollForwardCountMetric, kGTxnLabelRead); + +tera::MetricCounter gtxn_commit_delay_us(kGTxnCommitDelayMetric, kGTxnLabelCommit); +tera::MetricCounter gtxn_commit_cnt(kGTxnCommitCountMetric, kGTxnLabelCommit); +tera::MetricCounter gtxn_commit_fail_cnt(kGTxnCommitFailCountMetric, kGTxnLabelCommit); + +tera::MetricCounter gtxn_prewrite_delay_us(kGTxnPrewriteDelayMetric, kGTxnLabelCommit); +tera::MetricCounter gtxn_prewrite_cnt(kGTxnPrewriteCountMetric, kGTxnLabelCommit); +tera::MetricCounter gtxn_prewrite_fail_cnt(kGTxnPrewriteFailCountMetric, kGTxnLabelCommit); + +tera::MetricCounter gtxn_primary_delay_us(kGTxnPrimaryDelayMetric, kGTxnLabelCommit); +tera::MetricCounter gtxn_primary_cnt(kGTxnPrimaryCountMetric, kGTxnLabelCommit); +tera::MetricCounter gtxn_primary_fail_cnt(kGTxnPrimaryFailCountMetric, kGTxnLabelCommit); + +tera::MetricCounter 
gtxn_secondaries_delay_us(kGTxnSecondariesDelayMetric, kGTxnLabelCommit); +tera::MetricCounter gtxn_secondaries_cnt(kGTxnSecondariesCountMetric, kGTxnLabelCommit); +tera::MetricCounter gtxn_secondaries_fail_cnt(kGTxnSecondariesFailCountMetric, kGTxnLabelCommit); + +tera::MetricCounter gtxn_acks_delay_us(kGTxnAcksDelayMetric, kGTxnLabelCommit); +tera::MetricCounter gtxn_acks_cnt(kGTxnAcksCountMetric, kGTxnLabelCommit); +tera::MetricCounter gtxn_acks_fail_cnt(kGTxnAcksFailCountMetric, kGTxnLabelCommit); + +tera::MetricCounter gtxn_notifies_delay_us(kGTxnNotifiesDelayMetric, kGTxnLabelCommit); +tera::MetricCounter gtxn_notifies_cnt(kGTxnNotifiesCountMetric, kGTxnLabelCommit); +tera::MetricCounter gtxn_notifies_fail_cnt(kGTxnNotifiesFailCountMetric, kGTxnLabelCommit); + +tera::MetricCounter gtxn_tso_delay_us(kGTxnTsoDelayMetric, kGTxnLabelTso); +tera::MetricCounter gtxn_tso_req_cnt(kGTxnTsoRequestCountMetric, kGTxnLabelTso); + +GlobalTxnInternal::GlobalTxnInternal(tera::Client* client) + : TEST_GtxnTestHelper_(NULL), + start_ts_(0), + prewrite_start_ts_(0), + terminal_time_(0), + is_timeout_(false), + client_(client) {} + +GlobalTxnInternal::~GlobalTxnInternal() { + PerfReport(); +} + +void GlobalTxnInternal::SetStartTimestamp(int64_t ts) { + start_ts_ = ts; + prewrite_start_ts_ = ts; +} + +bool GlobalTxnInternal::CheckTable(Table* table, ErrorCode* status) { + assert(table != NULL); + MutexLock lock(&tables_mu_); + TableInfoMap::const_iterator tables_it = tables_.find(table->GetName()); + if (tables_it == tables_.end()) { + TableImpl* table_impl = static_cast(table); + TableSchema schema = table_impl->GetTableSchema(); + if (IsTransactionTable(schema)) { + std::set gtxn_cfs; + FindGlobalTransactionCfs(schema, >xn_cfs); + if (gtxn_cfs.size() > 0) { + tables_[table->GetName()] = std::pair >(table, gtxn_cfs); + return true; + } else { + status->SetFailed(ErrorCode::kBadParam, + "schema check fail: " + table->GetName() + " haven't gtxn cf"); + return false; + } + } else { 
+ status->SetFailed(ErrorCode::kBadParam, + "schema check fail: " + table->GetName() + " not txn table"); + return false; + } + } + return true; +} + + + +bool GlobalTxnInternal::IsLockedByOthers(RowReader::TRow& row, const Cell& cell) { + if (row[cell.ColFamily()].find(cell.LockName()) != row[cell.ColFamily()].end()) { + for (auto k = row[cell.ColFamily()][cell.LockName()].rbegin(); + k != row[cell.ColFamily()][cell.LockName()].rend(); ++k) { + if (k->first < start_ts_) { + return true; + } + } + } + return false; +} + +bool GlobalTxnInternal::SuspectLive(const tera::PrimaryInfo& primary_info) { + std::string session_str = primary_info.client_session(); + VLOG(12) << "suppect_live : " << session_str; + ClientImpl* client_impl = static_cast(client_); + return client_impl->IsClientAlive(session_str); +} + +bool GlobalTxnInternal::VerifyUserRowReader(RowReader* user_reader) { + RowReaderImpl* reader_impl = static_cast(user_reader); + const RowReader::ReadColumnList& read_col_list = user_reader->GetReadColumnList(); + ErrorCode status; + bool schema_valid = true; + std::string reason(""); + + Table* table = reader_impl->GetTable(); + if (!CheckTable(table, &status)) { + // table schema error for gtxn + reader_impl->SetError(status.GetType(), status.GetReason()); + return false; + } else if (read_col_list.size() == 0) { + // TODO support read full + reason = "not support read full line in global transaction"; + LOG(ERROR) << "[gtxn][get] " << reason; + reader_impl->SetError(ErrorCode::kBadParam, reason); + return false; + } else if (reader_impl->GetSnapshot() != 0) { + reason = "not support read a snapshot in global transaction"; + LOG(ERROR) << "[gtxn][get] " << reason; + reader_impl->SetError(ErrorCode::kBadParam, reason); + return false; + } + + // check schema valid + const std::string& tablename = table->GetName(); + + for (auto it = read_col_list.begin(); it != read_col_list.end(); ++it) { + const std::string& column_family = it->first; + const std::set& 
qualifier_set = it->second; + + if (qualifier_set.size() == 0) { + reason = "not set any qualifier"; + LOG(ERROR) << "[gtxn][get] " << reason; + reader_impl->SetError(ErrorCode::kBadParam, reason); + schema_valid = false; + break; + } + if (!IsGTxnColumnFamily(tablename, column_family)) { + reason = "table:" + tablename + ",cf:" + column_family + " not set gtxn=\"on\""; + LOG(ERROR) << "[gtxn][get] " << reason; + reader_impl->SetError(ErrorCode::kBadParam, reason); + schema_valid = false; + break; + } + for (auto q_it = qualifier_set.begin(); q_it != qualifier_set.end(); ++q_it) { + const std::string& qualifier = *q_it; + + if (BadQualifier(qualifier)) { + reason = "table:" + tablename + ",qu:" + qualifier + " can't end with \"_*_\""; + LOG(ERROR) << "[gtxn][get] " << reason; + reader_impl->SetError(ErrorCode::kBadParam, reason); + schema_valid = false; + break; + } + } + } + return schema_valid; +} + +bool GlobalTxnInternal::VerifyUserRowMutation(RowMutation* user_mu) { + RowMutationImpl* row_mu_impl = static_cast(user_mu); + Table* table = user_mu->GetTable(); + + ErrorCode status; + if (!CheckTable(table, &status)) { + // table schema error for gtxn; + row_mu_impl->SetError(status.GetType(), status.GetReason()); + return false; + } else if (row_mu_impl->MutationNum() <= 0) { + // nothing to mutation + row_mu_impl->SetError(ErrorCode::kBadParam, "nothing to mutation"); + return false; + } + + std::string reason(""); + const std::string& tablename = table->GetName(); + + for (size_t i = 0; i < user_mu->MutationNum(); ++i) { + const RowMutation::Mutation& mu = user_mu->GetMutation(i); + // check this qualifier is right + if (BadQualifier(mu.qualifier)) { + reason = "@table" + tablename + ",qu:" + mu.qualifier + + " can't end with \"_*_\""; + LOG(ERROR) << "[gtxn][apply_mutation] " << reason; + row_mu_impl->SetError(ErrorCode::kBadParam, reason); + return false; + } else if (!IsGTxnColumnFamily(tablename, mu.family)) { + // check column has set gtxn="on" + reason = 
"@table" + tablename + ",cf:" + mu.family + + " not set gtxn=\"on\""; + LOG(ERROR) << "[gtxn][apply_mutation] " << reason; + row_mu_impl->SetError(ErrorCode::kBadParam, reason); + return false; + } else if (mu.type != RowMutation::kPut && mu.type != RowMutation::kDeleteColumn + && mu.type != RowMutation::kDeleteColumns) { + + reason = "@table " + tablename + ",row mutation type is " + + std::to_string(mu.type); + LOG(ERROR) << "[gtxn][apply_mutation] " << reason; + row_mu_impl->SetError(ErrorCode::kGTxnNotSupport, reason); + return false; + } + } + return true; +} + +bool GlobalTxnInternal::VerifyWritesSize(RowMutation* user_mu, int64_t* size) { + RowMutationImpl* row_mu_impl = static_cast(user_mu); + *size += row_mu_impl->Size(); + if (*size > FLAGS_tera_gtxn_all_puts_size_limit) { + LOG(ERROR) << "[gtxn][apply_mutation][" << start_ts_ << "] failed, " + << "mutations size " << *size << " > limit (" + << FLAGS_tera_gtxn_all_puts_size_limit << ")"; + row_mu_impl->SetError(ErrorCode::kGTxnDataTooLarge); + return false; + } else if ( *size <= 0) { + LOG(ERROR) << "[gtxn][apply_mutation][" << start_ts_ << "] failed, " + << "mutaions size " << *size; + row_mu_impl->SetError(ErrorCode::kBadParam); + return false; + } + return true; +} + +bool GlobalTxnInternal::PrimaryIsLocked(const tera::PrimaryInfo& primary, + const int64_t lock_ts, + ErrorCode* status) { + Table* table = FindTable(primary.table_name()); + if (table == NULL) { + status->SetFailed(ErrorCode::kGTxnPrimaryLost, + "not found primary table and open failed"); + return false; + } + if (!CheckTable(table, status)) { + status->SetFailed(ErrorCode::kGTxnPrimaryLost, + "primary table check failed" + status->ToString()); + return false; + } + const Cell& cell = Cell(table, primary.row_key(), + primary.column_family(), primary.qualifier()); + + std::unique_ptr reader(table->NewRowReader(cell.RowKey())); + reader->AddColumn(cell.ColFamily(), cell.LockName()); + reader->SetTimeRange(lock_ts, lock_ts); + 
table->Get(reader.get()); + + if (reader->GetError().GetType() != tera::ErrorCode::kOK && + reader->GetError().GetType() != tera::ErrorCode::kNotFound) { + *status = reader->GetError(); + return false; + } + while (!reader->Done()) { + if (reader->Timestamp() == lock_ts) { + VLOG(12) << DebugString(cell, "other transaction on prewrite @" + std::to_string(lock_ts)); + return true; + } + reader->Next(); + } + return false; +} + +void GlobalTxnInternal::BuildRowReaderForPrewrite(const std::vector& ws, RowReader* reader) { + for (auto& w : ws){ + reader->AddColumn(w.ColFamily(), w.Qualifier()); + reader->AddColumn(w.ColFamily(), w.LockName()); + reader->AddColumn(w.ColFamily(), w.WriteName()); + reader->SetTimeRange(0, kMaxTimeStamp); + reader->SetMaxVersions(UINT32_MAX); + } +} + +void GlobalTxnInternal::BuildRowMutationForPrewrite(std::vector* ws, + RowMutation* prewrite_mu, + const std::string& primary_info) { + for (auto it = ws->begin(); it != ws->end(); ++it) { + const Write& w = *it; // one cell + prewrite_mu->Put(w.ColFamily(), + w.LockName(), + EncodeLockValue(w.WriteType(), primary_info), + (int64_t)prewrite_start_ts_); + prewrite_mu->Put(w.ColFamily(), + w.Qualifier(), + w.Value(), + (int64_t)prewrite_start_ts_); + } +} + +void GlobalTxnInternal::BuildRowMutationForCommit(std::vector* ws, + RowMutation* commit_mu, + const int64_t commit_ts) { + for (auto it = ws->begin(); it != ws->end(); ++it) { + const Write& w = *it; // one cell + // value = type + start_ts + commit_mu->Put(w.ColFamily(), w.WriteName(), + EncodeWriteValue(w.WriteType(), prewrite_start_ts_), + commit_ts); + commit_mu->DeleteColumns(w.ColFamily(), w.LockName(), commit_ts); + } +} + +void GlobalTxnInternal::BuildRowMutationForAck(std::vector* ws, + RowMutation* commit_mu) { + for (auto it = ws->begin(); it != ws->end(); ++it) { + const Write& w = *it; // one cell + commit_mu->DeleteColumns(kNotifyColumnFamily, w.NotifyName(), start_ts_); + } +} + +void 
GlobalTxnInternal::BuildRowMutationForNotify(std::vector* ws, + RowMutation* commit_mu, + const int64_t commit_ts) { + for (auto it = ws->begin(); it != ws->end(); ++it) { + const Write& w = *it; // one cell + commit_mu->Put(kNotifyColumnFamily, w.NotifyName(), + Int64ToEncodedString(commit_ts), commit_ts); + } +} + +void GlobalTxnInternal::SetCommitDuration(int64_t timeout_ms) { + terminal_time_ = timeout_ms + get_millis(); +} + +void GlobalTxnInternal::SetInternalSdkTaskTimeout(RowReader* reader) { + int64_t duration = terminal_time_ - get_millis(); + if (duration < 0) { + is_timeout_ = true; + duration = 1; + } + // duration should not larger than FLAGS_tera_sdk_timeout + duration = duration > FLAGS_tera_sdk_timeout ? FLAGS_tera_sdk_timeout : duration; + reader->SetTimeOut(duration); +} + +void GlobalTxnInternal::SetInternalSdkTaskTimeout(RowMutation* mutation) { + int64_t duration = terminal_time_ - get_millis(); + if (duration < 0) { + is_timeout_ = true; + duration = 1; + } + // duration should not larger than FLAGS_tera_sdk_timeout + duration = duration > FLAGS_tera_sdk_timeout ? 
FLAGS_tera_sdk_timeout : duration; + mutation->SetTimeOut(duration); +} + +bool GlobalTxnInternal::IsTimeOut() { + return is_timeout_; +} + +bool GlobalTxnInternal::IsPrimary(const tera::Cell& cell, + const tera::PrimaryInfo& primary_info) { + return primary_info.table_name() == cell.TableName() + && primary_info.row_key() == cell.RowKey() + && primary_info.column_family() == cell.ColFamily() + && primary_info.qualifier() == cell.Qualifier(); +} + +Table* GlobalTxnInternal::FindTable(const std::string& tablename) { + assert(!tablename.empty()); + MutexLock lock(&tables_mu_); + TableInfoMap::const_iterator it = tables_.find(tablename); + if (it == tables_.end()) { + ErrorCode status; + Table* t = client_->OpenTable(tablename, &status); + if (t == NULL || status.GetType() != ErrorCode::kOK) { + LOG(ERROR) << "[gtxn] can't create table :" << tablename << "," << status.ToString(); + return NULL; + } + return t; + } + return (it->second).first; +} + +bool GlobalTxnInternal::ConflictWithOtherWrite(const std::vector* ws, + RowReader* reader, + ErrorCode* status) { + RowReader::TRow row; + reader->ToMap(&row); + + // check every cell + for (auto it = ws->begin(); it != ws->end(); ++it) { + const Write& w = *it; + const std::string& w_cf = w.ColFamily(); + if (row.find(w_cf) == row.end()) { + VLOG(12) << "[gtxn][prewrite][stxn_read]" << w.DebugString() + << "not found [" << w_cf << "]"; + continue; + } else { + // check Write column + const std::string& w_write = w.WriteName(); + if (row[w_cf].find(w_write) != row[w_cf].end()) { + for (auto k = row[w_cf][w_write].rbegin(); k != row[w_cf][w_write].rend(); ++k) { + std::string write_value = k->second; + int write_type; + int64_t data_start_ts; + DecodeWriteValue(write_value, &write_type, &data_start_ts); + VLOG(12) << "[gtxn][prewrite][stxn_read]" << w.DebugString() + << " prewrite_start_ts:" << prewrite_start_ts_ + << " found _W_ :" << k->first + << " type: " << write_type + << " data_ts: " << data_start_ts; + if (k->first 
>= prewrite_start_ts_) { + status->SetFailed(ErrorCode::kGTxnWriteConflict, + "writing by others ts:" + std::to_string(k->first)); + return true; + } + } + } else { + VLOG(12) << "[gtxn][prewrite][stxn_read]" << w.DebugString() + << "not found _W_ col"; + } + // check Lock column + const std::string& w_lock = w.LockName(); + if (row[w_cf].find(w_lock) != row[w_cf].end()) { + auto k = row[w_cf][w_lock].rbegin(); + if (k != row[w_cf][w_lock].rend()) { + VLOG(12) << "[gtxn][prewrite][stxn_read]" << w.DebugString() + << "locked@: " << k->first; + status->SetFailed(ErrorCode::kGTxnLockConflict, + w.DebugString() + "locked@:" + std::to_string(k->first)); + return true; + } + } + } + } + return false; +} + +void GlobalTxnInternal::SetPrewriteStartTimestamp(const int64_t prewrite_start_ts) { + prewrite_start_ts_ = prewrite_start_ts; +} + +bool GlobalTxnInternal::IsGTxnColumnFamily(const std::string& tablename, + const std::string& column_family) { + MutexLock lock(&tables_mu_); + auto it = tables_.find(tablename); + if (it != tables_.end()) { + std::set& gtxn_cfs = (it->second).second; + auto cfs_it = gtxn_cfs.find(column_family); + if (cfs_it != gtxn_cfs.end()) { + return true; + } + } + return false; +} + +std::string GlobalTxnInternal::GetClientSession() { + ClientImpl* client_impl = static_cast(client_); + return client_impl->ClientSession(); +} + +std::string GlobalTxnInternal::DebugString(const Cell& cell, const std::string& msg) const { + std::stringstream ss; + ss << msg << " @ [" << cell.Table()->GetName() << ":" + << cell.RowKey() << ":" << cell.ColFamily() + << ":" << cell.Qualifier() << ":" << cell.Timestamp() << "]"; + return ss.str(); +} + +int64_t GlobalTxnInternal::TEST_Init(const std::string& conf_file) { + if (FLAGS_tera_gtxn_test_opened) { + TEST_GtxnTestHelper_ = new GlobalTxnTestHelper(conf_file); + TEST_GtxnTestHelper_->LoadTxnConf(); + start_ts_ = TEST_GtxnTestHelper_->GetStartTs(); + prewrite_start_ts_ = TEST_GtxnTestHelper_->GetPrewriteStartTs(); + 
} + return start_ts_; +} + +void GlobalTxnInternal::TEST_GetSleep() { + if (FLAGS_tera_gtxn_test_opened) { + TEST_GtxnTestHelper_->GetWait(start_ts_); + } +} + +void GlobalTxnInternal::TEST_Sleep() { + if (FLAGS_tera_gtxn_test_opened) { + TEST_GtxnTestHelper_->Wait(start_ts_); + } +} + +void GlobalTxnInternal::TEST_Destory() { + if (FLAGS_tera_gtxn_test_opened) { + delete TEST_GtxnTestHelper_; + } +} + +int64_t GlobalTxnInternal::TEST_GetCommitTimestamp() { + return TEST_GtxnTestHelper_->GetCommitTs(); +} + +int64_t GlobalTxnInternal::TEST_GetPrewriteStartTimestamp() { + return TEST_GtxnTestHelper_->GetPrewriteStartTs(); +} + +void GlobalTxnInternal::PerfReadDelay(int64_t begin_time, int64_t finish_time) { + read_cost_time_.Add(finish_time - begin_time); +} +void GlobalTxnInternal::PerfCommitDelay(int64_t begin_time, int64_t finish_time) { + commit_cost_time_.Add(finish_time - begin_time); +} + +void GlobalTxnInternal::PerfPrewriteDelay(int64_t begin_time, int64_t finish_time) { + prewrite_cost_time_.Add(finish_time - begin_time); +} + +void GlobalTxnInternal::PerfPrimaryCommitDelay(int64_t begin_time, int64_t finish_time) { + primary_cost_time_.Add(finish_time - begin_time); +} + +void GlobalTxnInternal::PerfSecondariesCommitDelay(int64_t begin_time, int64_t finish_time) { + secondaries_cost_time_.Add(finish_time - begin_time); +} + +void GlobalTxnInternal::PerfAckDelay(int64_t begin_time, int64_t finish_time) { + acks_cost_time_.Add(finish_time - begin_time); +} + +void GlobalTxnInternal::PerfNotifyDelay(int64_t begin_time, int64_t finish_time) { + notifies_cost_time_.Add(finish_time - begin_time); +} + +void GlobalTxnInternal::PerfReport() { + gtxn_read_delay_us.Add(read_cost_time_.Clear()); + gtxn_commit_delay_us.Add(commit_cost_time_.Clear()); + gtxn_prewrite_delay_us.Add(prewrite_cost_time_.Clear()); + gtxn_primary_delay_us.Add(primary_cost_time_.Clear()); + gtxn_secondaries_delay_us.Add(secondaries_cost_time_.Clear()); + 
gtxn_acks_delay_us.Add(acks_cost_time_.Clear()); + gtxn_notifies_delay_us.Add(notifies_cost_time_.Clear()); +} + +} // namespace tera + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/sdk/global_txn_internal.h b/src/sdk/global_txn_internal.h new file mode 100644 index 000000000..95eaae825 --- /dev/null +++ b/src/sdk/global_txn_internal.h @@ -0,0 +1,366 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. +// +// Author: baorenyi@baidu.com + +#ifndef TERA_SDK_GLOBAL_TXN_INTERNAL_H_ +#define TERA_SDK_GLOBAL_TXN_INTERNAL_H_ + +#include +#include +#include +#include +#include + +#include "common/mutex.h" +#include "io/coding.h" +#include "sdk/global_txn.h" +#include "sdk/test/global_txn_testutils.h" +#include "sdk/sdk_utils.h" +#include "sdk/single_row_txn.h" +#include "sdk/table_impl.h" +#include "sdk/timeoracle_client_impl.h" +#include "tera.h" +#include "common/timer.h" + +namespace tera { + +class Cell; +class GlobalTxnTestHelper; +class Write; + +inline void PrintCostTime(const std::string& msg, int64_t begin_time) { + VLOG(12) << msg <<" cost: " << get_micros() - begin_time; +} + +inline std::string Int64ToEncodedString(int64_t n) { + char buf[sizeof(int64_t)]; + io::EncodeBigEndian(buf, n); + std::string s (buf, sizeof(int64_t)); + return s; +} + +inline int64_t EncodedStringToInt64(const std::string& s) { + return io::DecodeBigEndain(s.c_str()); +} + +inline std::string PackLockName(const std::string& qualifier) { + return qualifier + "_L_"; +} + +inline std::string PackWriteName(const std::string& qualifier) { + return qualifier + "_W_"; +} + +inline std::string EncodeLockValue(int type, const std::string& primary_str) { + return (char)type + primary_str; +} + +inline bool DecodeLockValue(const std::string& value, + int* type, tera::PrimaryInfo* info) { + if (value.length() > 1) { + *type = (int)value[0]; + return 
info->ParseFromString(value.substr(1)); + } else { + *type = -1; + return false; + } +} + +inline std::string EncodeWriteValue(int type, int64_t timestamp) { + return (char)type + Int64ToEncodedString(timestamp); +} + +inline bool DecodeWriteValue(const std::string& value, int* type, int64_t* timestamp) { + if (value.length() > 1) { + *type = (int)value[0]; + *timestamp = EncodedStringToInt64(value.substr(1)); + return true; + } else { + *type = -1; + *timestamp = -1; + return false; + } +} + +inline std::string PackNotifyName(const std::string& column_family, + const std::string& qualifier) { + return column_family + ":" + qualifier; +} + +inline bool BadQualifier(const std::string& qualifier) { + size_t q_len = qualifier.length(); + return q_len >= 3 && qualifier[q_len - 1] == '_' && qualifier[q_len - 3] == '_'; +} + +struct PrewriteContext { + std::vector* ws; + Transaction* gtxn; + std::string table_name; + std::string row_key; + ErrorCode status; + PrewriteContext(std::vector* same_row_ws, + Transaction* txn, + const std::string& tablename, + const std::string& rowkey) : + ws(same_row_ws), + gtxn(txn), + table_name(tablename), + row_key(rowkey) { + status.SetFailed(ErrorCode::kOK); + } + const std::string DebugString() const { + return "[tablename=" + table_name + ",rowkey=" + row_key + "]" + status.ToString(); + } +}; +// one user reader will have one InternalReaderContext +struct InternalReaderContext { + int expected_cell_cnt; + int active_cell_cnt; + int fail_cell_cnt; + int not_found_cnt; + RowReader* user_reader; + Transaction* gtxn; + std::map cell_map; + RowResult results; + ErrorCode last_err; + + InternalReaderContext(int expected_cnt, RowReader* reader, Transaction* txn) + : expected_cell_cnt(expected_cnt), + active_cell_cnt(0), + fail_cell_cnt(0), + not_found_cnt(0), + user_reader(reader), + gtxn(txn) {} +}; +// one cell reader will have one CellReaderContext +struct CellReaderContext { + Cell* cell; + InternalReaderContext* internal_reader_ctx; + 
ErrorCode status; + CellReaderContext(Cell* c, InternalReaderContext* ctx) + : cell(c), + internal_reader_ctx(ctx) {} +}; + +class Cell { +public: + Cell(tera::Table* table, + const std::string& row_key, + const std::string& column_family, + const std::string& qualifier, + const int64_t timestamp = 0, + const std::string& value = "") : + table_(table), + row_key_(row_key), + column_family_(column_family), + qualifier_(qualifier), + timestamp_(timestamp), + value_(value), + tablename_("") { + + assert(table_ != NULL); + tablename_ = table_->GetName(); + } + + tera::Table* Table() const { return table_; } + + const std::string TableName() const { return tablename_; } + const std::string& RowKey() const { return row_key_; } + const std::string& ColFamily() const { return column_family_; } + const std::string& Qualifier() const { return qualifier_; } + const std::string LockName() const { return PackLockName(qualifier_); } + const std::string WriteName() const { return PackWriteName(qualifier_); } + const std::string NotifyName() const { return PackNotifyName(column_family_, qualifier_); } + const int64_t Timestamp() const { return timestamp_; } + void SetTimestamp(const int64_t timestamp) { + timestamp_ = timestamp; + } + const std::string& Value() const { return value_; } + void SetValue(const std::string& value) { + value_ = value; + } +private: + tera::Table* table_; + std::string row_key_; + std::string column_family_; + std::string qualifier_; + int64_t timestamp_; + std::string value_; + std::string tablename_; +}; + +class Write { +public: + Write(const Cell& cell, const int& type = 0) + : cell_(cell), + type_(type), + is_primary_(false) {} + + int WriteType() const { return type_; } + bool IsPrimary() const { return is_primary_; } + tera::Table* Table() const { return cell_.Table(); } + const std::string TableName() const { return cell_.TableName(); } + const std::string& RowKey() const { return cell_.RowKey(); } + const std::string& ColFamily() const { return 
cell_.ColFamily(); } + const std::string& Qualifier() const { return cell_.Qualifier(); } + const std::string LockName() const { return cell_.LockName(); } + const std::string WriteName() const { return cell_.WriteName(); } + const std::string NotifyName() const { return cell_.NotifyName(); } + const int64_t Timestamp() const { return cell_.Timestamp(); } + const std::string& Value() const { return cell_.Value(); } + const int64_t GetSize() { + return cell_.RowKey().length() + cell_.ColFamily().length() + + cell_.Qualifier().length() + cell_.Value().length(); + } + bool IsSameRow(Write* w) { + return RowKey() == w->RowKey() + && Table() == w->Table(); + } + + void Serialize(const int64_t start_ts, + const std::string& session, + std::string* primary_info) { + tera::PrimaryInfo primary; + primary.set_table_name(TableName()); + primary.set_row_key(RowKey()); + primary.set_column_family(ColFamily()); + primary.set_qualifier(Qualifier()); + primary.set_gtxn_start_ts(start_ts); + primary.set_client_session(session), + primary.SerializeToString(primary_info); + } + + const std::string DebugString() const { + std::stringstream ss; + ss <<"[" << TableName() << ":" << RowKey() << ":" << ColFamily() + << ":" << Qualifier() << "]"; + return ss.str(); + } + +private: + tera::Cell cell_; + int type_; + bool is_primary_; +}; + +class GlobalTxnInternal { +public: + friend class GlobalTxn; + GlobalTxnInternal(tera::Client* client); + + ~GlobalTxnInternal(); + // for common + void SetStartTimestamp(int64_t ts); + + bool CheckTable(Table* table, ErrorCode* status); + + Table* FindTable(const std::string& tablename); + + bool IsPrimary(const tera::Cell& cell, + const tera::PrimaryInfo& primary_info); + + bool IsGTxnColumnFamily(const std::string& tablename, + const std::string& column_family); + + // for get + bool VerifyUserRowReader(RowReader* user_reader); + + bool PrimaryIsLocked(const tera::PrimaryInfo& primary_info, + const int64_t lock_ts, + ErrorCode* status); + + bool 
IsLockedByOthers(RowReader::TRow& row, const tera::Cell& cell); + + bool SuspectLive(const tera::PrimaryInfo& primary_info); + + // for prewrite + void BuildRowReaderForPrewrite(const std::vector& ws, RowReader* reader); + + void BuildRowMutationForPrewrite(std::vector* ws, + RowMutation* txn_mu, + const std::string& primary_info); + + bool ConflictWithOtherWrite(const std::vector* ws, + RowReader* reader, + ErrorCode* status); + + // for applyMutation + bool VerifyUserRowMutation(RowMutation* user_mu); + bool VerifyWritesSize(RowMutation* user_mu, int64_t* size); + + // for commit + void BuildRowMutationForCommit(std::vector* ws, + RowMutation* txn_mu, + const int64_t commit_ts); + + void BuildRowMutationForAck(std::vector* ws, RowMutation* txn_mu); + + void BuildRowMutationForNotify(std::vector* ws, + RowMutation* txn_mu, + const int64_t commit_ts); + + void SetPrewriteStartTimestamp(const int64_t prewrite_start_ts); + + // for timeout + void SetCommitDuration(int64_t timeout_ms); + void SetInternalSdkTaskTimeout(RowMutation* mutation); + void SetInternalSdkTaskTimeout(RowReader* reader); + bool IsTimeOut(); + + // for other transaction alive + std::string GetClientSession(); +private: + // for pref + void UpdateTimerCounter(Counter* c) { + c->Set(get_micros() - c->Get()); + } + + // for debug and test + std::string DebugString(const tera::Cell& cell, const std::string& msg) const ; + int64_t TEST_Init(const std::string& conf_file); + void TEST_Sleep(); + void TEST_GetSleep(); + void TEST_Destory(); + int64_t TEST_GetCommitTimestamp(); + int64_t TEST_GetPrewriteStartTimestamp(); + + void PerfReadDelay(int64_t begin_time, int64_t finish_time); + void PerfCommitDelay(int64_t begin_time, int64_t finish_time); + void PerfPrewriteDelay(int64_t begin_time, int64_t finish_time); + void PerfPrimaryCommitDelay(int64_t begin_time, int64_t finish_time); + void PerfSecondariesCommitDelay(int64_t begin_time, int64_t finish_time); + void PerfAckDelay(int64_t begin_time, 
int64_t finish_time); + void PerfNotifyDelay(int64_t begin_time, int64_t finish_time); + + void PerfReport(); +private: + GlobalTxnInternal(const GlobalTxnInternal&) = delete; + GlobalTxnInternal& operator=(const GlobalTxnInternal&) = delete; + // for test + GlobalTxnTestHelper* TEST_GtxnTestHelper_; + // tablename-> (Table*, set(gtxn_cf_name)) + typedef std::map > > TableInfoMap; + TableInfoMap tables_; + mutable Mutex tables_mu_; + int64_t start_ts_; + int64_t prewrite_start_ts_; + + // for record this transaction perf + Counter read_cost_time_; + Counter commit_cost_time_; + Counter prewrite_cost_time_; + Counter primary_cost_time_; + Counter secondaries_cost_time_; + Counter acks_cost_time_; + Counter notifies_cost_time_; + + int64_t terminal_time_; + std::atomic is_timeout_; + tera::Client* client_; +}; + +} // namespace tera + +#endif // TERA_SDK_GLOBAL_TXN_INTERNAL_H_ diff --git a/src/sdk/http/http.cc b/src/sdk/http/http.cc index 562c647f3..da7b571c0 100644 --- a/src/sdk/http/http.cc +++ b/src/sdk/http/http.cc @@ -13,7 +13,7 @@ #include "proto/http.pb.h" #include "tera.h" -#include "utils/counter.h" +#include "common/counter.h" DECLARE_int32(tera_http_ctrl_thread_num); DECLARE_int32(tera_http_request_thread_num); diff --git a/src/sdk/multi_row_txn.cc b/src/sdk/multi_row_txn.cc deleted file mode 100644 index 7f9b1a8c8..000000000 --- a/src/sdk/multi_row_txn.cc +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "common/thread_pool.h" - -#include "sdk/read_impl.h" -#include "sdk/single_row_txn.h" -#include "sdk/table_impl.h" -#include "sdk/multi_row_txn.h" - -namespace tera { - -Transaction* NewTransaction() { - return MultiRowTxn::NewMultiRowTxn(); -} - -Transaction* MultiRowTxn::NewMultiRowTxn() { - // int64_t start_ts = TimeOracle::GetTimestamp(); - int64_t start_ts = 42; - if (start_ts > 0) { - return new MultiRowTxn(start_ts); - } else { - return NULL; - } -} - -MultiRowTxn::MultiRowTxn(int64_t start_ts) - : start_ts_(start_ts) {} - -MultiRowTxn::~MultiRowTxn() {} - -std::string LockColumnName(const std::string& c) { - return c + "__l__"; // lock -} - -std::string WriteColumnName(const std::string& c) { - return c + "__w__"; // write -} - -bool MultiRowTxn::IsWritingByOthers(RowMutation* row_mu, RowReader* reader) { - return false; -} - -bool MultiRowTxn::IsLockedByOthers(RowMutation* row_mu, RowReader* reader) { - return false; -} - -ErrorCode MultiRowTxn::Prewrite(RowMutation* w, RowMutation* primary) { - ErrorCode status; - return status; -} - -bool MultiRowTxn::LockExists(tera::Transaction* single_row_txn, RowMutation* row_mu) { - return false; -} - -ErrorCode MultiRowTxn::Commit() { - assert(writes_.size() > 0); - - ErrorCode status; - return status; -} - -void MultiRowTxn::ApplyMutation(RowMutation* row_mu) { - assert(row_mu != NULL); - writes_.push_back(row_mu); -} - -ErrorCode MultiRowTxn::Get(RowReader* row_reader) { - assert(row_reader != NULL); - - ErrorCode status; - return status; -} - -} // namespace tera - -/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/sdk/multi_row_txn.h b/src/sdk/multi_row_txn.h deleted file mode 100644 index acc9998a6..000000000 --- a/src/sdk/multi_row_txn.h +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright (c) 2016, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef TERA_SDK_TXN_H_ -#define TERA_SDK_TXN_H_ - -#include -#include - -#include "tera.h" - -namespace tera { - -/// cross-row, cross-table transaction -/// 跨行,跨表事务 - -class MultiRowTxn: public Transaction { -public: - static Transaction* NewMultiRowTxn(); - virtual ~MultiRowTxn(); - - virtual ErrorCode Get(RowReader* row_reader); - virtual void ApplyMutation(RowMutation* row_mu); - /// 提交事务 - /// 同步模式下,Commit()的返回值代表了提交操作的结果(成功 或者 失败及其原因) - /// 异步模式下,通过GetError()获取提交结果 - virtual ErrorCode Commit(); - - typedef void (*Callback)(Transaction* transaction); - virtual void SetCommitCallback(Callback callback) {} - virtual Callback GetCommitCallback() { return NULL; } - virtual void SetContext(void* context) {} - virtual void* GetContext() { return NULL; } - virtual const ErrorCode& GetError() { return status_; } - virtual int64_t GetStartTimestamp() { return 0; } - -private: - MultiRowTxn(int64_t start_ts); - MultiRowTxn(const MultiRowTxn&); - void operator=(const MultiRowTxn&); - - bool IsWritingByOthers(RowMutation* row_mu, RowReader* reader); - bool IsLockedByOthers(RowMutation* row_mu, RowReader* reader); - bool LockExists(tera::Transaction* single_row_txn, RowMutation* row_mu); - ErrorCode Prewrite(RowMutation* w, RowMutation* primary); - -private: - int64_t start_ts_; - std::vector writes_; - ErrorCode status_; -}; - -} // namespace tera - -#endif // TERA_SDK_TXN_H_ diff --git a/src/sdk/mutate_impl.cc b/src/sdk/mutate_impl.cc index a90f850d8..634fb3817 100644 --- a/src/sdk/mutate_impl.cc +++ b/src/sdk/mutate_impl.cc @@ -5,7 +5,7 @@ #include "common/base/string_format.h" #include "io/coding.h" #include "sdk/mutate_impl.h" -#include "utils/timer.h" +#include "common/timer.h" namespace tera { diff --git a/src/sdk/mutate_impl.h b/src/sdk/mutate_impl.h index 9b22af41f..c86a98c8d 100644 --- a/src/sdk/mutate_impl.h +++ b/src/sdk/mutate_impl.h @@ -13,7 +13,7 @@ #include "sdk/sdk_task.h" #include "tera.h" #include "types.h" -#include "utils/timer.h" +#include 
"common/timer.h" namespace tera { diff --git a/src/sdk/read_impl.cc b/src/sdk/read_impl.cc index 352e645b0..35738cc53 100644 --- a/src/sdk/read_impl.cc +++ b/src/sdk/read_impl.cc @@ -19,6 +19,7 @@ RowReaderImpl::RowReaderImpl(TableImpl* table, const std::string& row_key) ts_start_(kOldestTs), ts_end_(kLatestTs), max_version_(1), + max_qualifiers_(std::numeric_limits::max()), snapshot_id_(0), timeout_ms_(0), retry_times_(0), @@ -78,6 +79,12 @@ uint32_t RowReaderImpl::GetMaxVersions() { return max_version_; } +void RowReaderImpl::SetMaxQualifiers(uint64_t max_qualifiers) { + max_qualifiers_ = max_qualifiers; +} +uint64_t RowReaderImpl::GetMaxQualifiers() { + return max_qualifiers_; +} /// 设置超时时间(只影响当前操作,不影响Table::SetReadTimeout设置的默认读超时) void RowReaderImpl::SetTimeOut(int64_t timeout_ms) { @@ -303,6 +310,7 @@ const RowReader::ReadColumnList& RowReaderImpl::GetReadColumnList() { void RowReaderImpl::ToProtoBuf(RowReaderInfo* info) { info->set_key(row_key_); info->set_max_version(max_version_); + info->set_max_qualifiers(max_qualifiers_); info->mutable_time_range()->set_ts_start(ts_start_); info->mutable_time_range()->set_ts_end(ts_end_); diff --git a/src/sdk/read_impl.h b/src/sdk/read_impl.h index cf88cd65c..23dabcda1 100644 --- a/src/sdk/read_impl.h +++ b/src/sdk/read_impl.h @@ -13,7 +13,7 @@ #include "sdk/sdk_task.h" #include "tera.h" #include "types.h" -#include "utils/timer.h" +#include "common/timer.h" namespace tera { @@ -44,6 +44,8 @@ class RowReaderImpl : public RowReader, public SdkTask { void SetMaxVersions(uint32_t max_version); /// 返回max_version uint32_t GetMaxVersions(); + void SetMaxQualifiers(uint64_t max_qualifiers); + uint64_t GetMaxQualifiers(); /// 设置超时时间(只影响当前操作,不影响Table::SetReadTimeout设置的默认读超时) void SetTimeOut(int64_t timeout_ms); /// 设置异步回调, 操作会异步返回 @@ -120,6 +122,8 @@ class RowReaderImpl : public RowReader, public SdkTask { Table* GetTable() { return (Table*)table_; } + uint32_t Size() { return 0; } + private: TableImpl* table_; std::string 
row_key_; @@ -137,6 +141,7 @@ class RowReaderImpl : public RowReader, public SdkTask { int64_t ts_start_; int64_t ts_end_; uint32_t max_version_; + uint64_t max_qualifiers_; uint64_t snapshot_id_; int64_t timeout_ms_; diff --git a/src/sdk/rowlock_client.cc b/src/sdk/rowlock_client.cc new file mode 100644 index 000000000..ff145eeb5 --- /dev/null +++ b/src/sdk/rowlock_client.cc @@ -0,0 +1,140 @@ +// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sdk/rowlock_client.h" + +#include +#include + +#include "gflags/gflags.h" + +#include "observer/rowlocknode/ins_rowlock_client_zk_adapter.h" +#include "proto/rowlocknode_rpc.pb.h" +#include "types.h" +#include "utils/utils_cmd.h" + +DECLARE_string(rowlock_server_port); +DECLARE_string(tera_coord_type); +DECLARE_bool(rowlock_test); +DECLARE_int32(rowlock_client_max_fail_times); +DECLARE_bool(mock_rowlock_enable); + +namespace tera{ +namespace observer { + +ThreadPool* RowlockStub::thread_pool_ = NULL; + +void RowlockStub::SetThreadPool(ThreadPool* thread_pool) { + thread_pool_ = thread_pool; +} + +void RowlockStub::SetRpcOption(int32_t max_inflow, int32_t max_outflow, + int32_t pending_buffer_size, int32_t thread_num) { + tera::RpcClientBase::SetOption(max_inflow, max_outflow, + pending_buffer_size, thread_num); +} + +RowlockStub::RowlockStub(const std::string& server_addr, + int32_t rpc_timeout) + : tera::RpcClient(server_addr), + rpc_timeout_(rpc_timeout) { +} + +RowlockStub::~RowlockStub() {} + +bool RowlockStub::TryLock(const RowlockRequest* request, + RowlockResponse* response, + std::function done) { + return SendMessageWithRetry(&RowlockService::Stub::Lock, + request, response, done, "TryLock", + rpc_timeout_, thread_pool_); +} + +bool RowlockStub::UnLock(const RowlockRequest* request, + RowlockResponse* response, + std::function done) { + return 
SendMessageWithRetry(&RowlockService::Stub::UnLock, + request, response, done, "UnLock", + rpc_timeout_, thread_pool_); +} + +void RowlockClient::SetThreadPool(ThreadPool* thread_pool) { + RowlockStub::SetThreadPool(thread_pool); +} + +RowlockClient::RowlockClient(const std::string& addr, int32_t rpc_timeout) + : local_addr_(tera::utils::GetLocalHostName() + ":" + FLAGS_rowlock_server_port) { + srand((unsigned int)(time(NULL))); + + SetZkAdapter(); +} + +void RowlockClient::Update(const std::vector& addrs) { + std::string addr = addrs[rand() % addrs.size()]; + std::shared_ptr client(new RowlockStub(addr)); + + MutexLock locker(&client_mutex_); + client_.swap(client); +} + +bool RowlockClient::TryLock(const RowlockRequest* request, + RowlockResponse* response, + std::function done) { + std::shared_ptr client; + { + MutexLock locker(&client_mutex_); + // COW ref +1 + client = client_; + } + for (int32_t i = 0; i < FLAGS_rowlock_client_max_fail_times; ++i) { + bool ret = client->TryLock(request, response, done); + if (ret) { + return true; + } + // rpc fail + SetZkAdapter(); + } + return false; +} + +bool RowlockClient::UnLock(const RowlockRequest* request, + RowlockResponse* response, + std::function done) { + std::shared_ptr client; + { + MutexLock locker(&client_mutex_); + // copy-on-write ref+1 + client = client_; + } + for (int32_t i = 0; i < FLAGS_rowlock_client_max_fail_times; ++i) { + bool ret = client->TryLock(request, response, done); + if (ret) { + return true; + } + // rpc fail + SetZkAdapter(); + } + return false; +} + +void RowlockClient::SetZkAdapter() { + // mock rowlock, do not need a real zk adapter + if (FLAGS_mock_rowlock_enable == true) { + return; + } + + if (FLAGS_tera_coord_type == "zk") { + zk_adapter_.reset(new ZkRowlockClientZkAdapter(this, local_addr_)); + } else if (FLAGS_tera_coord_type == "ins") { + zk_adapter_.reset(new InsRowlockClientZkAdapter(this, local_addr_)); + } else { + LOG(ERROR) << "Unknow coord type for rowlock client"; + 
return; + } + + zk_adapter_->Init(); +} + +} // namespace observer +} // namespace tera diff --git a/src/sdk/rowlock_client.h b/src/sdk/rowlock_client.h new file mode 100644 index 000000000..c475a180f --- /dev/null +++ b/src/sdk/rowlock_client.h @@ -0,0 +1,77 @@ +// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_SDK_ROWLOCK_CLIENT_H_ +#define TERA_SDK_ROWLOCK_CLIENT_H_ + +#include + +#include +#include + +#include "common/mutex.h" +#include "observer/rowlocknode/zk_rowlock_client_zk_adapter.h" +#include "proto/rpc_client.h" +#include "proto/rowlocknode_rpc.pb.h" + +namespace tera { +namespace observer { + +class RowlockClientZkAdapter; + +class RowlockStub : public tera::RpcClient { +public: + static void SetThreadPool(ThreadPool* thread_pool); + + static void SetRpcOption(int32_t max_inflow = -1, int32_t max_outflow = -1, + int32_t pending_buffer_size = -1, + int32_t thread_num = -1); + + RowlockStub(const std::string& addr = "", int32_t rpc_timeout = 60000); + ~RowlockStub(); + + virtual bool TryLock(const RowlockRequest* request, + RowlockResponse* response, + std::function done = NULL); + + virtual bool UnLock(const RowlockRequest* request, + RowlockResponse* response, + std::function done = NULL); + + +private: + int32_t rpc_timeout_; + static ThreadPool* thread_pool_; +}; + +class RowlockClient { +public: + static void SetThreadPool(ThreadPool* thread_pool); + + RowlockClient(const std::string& addr = "", int32_t rpc_timeout = 60000); + ~RowlockClient() {} + + virtual bool TryLock(const RowlockRequest* request, + RowlockResponse* response, + std::function done = NULL); + + virtual bool UnLock(const RowlockRequest* request, + RowlockResponse* response, + std::function done = NULL); + + void Update(const std::vector& addrs); + +private: + void SetZkAdapter(); + +private: + mutable Mutex client_mutex_; + std::shared_ptr client_; + 
std::unique_ptr zk_adapter_; + std::string local_addr_; +}; + +} // namespace observer +} // namespace tera +#endif // TERA_SDK_ROWLOCK_CLIENT_H diff --git a/src/sdk/scan.cc b/src/sdk/scan.cc index 846cc7044..f4b630216 100644 --- a/src/sdk/scan.cc +++ b/src/sdk/scan.cc @@ -31,6 +31,10 @@ void ScanDescriptor::SetMaxVersions(int32_t versions) { impl_->SetMaxVersions(versions); } +void ScanDescriptor::SetMaxQualifiers(uint64_t max_qualifiers) { + impl_->SetMaxQualifiers(max_qualifiers); +} + void ScanDescriptor::SetPackInterval(int64_t interval) { impl_->SetPackInterval(interval); } diff --git a/src/sdk/scan_impl.cc b/src/sdk/scan_impl.cc index 786a05547..68049b017 100644 --- a/src/sdk/scan_impl.cc +++ b/src/sdk/scan_impl.cc @@ -5,6 +5,7 @@ #include "sdk/scan_impl.h" #include +#include #include "common/this_thread.h" #include "common/base/string_ext.h" @@ -14,16 +15,18 @@ #include "sdk/filter_utils.h" #include "sdk/sdk_utils.h" #include "sdk/table_impl.h" -#include "utils/atomic.h" -#include "utils/timer.h" +#include "common/atomic.h" +#include "common/timer.h" DECLARE_bool(tera_sdk_batch_scan_enabled); DECLARE_int64(tera_sdk_scan_number_limit); DECLARE_int64(tera_sdk_scan_buffer_size); DECLARE_int32(tera_sdk_max_batch_scan_req); DECLARE_int32(tera_sdk_batch_scan_max_retry); +DECLARE_int32(tera_sdk_sync_scan_max_retry); DECLARE_int64(tera_sdk_scan_timeout); DECLARE_int64(batch_scan_delay_retry_in_us); +DECLARE_int64(sync_scan_delay_retry_in_ms); namespace tera { @@ -374,6 +377,7 @@ ResultStreamSyncImpl::ResultStreamSyncImpl(TableImpl* table, response_(new tera::ScanTabletResponse), result_pos_(0), finish_cond_(&finish_mutex_), + retry_times_(0), finish_(false) { table_ptr_->ScanTabletSync(this); } @@ -392,13 +396,37 @@ bool ResultStreamSyncImpl::Done(ErrorCode* err) { while (1) { const string& scan_end_key = scan_desc_impl_->GetEndRowKey(); /// scan failed - if (response_->status() != kTabletNodeOk) { + while (response_->status() != kTabletNodeOk && + retry_times_ <= 
FLAGS_tera_sdk_sync_scan_max_retry) { + LOG(WARNING) << "[RETRY " << ++retry_times_ << "] scan error: " + << StatusCodeToString(response_->status()); + + int64_t wait_time; + if(response_->status() == kKeyNotInRange) { + wait_time = FLAGS_sync_scan_delay_retry_in_ms; + } else { + /// Wait less than 60 seconds + wait_time = std::min(static_cast(FLAGS_sync_scan_delay_retry_in_ms * (1 << (retry_times_ - 1))), + static_cast(60000)); + } + + delete response_; + response_ = new tera::ScanTabletResponse; + result_pos_ = 0; + Reset(); + + ThisThread::Sleep(wait_time); + table_ptr_->ScanTabletSync(this); + } + + if(response_->status() != kTabletNodeOk) { if (err) { err->SetFailed(ErrorCode::kSystem, - StatusCodeToString(response_->status())); + StatusCodeToString(response_->status())); } return true; } + if (result_pos_ < response_->results().key_values_size()) { break; } @@ -542,6 +570,7 @@ ScanDescImpl::ScanDescImpl(const string& rowkey) number_limit_(FLAGS_tera_sdk_scan_number_limit), is_async_(FLAGS_tera_sdk_batch_scan_enabled), max_version_(1), + max_qualifiers_(std::numeric_limits::max()), pack_interval_(FLAGS_tera_sdk_scan_timeout), snapshot_(0), value_converter_(&DefaultValueConverter) { @@ -558,6 +587,7 @@ ScanDescImpl::ScanDescImpl(const ScanDescImpl& impl) number_limit_(impl.number_limit_), is_async_(impl.is_async_), max_version_(impl.max_version_), + max_qualifiers_(impl.max_qualifiers_), pack_interval_(impl.pack_interval_), snapshot_(impl.snapshot_), table_schema_(impl.table_schema_) { @@ -622,6 +652,10 @@ void ScanDescImpl::SetMaxVersions(int32_t versions) { max_version_ = versions; } +void ScanDescImpl::SetMaxQualifiers(int64_t max_qualifiers) { + max_qualifiers_ = max_qualifiers; +} + void ScanDescImpl::SetPackInterval(int64_t interval) { pack_interval_ = interval; } @@ -693,6 +727,10 @@ int32_t ScanDescImpl::GetMaxVersion() const { return max_version_; } +int64_t ScanDescImpl::GetMaxQualifiers() const { + return max_qualifiers_; +} + int64_t 
ScanDescImpl::GetPackInterval() const { return pack_interval_; } diff --git a/src/sdk/scan_impl.h b/src/sdk/scan_impl.h index 2d808044f..32d647c8b 100644 --- a/src/sdk/scan_impl.h +++ b/src/sdk/scan_impl.h @@ -16,7 +16,7 @@ #include "sdk/sdk_task.h" #include "tera.h" #include "types.h" -#include "utils/timer.h" +#include "common/timer.h" namespace tera { @@ -162,21 +162,10 @@ class ResultStreamSyncImpl : public ResultStreamImpl { int32_t result_pos_; mutable Mutex finish_mutex_; common::CondVar finish_cond_; + int32_t retry_times_; bool finish_; }; -struct ScanTask : public SdkTask { - ResultStreamImpl* stream; - tera::ScanTabletRequest* request; - tera::ScanTabletResponse* response; - - uint32_t retry_times; - void IncRetryTimes() { retry_times++; } - uint32_t RetryTimes() { return retry_times; } - ScanTask() : SdkTask(SdkTask::SCAN), stream(NULL), request(NULL), - response(NULL), retry_times(0) {} -}; - typedef ScanDescriptor::ValueConverter ValueConverter; class ScanDescImpl { @@ -195,6 +184,8 @@ class ScanDescImpl { void SetMaxVersions(int32_t versions); + void SetMaxQualifiers(int64_t max_qualifiers); + void SetPackInterval(int64_t timeout); void SetTimeRange(int64_t ts_end, int64_t ts_start); @@ -238,6 +229,8 @@ class ScanDescImpl { int32_t GetMaxVersion() const; + int64_t GetMaxQualifiers() const; + int64_t GetPackInterval() const; uint64_t GetSnapshot() const; @@ -272,6 +265,7 @@ class ScanDescImpl { int64_t number_limit_; bool is_async_; int32_t max_version_; + int64_t max_qualifiers_; int64_t pack_interval_; uint64_t snapshot_; std::string filter_string_; @@ -280,6 +274,26 @@ class ScanDescImpl { TableSchema table_schema_; }; +struct ScanTask : public SdkTask { + ResultStreamImpl* stream; + tera::ScanTabletRequest* request; + tera::ScanTabletResponse* response; + + uint32_t retry_times; + void IncRetryTimes() { retry_times++; } + uint32_t RetryTimes() { return retry_times; } + ScanTask() : SdkTask(SdkTask::SCAN), stream(NULL), request(NULL), + 
response(NULL), retry_times(0) {} + + virtual bool IsAsync() { return false; } + virtual uint32_t Size() { return 0; } + virtual int64_t TimeOut() { return 0; } + virtual void Wait() {} + virtual void SetError(ErrorCode::ErrorCodeType err, + const std::string& reason) {} + virtual const std::string& RowKey() { return stream->GetScanDesc()->GetStartRowKey(); } +}; + } // namespace tera #endif // TERA_SDK_SCAN_IMPL_H_ diff --git a/src/sdk/schema_impl.cc b/src/sdk/schema_impl.cc index 7e9e3b264..bf8cc6f00 100644 --- a/src/sdk/schema_impl.cc +++ b/src/sdk/schema_impl.cc @@ -14,6 +14,7 @@ DECLARE_int64(tera_master_merge_tablet_size); namespace tera { const std::string TableDescImpl::DEFAULT_LG_NAME = "lg0"; +const std::string TableDescImpl::NOTIFY_LG_NAME = "notify"; const std::string TableDescImpl::DEFAULT_CF_NAME = ""; /// 列族名字仅允许使用字母、数字和下划线构造, 长度不超过256 @@ -29,7 +30,9 @@ CFDescImpl::CFDescImpl(const std::string& cf_name, acl_(0), owner_(0), disk_quota_(-1), - type_("") { + type_(""), + is_global_transaction_(false), + is_notify_enabled_(false) { } int32_t CFDescImpl::Id() const { @@ -88,6 +91,30 @@ ACL CFDescImpl::Acl() const { return ACL(); } +void CFDescImpl::EnableGlobalTransaction() { + is_global_transaction_ = true; +} + +void CFDescImpl::DisableGlobalTransaction() { + is_global_transaction_ = false; +} + +bool CFDescImpl::GlobalTransaction() const { + return is_global_transaction_; +} + +void CFDescImpl::EnableNotify() { + is_notify_enabled_ = true; +} + +void CFDescImpl::DisableNotify() { + is_notify_enabled_ = false; +} + +bool CFDescImpl::IsNotifyEnabled() const { + return is_notify_enabled_; +} + void CFDescImpl::SetType(const std::string& type) { type_ = type; } diff --git a/src/sdk/schema_impl.h b/src/sdk/schema_impl.h index a68a09f77..dcdb6c9a6 100644 --- a/src/sdk/schema_impl.h +++ b/src/sdk/schema_impl.h @@ -48,6 +48,18 @@ class CFDescImpl : public ColumnFamilyDescriptor { ACL Acl() const; + void EnableGlobalTransaction(); + + void 
DisableGlobalTransaction(); + + bool GlobalTransaction() const; + + void EnableNotify(); + + void DisableNotify(); + + bool IsNotifyEnabled() const; + void SetType(const std::string& type); const std::string& Type() const; @@ -63,6 +75,8 @@ class CFDescImpl : public ColumnFamilyDescriptor { int32_t owner_; int32_t disk_quota_; std::string type_; + bool is_global_transaction_; + bool is_notify_enabled_; }; /// 局部性群组描述 @@ -192,6 +206,7 @@ class TableDescImpl { std::string Alias() const; static const std::string DEFAULT_LG_NAME; + static const std::string NOTIFY_LG_NAME; static const std::string DEFAULT_CF_NAME; private: diff --git a/src/sdk/sdk_metric_name.h b/src/sdk/sdk_metric_name.h new file mode 100644 index 000000000..5b358e912 --- /dev/null +++ b/src/sdk/sdk_metric_name.h @@ -0,0 +1,58 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_SDK_METRIC_NAME_H_ +#define TERA_SDK_METRIC_NAME_H_ + +#include + +#include "common/metric/hardware_collectors.h" + +namespace tera { + +// global transaction labels +const char* const kGTxnLabelRead = "gtxn:read"; +const char* const kGTxnLabelCommit = "gtxn:commit"; +const char* const kGTxnLabelTso = "gtxn:tso"; + +// glabel transaction read metric names +const char* const kGTxnReadDelayMetric = "tera_sdk_gtxn_read_delay_us"; +const char* const kGTxnReadCountMetric = "tera_sdk_gtxn_read_count"; +const char* const kGTxnReadFailCountMetric = "tera_sdk_gtxn_read_fail_count"; +const char* const kGTxnReadRetryCountMetric = "tera_sdk_gtxn_read_retry_count"; +const char* const kGTxnReadRollBackCountMetric = "tera_sdk_gtxn_read_rollback_count"; +const char* const kGTxnReadRollForwardCountMetric = "tera_sdk_gtxn_read_rollforward_count"; + +// global transaction commit metric names +const char* const kGTxnCommitDelayMetric = "tera_sdk_gtxn_commit_delay_us"; +const char* const kGTxnCommitCountMetric = 
"tera_sdk_gtxn_commit_count"; +const char* const kGTxnCommitFailCountMetric = "tera_sdk_gtxn_commit_fail_count"; + +const char* const kGTxnPrewriteDelayMetric = "tera_sdk_gtxn_prewrite_delay_us"; +const char* const kGTxnPrewriteCountMetric = "tera_sdk_gtxn_prewrite_count"; +const char* const kGTxnPrewriteFailCountMetric = "tera_sdk_gtxn_prewrite_fail_count"; + +const char* const kGTxnPrimaryDelayMetric = "tera_sdk_gtxn_primary_delay_us"; +const char* const kGTxnPrimaryCountMetric = "tera_sdk_gtxn_primary_count"; +const char* const kGTxnPrimaryFailCountMetric = "tera_sdk_gtxn_primary_fail_count"; + +const char* const kGTxnSecondariesDelayMetric = "tera_sdk_gtxn_secondaries_delay_us"; +const char* const kGTxnSecondariesCountMetric = "tera_sdk_gtxn_secondaries_count"; +const char* const kGTxnSecondariesFailCountMetric = "tera_sdk_gtxn_secondaries_fail_count"; + +const char* const kGTxnAcksDelayMetric = "tera_sdk_gtxn_acks_delay_us"; +const char* const kGTxnAcksCountMetric = "tera_sdk_gtxn_acks_count"; +const char* const kGTxnAcksFailCountMetric = "tera_sdk_gtxn_acks_fail_count"; + +const char* const kGTxnNotifiesDelayMetric = "tera_sdk_gtxn_notifies_delay_us"; +const char* const kGTxnNotifiesCountMetric = "tera_sdk_gtxn_notifies_count"; +const char* const kGTxnNotifiesFailCountMetric = "tera_sdk_gtxn_notifies_fail_count"; + +const char* const kGTxnTsoDelayMetric = "tera_sdk_gtxn_tso_delay_us"; +const char* const kGTxnTsoRequestCountMetric = "tera_sdk_gtxn_tso_request_count"; +} // end namespace tera + +#endif // TERA_SDK_METRIC_NAME_H_ + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/sdk/sdk_perf.cc b/src/sdk/sdk_perf.cc new file mode 100644 index 000000000..7cc5704d8 --- /dev/null +++ b/src/sdk/sdk_perf.cc @@ -0,0 +1,85 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "sdk/sdk_perf.h" + +#include "gflags/gflags.h" + +#include "common/metric/metric_counter.h" +#include "sdk/sdk_metric_name.h" + +namespace tera { +namespace sdk { + +void PerfCollecter::DumpLog() { + std::shared_ptr latest_report = CollectorReportPublisher::GetInstance().GetCollectorReport(); + int64_t interval = latest_report->interval_ms; + if (interval <= 0) { + // maybe happen at first report, the metric values must be 0 + // set to any non-zero value to avoid div 0 + VLOG(16) << "Metric Report interval is 0"; + interval = 1000; + } + int64_t read_delay = latest_report->FindMetricValue(kGTxnReadDelayMetric, kGTxnLabelRead); + int64_t read_cnt = latest_report->FindMetricValue(kGTxnReadCountMetric, kGTxnLabelRead); + read_delay = read_cnt > 0 ? read_delay / read_cnt : 0; + + LOG(INFO) << "[perf][gtxn] " + << "read_delay " << read_delay << " read_cnt " << read_cnt << " read_fail " + << latest_report->FindMetricValue(kGTxnReadFailCountMetric, kGTxnLabelRead) + << " read_retry_cnt " + << latest_report->FindMetricValue(kGTxnReadRetryCountMetric, kGTxnLabelRead) + << " read_rollback_cnt " + << latest_report->FindMetricValue(kGTxnReadRollBackCountMetric, kGTxnLabelRead) + << " read_rollforward_cnt " + << latest_report->FindMetricValue(kGTxnReadRollForwardCountMetric, kGTxnLabelRead); + + int64_t commit_delay = latest_report->FindMetricValue(kGTxnCommitDelayMetric, kGTxnLabelCommit); + int64_t commit_cnt = latest_report->FindMetricValue(kGTxnCommitCountMetric, kGTxnLabelCommit); + commit_delay = commit_cnt > 0 ? commit_delay / commit_cnt : 0; + + int64_t prewrite_delay = latest_report->FindMetricValue(kGTxnPrewriteDelayMetric, kGTxnLabelCommit); + int64_t prewrite_cnt = latest_report->FindMetricValue(kGTxnPrewriteCountMetric, kGTxnLabelCommit); + prewrite_delay = prewrite_cnt > 0 ? 
prewrite_delay / prewrite_cnt : 0; + + int64_t primary_delay = latest_report->FindMetricValue(kGTxnPrimaryDelayMetric, kGTxnLabelCommit); + int64_t primary_cnt = latest_report->FindMetricValue(kGTxnPrimaryCountMetric, kGTxnLabelCommit); + primary_delay = primary_cnt > 0 ? primary_delay / primary_cnt : 0; + + int64_t secondaries_delay = latest_report->FindMetricValue(kGTxnSecondariesDelayMetric, kGTxnLabelCommit); + int64_t secondaries_cnt = latest_report->FindMetricValue(kGTxnSecondariesCountMetric, kGTxnLabelCommit); + secondaries_delay = secondaries_cnt > 0 ? secondaries_delay / secondaries_cnt : 0; + + LOG(INFO) << "[perf][gtxn] " + << "commit_delay " << commit_delay << " commit_cnt " << commit_cnt << " commit_fail " + << latest_report->FindMetricValue(kGTxnCommitFailCountMetric, kGTxnLabelCommit) + << " prew_delay " << prewrite_delay << " prew_cnt " << prewrite_cnt << " prew_fail " + << latest_report->FindMetricValue(kGTxnPrewriteFailCountMetric, kGTxnLabelCommit) + << " pri_delay " << primary_delay << " pri_cnt " << primary_cnt << " pri_fail " + << latest_report->FindMetricValue(kGTxnPrimaryFailCountMetric, kGTxnLabelCommit) + << " se_delay " << secondaries_delay << " se_cnt " << secondaries_cnt << " se_fail " + << latest_report->FindMetricValue(kGTxnSecondariesFailCountMetric, kGTxnLabelCommit); + + int64_t tso_delay = latest_report->FindMetricValue(kGTxnTsoDelayMetric, kGTxnLabelTso); + int64_t tso_cnt = latest_report->FindMetricValue(kGTxnTsoRequestCountMetric, kGTxnLabelTso); + tso_delay = tso_cnt > 0 ? tso_delay / tso_cnt : 0; + LOG(INFO) << "[perf][gtxn] tso_delay " << tso_delay << " tso_cnt " << tso_cnt; + + int64_t notify_delay = latest_report->FindMetricValue(kGTxnNotifiesDelayMetric, kGTxnLabelCommit); + int64_t notify_cnt = latest_report->FindMetricValue(kGTxnNotifiesCountMetric, kGTxnLabelCommit); + notify_delay = notify_cnt > 0 ? 
notify_delay / notify_cnt : 0; + + int64_t ack_delay = latest_report->FindMetricValue(kGTxnAcksDelayMetric, kGTxnLabelCommit); + int64_t ack_cnt = latest_report->FindMetricValue(kGTxnAcksCountMetric, kGTxnLabelCommit); + ack_delay = ack_cnt > 0 ? ack_delay / ack_cnt : 0; + + LOG(INFO) << "[perf][gtxn] " + << "notify_delay " << notify_delay << " notify_cnt " << notify_cnt << " notify_fail " + << latest_report->FindMetricValue(kGTxnNotifiesFailCountMetric, kGTxnLabelCommit) + << " ack_delay " << ack_delay << " ack_cnt " << ack_cnt << " ack_fail " + << latest_report->FindMetricValue(kGTxnAcksFailCountMetric, kGTxnLabelCommit); +} + +} // namespace sdk +} // namespace tera diff --git a/src/sdk/sdk_perf.h b/src/sdk/sdk_perf.h new file mode 100644 index 000000000..d6b756a9e --- /dev/null +++ b/src/sdk/sdk_perf.h @@ -0,0 +1,54 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_SDK_SDK_PERF_H_ +#define TERA_SDK_SDK_PERF_H_ + +#include "gflags/gflags.h" +#include "glog/logging.h" + +#include "common/metric/metric_counter.h" +#include "common/metric/collector_report.h" +#include "common/thread.h" +#include "common/this_thread.h" +#include "tera.h" + +DECLARE_int32(tera_sdk_perf_collect_interval); + +namespace tera { +namespace sdk { + +class PerfCollecter { +public: + PerfCollecter() : stopped_(false){} + ~PerfCollecter() {} + + void Run() { + thread_.Start(std::bind(&PerfCollecter::ScheduleCollect, this)); + } + + void Stop() { + stopped_ = true; + thread_.Join(); + } + +private: + void ScheduleCollect() { + while (!stopped_) { + CollectorReportPublisher::GetInstance().Refresh(); + DumpLog(); + ThisThread::Sleep(FLAGS_tera_sdk_perf_collect_interval); + } + } + + void DumpLog(); +private: + common::Thread thread_; + bool stopped_; +}; + +} // namespace sdk +} // namespace tera + +#endif // TERA_SDK_SDK_PERF_H_ diff --git a/src/sdk/sdk_task.cc 
b/src/sdk/sdk_task.cc index ce1d64e2d..834bb4a97 100644 --- a/src/sdk/sdk_task.cc +++ b/src/sdk/sdk_task.cc @@ -6,7 +6,7 @@ #include -#include "utils/timer.h" +#include "common/timer.h" DECLARE_int32(tera_sdk_timeout_precision); @@ -124,7 +124,8 @@ SdkTask* SdkTimeoutManager::PopTask(int64_t task_id) { SdkTask* task = it->second; CHECK_EQ(task->GetId(), task_id); map.id_hash_map.erase(it); - map.due_time_map.erase(task); + // make sure that we only erased the right one element + assert(map.due_time_map.erase(task) == 1); return task; } else { return NULL; diff --git a/src/sdk/sdk_task.h b/src/sdk/sdk_task.h index 58f61f65a..34ec25b1d 100644 --- a/src/sdk/sdk_task.h +++ b/src/sdk/sdk_task.h @@ -24,7 +24,8 @@ class SdkTask { enum TYPE { READ, MUTATION, - SCAN + SCAN, + TASKBATCH, }; TYPE Type() { return type_; } @@ -48,6 +49,14 @@ class SdkTask { void DecRef(); void ExcludeOtherRef(); + virtual bool IsAsync() = 0; + virtual uint32_t Size() = 0; + virtual int64_t TimeOut() = 0; + virtual void Wait() = 0; + virtual void SetError(ErrorCode::ErrorCodeType err, + const std::string& reason) = 0; + virtual const std::string& RowKey() = 0; + protected: SdkTask(TYPE type) : type_(type), @@ -76,7 +85,10 @@ typedef void (*StatCallback)(Table* table, SdkTask* task); struct SdkTaskDueTimeComp { bool operator() (SdkTask* lhs, SdkTask* rhs) { - return lhs->DueTime() < rhs->DueTime(); + if (lhs->DueTime() != rhs->DueTime()) { + return lhs->DueTime() < rhs->DueTime(); + } + return lhs->GetId() < rhs->GetId(); } }; diff --git a/src/sdk/sdk_utils.cc b/src/sdk/sdk_utils.cc index 175bc7245..b135b99ed 100644 --- a/src/sdk/sdk_utils.cc +++ b/src/sdk/sdk_utils.cc @@ -18,6 +18,7 @@ #include "sdk/schema_impl.h" #include "sdk/filter_utils.h" +#include "types.h" DECLARE_int64(tera_tablet_write_block_size); DECLARE_int64(tera_tablet_ldb_sst_size); @@ -184,6 +185,12 @@ void ShowTableSchema(const TableSchema& s, bool is_x) { cf_ss << "type=bytes" << ","; } } + if (is_x || (cf_schema.gtxn() != 
false)) { + cf_ss << "gtxn=" << Switch2Str(cf_schema.gtxn()) << ","; + } + if (is_x || (cf_schema.notify() != false)) { + cf_ss << "notify=" << Switch2Str(cf_schema.notify()) << ","; + } cf_ss << "\b>"; if (cf_ss.str().size() > 5) { ss << cf_ss.str(); @@ -281,6 +288,8 @@ void TableDescToSchema(const TableDescriptor& desc, TableSchema* schema) { cf->set_max_versions(cf_desc->MaxVersions()); cf->set_min_versions(cf_desc->MinVersions()); cf->set_type(cf_desc->Type()); + cf->set_gtxn(cf_desc->GlobalTransaction()); + cf->set_notify(cf_desc->IsNotifyEnabled()); } } @@ -365,6 +374,16 @@ void TableSchemaToDesc(const TableSchema& schema, TableDescriptor* desc) { cfd->SetMinVersions(cf.min_versions()); cfd->SetTimeToLive(cf.time_to_live()); cfd->SetType(cf.type()); + if (cf.gtxn()) { + cfd->EnableGlobalTransaction(); + } else { + cfd->DisableGlobalTransaction(); + } + if (cf.notify()) { + cfd->EnableNotify(); + } else { + cfd->DisableNotify(); + } } } @@ -402,6 +421,22 @@ bool SetCfProperties(const string& name, const string& value, return false; } desc->SetType(value); + } else if (name == "gtxn") { + if (value == "on") { + desc->EnableGlobalTransaction(); + } else if (value == "off") { + desc->DisableGlobalTransaction(); + } else { + return false; + } + } else if (name == "notify") { + if (value == "on") { + desc->EnableNotify(); + } else if (value == "off") { + desc->DisableNotify(); + } else { + return false; + } }else { return false; } @@ -556,6 +591,13 @@ bool CheckTableDescrptor(const TableDescriptor& desc, ErrorCode* err) { } return false; } + if (!desc.IsTxnEnabled() && desc.ColumnFamily(i)->GlobalTransaction() == true) { + ss << " columnfamily property: gtxn is valid only when table set 'txn=on') "; + if (err != NULL) { + err->SetFailed(ErrorCode::kBadParam, ss.str()); + } + return false; + } } if (desc.IsTxnEnabled() && (desc.RawKey() == kGeneralKv || desc.RawKey() == kTTLKv)) { ss << "kv and ttlkv don't support txn"; @@ -806,6 +848,8 @@ bool 
FillTableDescriptor(PropTree& schema_tree, TableDescriptor* table_desc) { return false; } } + // extend notify locality group and _N_ columnfamily + return ExtendNotifyLgToDescriptor(table_desc); } else if (schema_tree.MaxDepth() == 3) { // full mode, all elements are user-defined // e.g. table1{ @@ -860,6 +904,8 @@ bool FillTableDescriptor(PropTree& schema_tree, TableDescriptor* table_desc) { return false; } } + // extend notify locality group and _N_ columnfamily + return ExtendNotifyLgToDescriptor(table_desc); } else { LOG(FATAL) << "never here."; } @@ -975,4 +1021,56 @@ bool IsKvTable(const TableSchema& schema) { schema.raw_key() == TTLKv); } +bool IsTransactionTable(const TableSchema& schema) { + return schema.enable_txn(); +} + +void FindGlobalTransactionCfs(const TableSchema& schema, + std::set* column_families) { + size_t cf_num = schema.column_families_size(); + for (size_t cf_no = 0; cf_no < cf_num; ++cf_no) { + const ColumnFamilySchema& cf_schema = schema.column_families(cf_no); + if (cf_schema.gtxn()) { + column_families->insert(cf_schema.name()); + } + } +} + +bool ExtendNotifyLgToDescriptor(TableDescriptor* desc) { + bool do_extend = false; + bool have_n_cf = false; + for (int32_t i = 0; i < desc->ColumnFamilyNum(); ++i) { + if (desc->ColumnFamily(i)->Name() == kNotifyColumnFamily) { + have_n_cf = true; + } + if (desc->ColumnFamily(i)->IsNotifyEnabled()) { + do_extend = true; + } + } + if (!do_extend) { + return true; + } else if (do_extend && have_n_cf) { + return false; + } + if (desc->LocalityGroup(TableDescImpl::NOTIFY_LG_NAME) != NULL) { + LOG(ERROR) << "already exists locality group: " + << TableDescImpl::NOTIFY_LG_NAME; + return false; + } + LocalityGroupDescriptor* lg_desc + = desc->AddLocalityGroup(TableDescImpl::NOTIFY_LG_NAME); + if (lg_desc == NULL) { + LOG(ERROR) << "fail to add locality group: " + << TableDescImpl::NOTIFY_LG_NAME; + return false; + } + ColumnFamilyDescriptor* cf_desc + = desc->AddColumnFamily(kNotifyColumnFamily, 
TableDescImpl::NOTIFY_LG_NAME); + if (cf_desc == NULL) { + LOG(ERROR) << "fail to add column family: " << kNotifyColumnFamily; + return false; + } + return true; +} + } // namespace tera diff --git a/src/sdk/sdk_utils.h b/src/sdk/sdk_utils.h index 4974575af..0e8ddad54 100644 --- a/src/sdk/sdk_utils.h +++ b/src/sdk/sdk_utils.h @@ -50,5 +50,11 @@ bool ParseDelimiterFile(const string& filename, std::vector* delims); bool IsKvTable(const TableSchema& schema); +bool ExtendNotifyLgToDescriptor(TableDescriptor* desc); + +bool IsTransactionTable(const TableSchema& schema); + +void FindGlobalTransactionCfs(const TableSchema& schema, std::set* column_families); + } // namespace tera #endif // TERA_SDK_SDK_UTILS_H_ diff --git a/src/sdk/sdk_zk.cc b/src/sdk/sdk_zk.cc index e08bb6c9b..874b4912c 100644 --- a/src/sdk/sdk_zk.cc +++ b/src/sdk/sdk_zk.cc @@ -5,11 +5,15 @@ #include "sdk/sdk_zk.h" #include +#include +#include +#include #include +#include "common/this_thread.h" #include "ins_sdk.h" - #include "types.h" +#include "utils/utils_cmd.h" #include "zk/zk_adapter.h" DECLARE_string(tera_zk_lib_log_path); @@ -18,14 +22,174 @@ DECLARE_bool(tera_zk_enabled); DECLARE_bool(tera_mock_zk_enabled); DECLARE_string(tera_zk_addr_list); DECLARE_string(tera_zk_root_path); +DECLARE_int32(tera_zk_timeout); +DECLARE_int32(tera_zk_retry_max_times); +DECLARE_int64(tera_zk_retry_period); DECLARE_bool(tera_ins_enabled); DECLARE_string(tera_ins_root_path); DECLARE_string(tera_ins_addr_list); +DECLARE_int64(tera_sdk_ins_session_timeout); DECLARE_bool(tera_mock_ins_enabled); +DECLARE_bool(tera_timeoracle_mock_enabled); +DECLARE_string(tera_timeoracle_mock_root_path); +DECLARE_string(tera_coord_type); namespace tera { namespace sdk { +static pthread_once_t zk_init_once = PTHREAD_ONCE_INIT; + +static void InitZkLogOnce() { + zk::ZooKeeperLightAdapter::SetLibraryLogOutput(FLAGS_tera_zk_lib_log_path); +} + +bool ClientZkAdapter::Init() { + pthread_once(&zk_init_once, InitZkLogOnce); + MutexLock 
lock(&mutex_); + LOG(INFO) << "try init zk ..."; + int zk_errno = zk::ZE_OK; + int32_t retry_cnt = 0; + int wait_time = 60000; + while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, + FLAGS_tera_zk_root_path, + FLAGS_tera_zk_timeout, + "", &zk_errno, wait_time)) { + if (retry_cnt++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to init zk: " << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "init zk fail: " << zk::ZkErrnoToString(zk_errno) + << ". retry in " << FLAGS_tera_zk_retry_period << " ms, retry: " + << retry_cnt; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "init zk success"; + return true; +} + +bool ClientZkAdapter::RegisterClient(std::string* path) { + int64_t session_id = 0; + int zk_errno = zk::ZE_OK; + int32_t retry_cnt = 0; + LOG(INFO) << "try get client sesssion"; + while (!GetSessionId(&session_id, &zk_errno)) { + if (retry_cnt++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to get client session : " + << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "get client session fail: " << zk::ZkErrnoToString(zk_errno) + << ". retry in " << FLAGS_tera_zk_retry_period << " ms, retry: " + << retry_cnt; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + std::string internal_path = utils::GetLocalHostAddr() + + "-" + std::to_string(getpid()) + + "-" + std::to_string(session_id); + LOG(INFO) << "get client session success : " << internal_path; + zk_errno = zk::ZE_OK; + retry_cnt = 0; + LOG(INFO) << "try create client node : " << internal_path; + while (!CreateEphemeralNode(kClientsNodePath + "/" + internal_path, + "", + &zk_errno)) { + if (retry_cnt++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to create client node : " + << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "create client node fail: " << zk::ZkErrnoToString(zk_errno) + << ". 
retry in " << FLAGS_tera_zk_retry_period << " ms, retry: " + << retry_cnt; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "create client node success"; + *path = internal_path; + return true; +} + +bool ClientZkAdapter::IsClientAlive(const std::string& path) { + VLOG(12) << "try check client alive : " << path; + int32_t retry_cnt = 0; + int zk_errno = zk::ZE_OK; + bool ret = true; + while (!CheckExist(kClientsNodePath + "/" + path, &ret, &zk_errno)) { + if (retry_cnt++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to check client alive : " + << zk::ZkErrnoToString(zk_errno); + // when zk server error, client should think other client is alive + return true; + } + LOG(ERROR) << "check client alive fail: " << zk::ZkErrnoToString(zk_errno) + << ". retry in " << FLAGS_tera_zk_retry_period << " ms, retry: " + << retry_cnt; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + VLOG(12) << "check client alive success"; + return ret; +} + +bool ClientZkAdapter::ReadNode(const std::string& path, std::string* value) { + VLOG(12) << "try read node : " << path; + int32_t retry_cnt = 0; + int zk_errno = zk::ZE_OK; + while (!ZooKeeperAdapter::ReadNode(path, value, &zk_errno)) { + if (retry_cnt++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to read node : " + << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "read node fail: " << zk::ZkErrnoToString(zk_errno) + << ". 
retry in " << FLAGS_tera_zk_retry_period << " ms, retry: " + << retry_cnt; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + VLOG(12) << "read node success"; + return true; +} + +bool InsClientZkAdapter::Init() { + ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); + ins_sdk_->SetTimeoutTime(FLAGS_tera_sdk_ins_session_timeout); + return true; +} + +bool InsClientZkAdapter::RegisterClient(std::string* path) { + std::string internal_path = utils::GetLocalHostAddr() + + "-" + std::to_string(getpid()) + + "-" + ins_sdk_->GetSessionID(); + LOG(INFO) << "get client session success : " << internal_path; + std::string client_path = FLAGS_tera_ins_root_path + kClientsNodePath + + "/" + internal_path; + galaxy::ins::sdk::SDKError err; + bool ret = ins_sdk_->Put(client_path, "", &err); + if (ret) { + *path = internal_path; + } + return ret; +} + +bool InsClientZkAdapter::IsClientAlive(const std::string& path) { + std::string client_path = kClientsNodePath + "/" + path; + std::string value; + return ReadNode(client_path, &value); +} + +bool InsClientZkAdapter::ReadNode(const std::string& path, std::string* value) { + std::string target_path = FLAGS_tera_ins_root_path + path; + galaxy::ins::sdk::SDKError err; + if (!ins_sdk_->Get(target_path, value, &err)) { + LOG(ERROR) << "ins read " << target_path << " fail: " << err; + return false; + } + return true; +} + std::string ClusterFinder::MasterAddr(bool update) { std::string master_addr; if (update || master_addr_ == "") { @@ -41,6 +205,21 @@ std::string ClusterFinder::MasterAddr(bool update) { return master_addr_; } +std::string ClusterFinder::TimeoracleAddr(bool update) { + std::string timeoracle_addr; + if (update || timeoracle_addr_ == "") { + if (!ReadNode(kTimeoracleNodePath, &timeoracle_addr)) { + timeoracle_addr = ""; + } + } + if (!timeoracle_addr.empty()) { + MutexLock lock(&mutex_); + timeoracle_addr_ = timeoracle_addr; + LOG(INFO) << "timeoracle addr: " << 
timeoracle_addr_; + } + return timeoracle_addr_; +} + std::string ClusterFinder::RootTableAddr(bool update) { std::string root_table_addr; if (update || root_table_addr_ == "") { @@ -72,46 +251,54 @@ std::string ClusterFinder::ClusterId() { } ZkClusterFinder::ZkClusterFinder(const std::string& zk_root_path, - const std::string& zk_addr_list) - : zk_root_path_(zk_root_path), zk_addr_list_(zk_addr_list) { -} - -static pthread_once_t zk_init_once = PTHREAD_ONCE_INIT; - -static void InitZkLogOnce() { - zk::ZooKeeperLightAdapter::SetLibraryLogOutput(FLAGS_tera_zk_lib_log_path); + const std::string& zk_addr_list, + ClientZkAdapterBase* zk_adapter) + : zk_root_path_(zk_root_path), + zk_addr_list_(zk_addr_list), + zk_adapter_(zk_adapter) { } bool ZkClusterFinder::ReadNode(const std::string& name, std::string* value) { - pthread_once(&zk_init_once, InitZkLogOnce); + if (zk_adapter_ == NULL) { + pthread_once(&zk_init_once, InitZkLogOnce); - int zk_errno = tera::zk::ZE_OK; - zk::ZooKeeperLightAdapter zk_adapter; - if (!zk_adapter.Init(zk_addr_list_, zk_root_path_, 1000 * 15, "", &zk_errno)) { - LOG(ERROR) << "Init zookeeper fail: " << tera::zk::ZkErrnoToString(zk_errno); - return false; - } + int zk_errno = tera::zk::ZE_OK; + zk::ZooKeeperLightAdapter zk_adapter; + if (!zk_adapter.Init(zk_addr_list_, zk_root_path_, 1000 * 15, "", &zk_errno)) { + LOG(ERROR) << "Init zookeeper fail: " << tera::zk::ZkErrnoToString(zk_errno); + return false; + } - if (!zk_adapter.ReadNode(name, value, &zk_errno)) { - LOG(ERROR) << "zk read " << name << " fail: " << zk::ZkErrnoToString(zk_errno); - return false; + if (!zk_adapter.ReadNode(name, value, &zk_errno)) { + LOG(ERROR) << "zk read " << name << " fail: " << zk::ZkErrnoToString(zk_errno); + return false; + } + return true; + } else { + return zk_adapter_->ReadNode(name, value); } - return true; } InsClusterFinder::InsClusterFinder(const std::string& ins_root_path, - const std::string& ins_addr_list) - : ins_root_path_(ins_root_path), 
ins_addr_list_(ins_addr_list) { + const std::string& ins_addr_list, + ClientZkAdapterBase* zk_adapter) + : ins_root_path_(ins_root_path), + ins_addr_list_(ins_addr_list), + zk_adapter_(zk_adapter) { } bool InsClusterFinder::ReadNode(const std::string& name, std::string* value) { - galaxy::ins::sdk::InsSDK ins_sdk(ins_addr_list_); - galaxy::ins::sdk::SDKError err; - if (!ins_sdk.Get(ins_root_path_ + name, value, &err)) { - LOG(ERROR) << "ins read " << name << " fail: " << err; - return false; + if (zk_adapter_ == NULL) { + galaxy::ins::sdk::InsSDK ins_sdk(ins_addr_list_); + galaxy::ins::sdk::SDKError err; + if (!ins_sdk.Get(ins_root_path_ + name, value, &err)) { + LOG(ERROR) << "ins read " << name << " fail: " << err; + return false; + } + return true; + } else { + return zk_adapter_->ReadNode(name, value); } - return true; } FakeZkClusterFinder::FakeZkClusterFinder(const std::string& fake_zk_path_prefix) @@ -122,18 +309,84 @@ bool FakeZkClusterFinder::ReadNode(const std::string& name, std::string* value) return zk::FakeZkUtil::ReadNode(fake_zk_path_prefix_ + name, value); } -ClusterFinder* NewClusterFinder() { - if (FLAGS_tera_zk_enabled) { - return new sdk::ZkClusterFinder(FLAGS_tera_zk_root_path, FLAGS_tera_zk_addr_list); - } else if (FLAGS_tera_ins_enabled) { - return new sdk::InsClusterFinder(FLAGS_tera_ins_root_path, FLAGS_tera_ins_addr_list); - } else if (FLAGS_tera_mock_zk_enabled) { +MockTimeoracleClusterFinder::MockTimeoracleClusterFinder(const std::string& mock_root_path) { + mock_root_path_ = mock_root_path; +} + +bool MockTimeoracleClusterFinder::ReadNode(const std::string& kpath, std::string* value) { + std::string path = mock_root_path_ + kpath; + int fd = ::open(path.c_str(), O_RDWR); + if (fd < 0) { + return false; + } + + value->resize(1024); + char *buf = &(*value)[0]; + ssize_t len = ::pread(fd, buf, sizeof(buf), 0); + ::close(fd); + if (len < 0) { + return false; + } + value->resize(len); + return true; +} + +ClientZkAdapterBase* 
NewClientZkAdapter() { + if (FLAGS_tera_coord_type.empty()) { + LOG(ERROR) << "Note: We don't recommend that use '--tera_[zk|ins|mock_zk|mock_ins]_enabled' flag for your cluster coord" + << " replace by '--tera_coord_type=[zk|ins|mock_zk|mock_ins|fake_zk]' flag is usually recommended."; + } + + if (FLAGS_tera_coord_type == "zk" + || (FLAGS_tera_coord_type.empty() && FLAGS_tera_zk_enabled)) { + return new sdk::ClientZkAdapter(); + } else if (FLAGS_tera_coord_type == "ins" + || (FLAGS_tera_coord_type.empty() && FLAGS_tera_ins_enabled)) { + return new sdk::InsClientZkAdapter(); + } else if (FLAGS_tera_coord_type == "mock_zk" + || (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_zk_enabled)) { + return new sdk::MockClientZkAdapter(); + } else if (FLAGS_tera_coord_type == "mock_ins" + || (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_ins_enabled)) { + return new sdk::MockInsClientZkAdapter(); + } + return NULL; +} + +ClusterFinder* NewClusterFinder(ClientZkAdapterBase* zk_adapter) { + if (FLAGS_tera_coord_type.empty()) { + LOG(ERROR) << "Note: We don't recommend that use '--tera_[zk|ins|mock_zk|mock_ins]_enabled' flag for your cluster coord" + << " replace by '--tera_coord_type=[zk|ins|mock_zk|mock_ins|fake_zk]' flag is usually recommended."; + } + if (FLAGS_tera_coord_type == "zk" + || (FLAGS_tera_coord_type.empty() && FLAGS_tera_zk_enabled)) { + return new sdk::ZkClusterFinder(FLAGS_tera_zk_root_path, FLAGS_tera_zk_addr_list, zk_adapter); + } else if (FLAGS_tera_coord_type == "ins" + || (FLAGS_tera_coord_type.empty() && FLAGS_tera_ins_enabled)) { + return new sdk::InsClusterFinder(FLAGS_tera_ins_root_path, FLAGS_tera_ins_addr_list, zk_adapter); + } else if (FLAGS_tera_coord_type == "mock_zk" + || (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_zk_enabled)) { return new sdk::MockZkClusterFinder(FLAGS_tera_zk_root_path, FLAGS_tera_zk_addr_list); - } else if (FLAGS_tera_mock_ins_enabled) { + } else if (FLAGS_tera_coord_type == "mock_ins" + || 
(FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_ins_enabled)) { return new sdk::MockInsClusterFinder(FLAGS_tera_ins_root_path, FLAGS_tera_ins_addr_list); - } else { + } else if (FLAGS_tera_coord_type == "fake_zk" + || FLAGS_tera_coord_type.empty()) { return new sdk::FakeZkClusterFinder(FLAGS_tera_fake_zk_path_prefix); } + return nullptr; +} + +ClusterFinder* NewTimeoracleClusterFinder() { + if (FLAGS_tera_timeoracle_mock_enabled) { + return new sdk::MockTimeoracleClusterFinder(FLAGS_tera_timeoracle_mock_root_path); + } else if (FLAGS_tera_coord_type == "zk") { + return new sdk::ZkClusterFinder(FLAGS_tera_zk_root_path, FLAGS_tera_zk_addr_list); + } else if (FLAGS_tera_coord_type == "ins") { + return new sdk::InsClusterFinder(FLAGS_tera_ins_root_path, FLAGS_tera_ins_addr_list); + } + + return nullptr; } } // namespace sdk diff --git a/src/sdk/sdk_zk.h b/src/sdk/sdk_zk.h index dc199abe6..8ad026ebd 100644 --- a/src/sdk/sdk_zk.h +++ b/src/sdk/sdk_zk.h @@ -9,9 +9,95 @@ #include #include +#include "ins_sdk.h" +#include "zk/zk_adapter.h" + +namespace galaxy{ +namespace ins{ +namespace sdk { + class InsSDK; +} +} +} + namespace tera { namespace sdk { +class ClientZkAdapterBase : public zk::ZooKeeperLightAdapter { +public: + virtual ~ClientZkAdapterBase() {}; + virtual bool Init() = 0; + virtual bool RegisterClient(std::string* session_str) = 0; + virtual bool IsClientAlive(const std::string& path) = 0; + virtual bool ReadNode(const std::string& path, std::string* value) = 0; +}; + +class ClientZkAdapter : public ClientZkAdapterBase { +public: + ClientZkAdapter() {} + virtual ~ClientZkAdapter() {} + virtual bool Init(); + virtual bool RegisterClient(std::string* session_str); + virtual bool IsClientAlive(const std::string& path); + virtual bool ReadNode(const std::string& path, std::string* value); +private: + mutable Mutex mutex_; +}; + +class MockClientZkAdapter : public ClientZkAdapter { +public: + MockClientZkAdapter(): ClientZkAdapter() {} + virtual 
~MockClientZkAdapter() {} + virtual bool Init() { return true; } + virtual bool RegisterClient(std::string* session_str) { + *session_str = "localhost"; + return true; + } + virtual bool IsClientAlive(const std::string& path) { + return true; + } + virtual bool ReadNode(const std::string& path, std::string* value) { + *value = "mock_zk_value"; + return true; + } +}; + +class InsClientZkAdapter : public ClientZkAdapterBase { +public: + InsClientZkAdapter() : ins_sdk_(NULL) {} + virtual ~InsClientZkAdapter() { + if (ins_sdk_ != NULL) { + delete ins_sdk_; + } + } + virtual bool Init (); + virtual bool RegisterClient(std::string* session_str); + virtual bool IsClientAlive(const std::string& path); + virtual bool ReadNode(const std::string& path, std::string* value); +private: + galaxy::ins::sdk::InsSDK* ins_sdk_; +}; + +class MockInsClientZkAdapter : public InsClientZkAdapter { +public: + MockInsClientZkAdapter() : InsClientZkAdapter() {} + virtual ~MockInsClientZkAdapter() {} + virtual bool Init() { return true; } + virtual bool RegisterClient(std::string* session_str) { + *session_str = "localhost"; + return true; + } + virtual bool IsClientAlive(const std::string& path) { + return true; + } + virtual bool ReadNode(const std::string& path, std::string* value) { + *value = "mock_ins_value"; + return true; + } +}; + +ClientZkAdapterBase* NewClientZkAdapter(); + class ClusterFinder { public: @@ -19,6 +105,7 @@ class ClusterFinder virtual ~ClusterFinder() {} std::string MasterAddr(bool update = false); std::string RootTableAddr(bool update = false); + std::string TimeoracleAddr(bool update = false); std::string ClusterId(); // cluster URI: :/// protected: @@ -30,12 +117,15 @@ class ClusterFinder private: mutable Mutex mutex_; std::string master_addr_; + std::string timeoracle_addr_; std::string root_table_addr_; }; class ZkClusterFinder : public ClusterFinder { public: - ZkClusterFinder(const std::string& zk_root_path, const std::string& zk_addr_list); + 
ZkClusterFinder(const std::string& zk_root_path, + const std::string& zk_addr_list, + ClientZkAdapterBase* zk_adapter = NULL); protected: virtual bool ReadNode(const std::string& path, std::string* value); virtual std::string Name() { return "zk"; }; @@ -44,6 +134,7 @@ class ZkClusterFinder : public ClusterFinder { private: std::string zk_root_path_; std::string zk_addr_list_; + ClientZkAdapterBase* zk_adapter_; }; class MockZkClusterFinder : public ZkClusterFinder { @@ -56,7 +147,9 @@ class MockZkClusterFinder : public ZkClusterFinder { class InsClusterFinder : public ClusterFinder { public: - InsClusterFinder(const std::string& ins_root_path, const std::string& ins_addr_list); + InsClusterFinder(const std::string& ins_root_path, + const std::string& ins_addr_list, + ClientZkAdapterBase* zk_adapter = NULL); protected: virtual bool ReadNode(const std::string& path, std::string* value); virtual std::string Name() { return "ins"; } @@ -65,6 +158,7 @@ class InsClusterFinder : public ClusterFinder { private: std::string ins_root_path_; std::string ins_addr_list_; + ClientZkAdapterBase* zk_adapter_; }; class MockInsClusterFinder : public InsClusterFinder { @@ -87,7 +181,24 @@ class FakeZkClusterFinder : public ClusterFinder { std::string fake_zk_path_prefix_; }; -ClusterFinder* NewClusterFinder(); +class MockTimeoracleClusterFinder : public ClusterFinder { +public: + MockTimeoracleClusterFinder(const std::string& mock_root_path); + +protected: + virtual bool ReadNode(const std::string& path, std::string* value); + + virtual std::string Name() { return "fakezk"; }; + + virtual std::string Authority() { return "localhost"; } + + virtual std::string Path() { return mock_root_path_; } +private: + std::string mock_root_path_; +}; + +ClusterFinder* NewTimeoracleClusterFinder(); +ClusterFinder* NewClusterFinder(ClientZkAdapterBase* zk_adapter = NULL); } // namespace sdk } // namespace tera diff --git a/src/sdk/single_row_txn.cc b/src/sdk/single_row_txn.cc index 
0d63563e1..d55c31889 100644 --- a/src/sdk/single_row_txn.cc +++ b/src/sdk/single_row_txn.cc @@ -3,16 +3,18 @@ // found in the LICENSE file. #include +#include #include "common/thread_pool.h" #include "common/base/string_format.h" #include "io/coding.h" +#include "sdk/global_txn_internal.h" #include "sdk/read_impl.h" #include "sdk/single_row_txn.h" #include "sdk/table_impl.h" #include "types.h" -#include "utils/timer.h" +#include "common/timer.h" namespace tera { @@ -27,9 +29,12 @@ SingleRowTxn::SingleRowTxn(Table* table, const std::string& row_key, reader_max_versions_(1), reader_start_timestamp_(kOldestTs), reader_end_timestamp_(kLatestTs), + start_timestamp_(0), + commit_timestamp_(0), mutation_buffer_(table, row_key), user_commit_callback_(NULL), user_commit_context_(NULL) { + start_timestamp_ = get_micros(); } SingleRowTxn::~SingleRowTxn() { @@ -185,6 +190,8 @@ void CommitCallbackWrapper(RowMutation* row_mu) { /// 提交事务 ErrorCode SingleRowTxn::Commit() { + commit_timestamp_ = get_micros(); + InternalNotify(); if (mutation_buffer_.MutationNum() > 0) { if (user_commit_callback_ != NULL) { // use our callback wrapper @@ -266,6 +273,34 @@ void SingleRowTxn::Serialize(RowMutationSequence* mu_seq) { } } +void SingleRowTxn::Ack(Table* t, + const std::string& row_key, + const std::string& column_family, + const std::string& qualifier) { + std::unique_ptr mutation(t->NewRowMutation(row_key)); + std::string notify_qulifier = PackNotifyName(column_family, qualifier); + mutation->DeleteColumns(kNotifyColumnFamily, notify_qulifier, start_timestamp_); + this->ApplyMutation(mutation.get()); +} + +void SingleRowTxn::Notify(Table* t, + const std::string& row_key, + const std::string& column_family, + const std::string& qualifier) { + Cell cell(t, row_key, column_family, qualifier); + notify_cells_.push_back(cell); +} + +void SingleRowTxn::InternalNotify() { + for (auto cell : notify_cells_) { + std::unique_ptr mutation(cell.Table()->NewRowMutation(cell.RowKey())); + std::string 
notify_qulifier = PackNotifyName(cell.ColFamily(), cell.Qualifier()); + mutation->Put(kNotifyColumnFamily, notify_qulifier, commit_timestamp_); + // single row transaction may notify different rows + cell.Table()->ApplyMutation(mutation.get()); + } +} + } // namespace tera /* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ diff --git a/src/sdk/single_row_txn.h b/src/sdk/single_row_txn.h index 3a57ea143..96b0fd104 100644 --- a/src/sdk/single_row_txn.h +++ b/src/sdk/single_row_txn.h @@ -17,6 +17,7 @@ class ThreadPool; namespace tera { class TableImpl; +class Cell; class SingleRowTxn : public Transaction { public: @@ -45,8 +46,33 @@ class SingleRowTxn : public Transaction { /// 提交事务 virtual ErrorCode Commit(); - /// 请忽略此接口 - virtual int64_t GetStartTimestamp() { abort(); } + virtual int64_t GetStartTimestamp() { return start_timestamp_; } + + virtual int64_t GetCommitTimestamp() { return commit_timestamp_; } + + virtual void Ack(Table* t, + const std::string& row_key, + const std::string& column_family, + const std::string& qualifier); + + virtual void Notify(Table* t, + const std::string& row_key, + const std::string& column_family, + const std::string& qualifier); + + // not support + virtual void SetIsolation(const IsolationLevel& isolation_level) { abort(); } + + // use default isolation level snapshot + virtual IsolationLevel Isolation() { return IsolationLevel::kSnapshot; } + + virtual void SetTimeout(int64_t timeout_ms) { + mutation_buffer_.SetTimeOut(timeout_ms); + } + + virtual int64_t Timeout() { + return mutation_buffer_.TimeOut(); + } public: /// 内部读操作回调 @@ -61,6 +87,8 @@ class SingleRowTxn : public Transaction { bool MarkHasRead(); void MarkNoRead(); + + void InternalNotify(); private: Table* table_; const std::string row_key_; @@ -77,10 +105,15 @@ class SingleRowTxn : public Transaction { int64_t reader_start_timestamp_; int64_t reader_end_timestamp_; + int64_t start_timestamp_; + int64_t commit_timestamp_; + RowMutationImpl mutation_buffer_; Callback 
user_commit_callback_; void* user_commit_context_; + std::vector notify_cells_; + mutable Mutex mu_; }; diff --git a/src/sdk/table_impl.cc b/src/sdk/table_impl.cc index fc153676a..c87567abd 100644 --- a/src/sdk/table_impl.cc +++ b/src/sdk/table_impl.cc @@ -34,7 +34,7 @@ #include "tera.h" #include "utils/crypt.h" #include "utils/string_util.h" -#include "utils/timer.h" +#include "common/timer.h" DECLARE_string(tera_master_meta_table_name); DECLARE_int32(tera_sdk_delay_send_internal); @@ -73,8 +73,6 @@ TableImpl::TableImpl(const std::string& table_name, commit_size_(FLAGS_tera_sdk_batch_size), write_commit_timeout_(FLAGS_tera_sdk_write_send_interval), read_commit_timeout_(FLAGS_tera_sdk_read_send_interval), - mutation_batch_seq_(0), - reader_batch_seq_(0), max_commit_pending_num_(FLAGS_tera_sdk_max_mutation_pending_num), max_reader_pending_num_(FLAGS_tera_sdk_max_reader_pending_num), meta_cond_(&meta_mutex_), @@ -126,11 +124,11 @@ void OpStatCallback(Table* table, SdkTask* task) { if (task->Type() == SdkTask::MUTATION) { ((TableImpl*)table)->StatUserPerfCounter(task->Type(), ((RowMutationImpl*)task)->GetError().GetType(), - common::timer::get_micros() - ((RowMutationImpl*)task)->GetStartTime()); + get_micros() - ((RowMutationImpl*)task)->GetStartTime()); } else if (task->Type() == SdkTask::READ) { ((TableImpl*)table)->StatUserPerfCounter(task->Type(), ((RowReaderImpl*)task)->GetError().GetType(), - common::timer::get_micros() - ((RowReaderImpl*)task)->GetStartTime()); + get_micros() - ((RowReaderImpl*)task)->GetStartTime()); } } @@ -148,13 +146,15 @@ void TableImpl::ApplyMutation(RowMutation* row_mu) { thread_pool_->AddTask(task); return; } - std::vector mu_list; - mu_list.push_back(static_cast(row_mu)); - DistributeMutations(mu_list, true); + std::vector task_list; + task_list.push_back(static_cast((RowMutationImpl*)row_mu)); + int64_t ts = get_micros(); + DistributeTasks(task_list, true, SdkTask::MUTATION); + perf_counter_.hist_async_cost.Add(get_micros() - ts); } 
void TableImpl::ApplyMutation(const std::vector& row_mutations) { - std::vector mu_list; + std::vector task_list; for (uint32_t i = 0; i < row_mutations.size(); i++) { perf_counter_.user_mu_cnt.Add(1); ((RowMutationImpl*)row_mutations[i])->Prepare(OpStatCallback); @@ -169,9 +169,11 @@ void TableImpl::ApplyMutation(const std::vector& row_mutations) { thread_pool_->AddTask(task); continue; } - mu_list.push_back(static_cast(row_mutations[i])); + task_list.push_back(static_cast((RowMutationImpl*)row_mutations[i])); } - DistributeMutations(mu_list, true); + int64_t ts = get_micros(); + DistributeTasks(task_list, true, SdkTask::MUTATION); + perf_counter_.hist_async_cost.Add(get_micros() - ts); } bool TableImpl::Put(const std::string& row_key, const std::string& family, @@ -427,6 +429,7 @@ void TableImpl::CommitScan(ScanTask* scan_task, if (impl->GetMaxVersion() != 0) { request->set_max_version(impl->GetMaxVersion()); } + request->set_max_qualifiers(impl->GetMaxQualifiers()); if (impl->GetBufferSize() != 0) { request->set_buffer_limit(impl->GetBufferSize()); } @@ -450,7 +453,7 @@ void TableImpl::CommitScan(ScanTask* scan_task, << ", start_key " << request->start() << ", end_key " << request->end() << ", scan to " << server_addr; - request->set_timestamp(common::timer::get_micros()); + request->set_timestamp(get_micros()); std::function done = std::bind(&TableImpl::ScanCallBack, this, scan_task, _1, _2, _3, _4); tabletnode_client.ScanTablet(request, response, done); @@ -460,7 +463,7 @@ void TableImpl::ScanCallBack(ScanTask* scan_task, ScanTabletRequest* request, ScanTabletResponse* response, bool failed, int error_code) { - perf_counter_.rpc_s.Add(common::timer::get_micros() - request->timestamp()); + perf_counter_.rpc_s.Add(get_micros() - request->timestamp()); perf_counter_.rpc_s_cnt.Inc(); ResultStreamImpl* stream = scan_task->stream; @@ -548,202 +551,111 @@ bool TableImpl::OpenInternal(ErrorCode* err) { return true; } -void TableImpl::DistributeMutations(const 
std::vector& mu_list, - bool called_by_user) { - typedef std::map > TsMuMap; - TsMuMap ts_mu_list; +void TableImpl::DistributeTasks(const std::vector& task_list, + bool called_by_user, + SdkTask::TYPE task_type) { + typedef std::map > TsTaskMap; + TsTaskMap ts_task_list; int64_t sync_min_timeout = -1; - std::vector sync_mu_list; + std::vector sync_task_list; + + int64_t max_pending_counter; + Counter* task_cnt = NULL; + Counter* pending_counter = NULL; + SdkTask::TimeoutFunc timeout_task; + std::string err_reason; + if (task_type == SdkTask::MUTATION) { + task_cnt = &(perf_counter_.mutate_cnt); + pending_counter = &(cur_commit_pending_counter_); + max_pending_counter = max_commit_pending_num_; + err_reason = "pending too much mutations, try it later."; + timeout_task = std::bind(&TableImpl::MutationTimeout, this, _1); + } else if (task_type == SdkTask::READ) { + task_cnt = &(perf_counter_.reader_cnt); + pending_counter = &(cur_reader_pending_counter_); + max_pending_counter = max_reader_pending_num_; + err_reason = "pending too much readers, try it later."; + timeout_task = std::bind(&TableImpl::ReaderTimeout, this, _1); + } else { + assert(0); + } - // evaluate minimum timeout of sync requests - if (called_by_user) { - for (uint32_t i = 0; i < mu_list.size(); i++) { - RowMutationImpl* row_mutation = (RowMutationImpl*)mu_list[i]; - if (!row_mutation->IsAsync()) { - sync_mu_list.push_back(row_mutation); - int64_t row_timeout = row_mutation->TimeOut() > 0 ? row_mutation->TimeOut() : timeout_; - if (row_timeout > 0 && (sync_min_timeout <= 0 || sync_min_timeout > row_timeout)) { - sync_min_timeout = row_timeout; - } + for (uint32_t i = 0; called_by_user && i < task_list.size(); i++) { + SdkTask* task = (SdkTask*)task_list[i]; + if (!task->IsAsync()) { + sync_task_list.push_back(task); + int64_t task_timeout = task->TimeOut() > 0 ? 
task->TimeOut() : timeout_; + if (task_timeout > 0 && (sync_min_timeout <= 0 || sync_min_timeout > task_timeout)) { + sync_min_timeout = task_timeout; } } } - for (uint32_t i = 0; i < mu_list.size(); i++) { - RowMutationImpl* row_mutation = (RowMutationImpl*)mu_list[i]; - perf_counter_.mutate_cnt.Inc(); + for (uint32_t i = 0; i < task_list.size(); i++) { + SdkTask* task = (SdkTask*)task_list[i]; + task_cnt->Inc(); if (called_by_user) { - row_mutation->SetId(next_task_id_.Inc()); + task->SetId(next_task_id_.Inc()); - int64_t row_timeout = -1; - if (!row_mutation->IsAsync()) { - row_timeout = sync_min_timeout; + int64_t task_timeout = -1; + if (!task->IsAsync()) { + task_timeout = sync_min_timeout; } else { - row_timeout = row_mutation->TimeOut() > 0 ? row_mutation->TimeOut() : timeout_; + task_timeout = task->TimeOut() > 0 ? task->TimeOut() : timeout_; } - SdkTask::TimeoutFunc task = std::bind(&TableImpl::MutationTimeout, this, _1); - task_pool_.PutTask(row_mutation, row_timeout, task); + perf_counter_.total_task_cnt.Inc(); + task_pool_.PutTask(task, task_timeout, timeout_task); } // flow control if (called_by_user - && cur_commit_pending_counter_.Add(row_mutation->MutationNum()) > max_commit_pending_num_ - && row_mutation->IsAsync()) { + && pending_counter->Inc() > max_pending_counter + && task->IsAsync()) { if (FLAGS_tera_sdk_async_blocking_enabled) { - while (cur_commit_pending_counter_.Get() > max_commit_pending_num_) { + while (pending_counter->Get() > max_pending_counter) { usleep(100000); } } else { - cur_commit_pending_counter_.Sub(row_mutation->MutationNum()); - row_mutation->SetError(ErrorCode::kBusy, "pending too much mutations, try it later."); - ThreadPool::Task task = - std::bind(&TableImpl::BreakRequest, this, row_mutation->GetId()); - row_mutation->DecRef(); - thread_pool_->AddTask(task); + pending_counter->Dec(); + task->SetError(ErrorCode::kBusy, err_reason); + ThreadPool::Task break_task = + std::bind(&TableImpl::BreakRequest, this, 
task->GetId()); + task->DecRef(); + thread_pool_->AddTask(break_task); continue; } } std::string server_addr; - if (!GetTabletAddrOrScheduleUpdateMeta(row_mutation->RowKey(), - row_mutation, &server_addr)) { + if (!GetTabletAddrOrScheduleUpdateMeta(task->RowKey(), + task, &server_addr)) { + perf_counter_.meta_sched_cnt.Inc(); continue; } - - ts_mu_list[server_addr].push_back(row_mutation); + ts_task_list[server_addr].push_back(task); } - TsMuMap::iterator it = ts_mu_list.begin(); - for (; it != ts_mu_list.end(); ++it) { - PackMutations(it->first, it->second); + TsTaskMap::iterator it = ts_task_list.begin(); + for (; it != ts_task_list.end(); ++it) { + PackSdkTasks(it->first, it->second, task_type); } - // 从现在开始,所有异步的row_mutation都不可以再操作了,因为随时会被用户释放 + // 从现在开始,所有异步的row_mutation都不可以再操作了,因为随时会被用户释放 // 不是用户调用的,立即返回 if (!called_by_user) { return; } // 等待同步操作返回或超时 - for (uint32_t i = 0; i < sync_mu_list.size(); i++) { - while (cur_commit_pending_counter_.Get() > max_commit_pending_num_) { + for (uint32_t i = 0; i < sync_task_list.size(); i++) { + while (pending_counter->Get() > max_pending_counter) { usleep(100000); } - - RowMutationImpl* row_mutation = (RowMutationImpl*)sync_mu_list[i]; - row_mutation->Wait(); - } -} - -void TableImpl::DistributeMutationsById(std::vector* mu_id_list) { - std::vector mu_list; - for (uint32_t i = 0; i < mu_id_list->size(); ++i) { - int64_t mu_id = (*mu_id_list)[i]; - SdkTask* task = task_pool_.GetTask(mu_id); - if (task == NULL) { - VLOG(10) << "mutation " << mu_id << " timeout when retry mutate";; - continue; - } - CHECK_EQ(task->Type(), SdkTask::MUTATION); - RowMutationImpl* row_mutation = (RowMutationImpl*)task; - mu_list.push_back(row_mutation); - } - DistributeMutations(mu_list, false); - delete mu_id_list; -} - -void TableImpl::PackMutations(const std::string& server_addr, - std::vector& mu_list) { - MutexLock lock(&mutation_batch_mutex_); - TaskBatch* mutation_batch = NULL; - bool is_instant = false; - for (size_t i = 0; i < 
mu_list.size(); ++i) { - // find existing batch or create a new batch - if (mutation_batch == NULL) { - std::map::iterator it = mutation_batch_map_.find(server_addr); - if (it != mutation_batch_map_.end()) { - mutation_batch = &it->second; - } else { - mutation_batch = &mutation_batch_map_[server_addr]; - mutation_batch->sequence_num = mutation_batch_seq_++; - mutation_batch->row_id_list = new std::vector; - ThreadPool::Task task = std::bind(&TableImpl::MutationBatchTimeout, this, - server_addr, mutation_batch->sequence_num); - int64_t timer_id = thread_pool_->DelayTask(write_commit_timeout_, task); - mutation_batch->timer_id = timer_id; - mutation_batch->byte_size = 0; - } - } - - // put mutation into the batch - RowMutationImpl* row_mutation = mu_list[i]; - mutation_batch->row_id_list->push_back(row_mutation->GetId()); - mutation_batch->byte_size += row_mutation->Size(); - is_instant |= !row_mutation->IsAsync(); - row_mutation->DecRef(); - - // commit the batch if: - // 1) batch_byte_size >= max_rpc_byte_size - // for the *LAST* batch, commit it if: - // 2) any mutation is sync (flush == true) - // 3) batch_row_num >= min_batch_row_num - if (mutation_batch->byte_size >= kMaxRpcSize || - (i == mu_list.size() - 1 && - (is_instant || mutation_batch->row_id_list->size() >= commit_size_))) { - std::vector* mu_id_list = mutation_batch->row_id_list; - uint64_t timer_id = mutation_batch->timer_id; - const bool non_block_cancel = true; - bool is_running = false; - if (!thread_pool_->CancelTask(timer_id, non_block_cancel, &is_running)) { - CHECK(is_running); // this delay task must be waiting for mutation_batch_mutex_ - } - mutation_batch_map_.erase(server_addr); - mutation_batch_mutex_.Unlock(); - CommitMutationsById(server_addr, *mu_id_list); - delete mu_id_list; - mutation_batch = NULL; - is_instant = false; - mutation_batch_mutex_.Lock(); - } + SdkTask* task = (SdkTask*)sync_task_list[i]; + task->Wait(); } } -void TableImpl::MutationBatchTimeout(std::string 
server_addr, uint64_t batch_seq) { - std::vector* mu_id_list = NULL; - { - MutexLock lock(&mutation_batch_mutex_); - std::map::iterator it = - mutation_batch_map_.find(server_addr); - if (it == mutation_batch_map_.end()) { - return; - } - TaskBatch* mutation_batch = &it->second; - if (mutation_batch->sequence_num != batch_seq) { - return; - } - mu_id_list = mutation_batch->row_id_list; - mutation_batch_map_.erase(it); - } - CommitMutationsById(server_addr, *mu_id_list); - delete mu_id_list; -} - -void TableImpl::CommitMutationsById(const std::string& server_addr, - std::vector& mu_id_list) { - std::vector mu_list; - for (size_t i = 0; i < mu_id_list.size(); i++) { - int64_t mu_id = mu_id_list[i]; - SdkTask* task = task_pool_.GetTask(mu_id); - if (task == NULL) { - VLOG(10) << "mutation " << mu_id << " timeout"; - continue; - } - CHECK_EQ(task->Type(), SdkTask::MUTATION); - mu_list.push_back((RowMutationImpl*)task); - } - CommitMutations(server_addr, mu_list); -} - void TableImpl::CommitMutations(const std::string& server_addr, std::vector& mu_list) { tabletnode::TabletNodeClient tabletnode_client_async(server_addr); @@ -776,7 +688,7 @@ void TableImpl::CommitMutations(const std::string& server_addr, request->set_is_instant(is_instant); VLOG(20) << "commit " << mu_list.size() << " mutations to " << server_addr; - request->set_timestamp(common::timer::get_micros()); + request->set_timestamp(get_micros()); std::function done = std::bind(&TableImpl::MutateCallBack, this, mu_id_list, _1, _2, _3, _4); tabletnode_client_async.WriteTablet(request, response, done); @@ -786,7 +698,7 @@ void TableImpl::MutateCallBack(std::vector* mu_id_list, WriteTabletRequest* request, WriteTabletResponse* response, bool failed, int error_code) { - perf_counter_.rpc_w.Add(common::timer::get_micros() - request->timestamp()); + perf_counter_.rpc_w.Add(get_micros() - request->timestamp()); perf_counter_.rpc_w_cnt.Inc(); if (failed) { if (error_code == sofa::pbrpc::RPC_ERROR_SERVER_SHUTDOWN || @@ 
-807,7 +719,7 @@ void TableImpl::MutateCallBack(std::vector* mu_id_list, } std::map* > retry_times_list; - std::vector not_in_range_list; + std::vector not_in_range_list; for (uint32_t i = 0; i < mu_id_list->size(); ++i) { const std::string& row = request->row_list(i).row_key(); int64_t mu_id = (*mu_id_list)[i]; @@ -835,10 +747,10 @@ void TableImpl::MutateCallBack(std::vector* mu_id_list, } // only for flow control - cur_commit_pending_counter_.Sub(row_mutation->MutationNum()); - int64_t perf_time = common::timer::get_micros(); + cur_commit_pending_counter_.Dec(); + int64_t perf_time = get_micros(); row_mutation->RunCallback(); - perf_counter_.user_callback.Add(common::timer::get_micros() - perf_time); + perf_counter_.user_callback.Add(get_micros() - perf_time); perf_counter_.user_callback_cnt.Inc(); continue; } @@ -860,7 +772,7 @@ void TableImpl::MutateCallBack(std::vector* mu_id_list, if (err == kKeyNotInRange) { perf_counter_.mutate_range_cnt.Inc(); row_mutation->IncRetryTimes(); - not_in_range_list.push_back(row_mutation); + not_in_range_list.push_back(task); } else { row_mutation->IncRetryTimes(); std::vector* retry_mu_id_list = NULL; @@ -878,7 +790,7 @@ void TableImpl::MutateCallBack(std::vector* mu_id_list, } if (not_in_range_list.size() > 0) { - DistributeMutations(not_in_range_list, false); + DistributeTasks(not_in_range_list, false, SdkTask::MUTATION); } std::map* >::iterator it; for (it = retry_times_list.begin(); it != retry_times_list.end(); ++it) { @@ -894,6 +806,22 @@ void TableImpl::MutateCallBack(std::vector* mu_id_list, delete mu_id_list; } +void TableImpl::DistributeMutationsById(std::vector* mu_id_list) { + std::vector task_list; + for (uint32_t i = 0; i < mu_id_list->size(); ++i) { + int64_t mu_id = (*mu_id_list)[i]; + SdkTask* task = task_pool_.GetTask(mu_id); + if (task == NULL) { + VLOG(10) << "mutation " << mu_id << " timeout when retry mutate";; + continue; + } + CHECK_EQ(task->Type(), SdkTask::MUTATION); + task_list.push_back(task); + } + 
DistributeTasks(task_list, false, SdkTask::MUTATION); + delete mu_id_list; +} + void TableImpl::MutationTimeout(SdkTask* task) { perf_counter_.mutate_timeout_cnt.Inc(); CHECK_NOTNULL(task); @@ -907,199 +835,33 @@ void TableImpl::MutationTimeout(SdkTask* task) { ScheduleUpdateMeta(row_mutation->RowKey(), row_mutation->GetMetaTimeStamp()); } + + std::string err_reason; if (row_mutation->RetryTimes() == 0) { perf_counter_.mutate_queue_timeout_cnt.Inc(); - std::string err_reason = StringFormat("commit %lld times, retry 0 times, in %u ms.", - row_mutation->GetCommitTimes(), timeout_); - row_mutation->SetError(ErrorCode::kTimeout, err_reason); + err_reason = StringFormat("commit %lld times, retry 0 times, in %u ms.", + row_mutation->GetCommitTimes(), timeout_); } else { - std::string err_reason = StringFormat("commit %lld times, retry %u times, in %u ms. last error: %s", - row_mutation->GetCommitTimes(), row_mutation->RetryTimes(), - timeout_, StatusCodeToString(err).c_str()); - row_mutation->SetError(ErrorCode::kSystem, err_reason); + err_reason = StringFormat("commit %lld times, retry %u times, in %u ms. 
last error: %s", + row_mutation->GetCommitTimes(), row_mutation->RetryTimes(), + timeout_, StatusCodeToString(err).c_str()); } + row_mutation->SetError(ErrorCode::kTimeout, err_reason); // only for flow control - cur_commit_pending_counter_.Sub(row_mutation->MutationNum()); - int64_t perf_time = common::timer::get_micros(); + cur_commit_pending_counter_.Dec(); + int64_t perf_time = get_micros(); row_mutation->RunCallback(); - perf_counter_.user_callback.Add(common::timer::get_micros() - perf_time); + perf_counter_.user_callback.Add(get_micros() - perf_time); perf_counter_.user_callback_cnt.Inc(); } -bool TableImpl::GetTabletLocation(std::vector* tablets, - ErrorCode* err) { - return false; -} - -bool TableImpl::GetDescriptor(TableDescriptor* desc, ErrorCode* err) { - return false; -} - void TableImpl::DistributeReaders(const std::vector& row_reader_list, bool called_by_user) { - typedef std::map > TsReaderMap; - TsReaderMap ts_reader_list; - - int64_t sync_min_timeout = -1; - std::vector sync_reader_list; - - if (called_by_user) { - for (uint32_t i = 0; i < row_reader_list.size(); i++) { - RowReaderImpl* row_reader = (RowReaderImpl*)row_reader_list[i]; - if (row_reader->IsAsync()) { - continue; - } - sync_reader_list.push_back(row_reader); - int64_t row_timeout = row_reader->TimeOut() > 0 ? row_reader->TimeOut() : timeout_; - if (row_timeout > 0 && (sync_min_timeout <= 0 || sync_min_timeout > row_timeout)) { - sync_min_timeout = row_timeout; - } - } - } - - for (uint32_t i = 0; i < row_reader_list.size(); i++) { - perf_counter_.reader_cnt.Inc(); - RowReaderImpl* row_reader = (RowReaderImpl*)row_reader_list[i]; - if (called_by_user) { - row_reader->SetId(next_task_id_.Inc()); - - int64_t row_timeout = sync_min_timeout; - if (row_reader->IsAsync()) { - row_timeout = row_reader->TimeOut() > 0 ? 
row_reader->TimeOut() : timeout_; - } - SdkTask::TimeoutFunc task = std::bind(&TableImpl::ReaderTimeout, this, _1); - task_pool_.PutTask(row_reader, row_timeout, task); - } - - // flow control - if (called_by_user - && cur_reader_pending_counter_.Inc() > max_reader_pending_num_ - && row_reader->IsAsync()) { - if (FLAGS_tera_sdk_async_blocking_enabled) { - while (cur_reader_pending_counter_.Get() > max_reader_pending_num_) { - usleep(100000); - } - } else { - cur_reader_pending_counter_.Dec(); - row_reader->SetError(ErrorCode::kBusy, "pending too much readers, try it later."); - ThreadPool::Task task = - std::bind(&TableImpl::BreakRequest, this, row_reader->GetId()); - row_reader->DecRef(); - thread_pool_->AddTask(task); - continue; - } - } - - std::string server_addr; - if (!GetTabletAddrOrScheduleUpdateMeta(row_reader->RowName(), row_reader, - &server_addr)) { - continue; - } - - std::vector& ts_row_readers = ts_reader_list[server_addr]; - ts_row_readers.push_back(row_reader); - } - - TsReaderMap::iterator it = ts_reader_list.begin(); - for (; it != ts_reader_list.end(); ++it) { - std::vector& reader_list = it->second; - PackReaders(it->first, reader_list); - } - // 从现在开始,所有异步的row_reader都不可以再操作了,因为随时会被用户释放 - - // 不是用户调用的,立即返回 - if (!called_by_user) { - return; - } - - // 等待同步操作返回或超时 - for (uint32_t i = 0; i < sync_reader_list.size(); i++) { - while (cur_reader_pending_counter_.Get() > max_reader_pending_num_) { - usleep(100000); - } - - RowReaderImpl* row_reader = (RowReaderImpl*)sync_reader_list[i]; - row_reader->Wait(); + std::vector task_list; + for (size_t i = 0; i < row_reader_list.size(); ++i) { + task_list.push_back((SdkTask*)(row_reader_list[i])); } -} - -void TableImpl::PackReaders(const std::string& server_addr, - std::vector& reader_list) { - MutexLock lock(&reader_batch_mutex_); - TaskBatch* reader_buffer = NULL; - std::map::iterator it = reader_batch_map_.find(server_addr); - if (it != reader_batch_map_.end()) { - reader_buffer = &it->second; - } else 
{ - reader_buffer = &reader_batch_map_[server_addr]; - reader_buffer->sequence_num = reader_batch_seq_++; - reader_buffer->row_id_list = new std::vector; - ThreadPool::Task task = std::bind(&TableImpl::ReaderBatchTimeout, this, - server_addr, reader_buffer->sequence_num); - uint64_t timer_id = thread_pool_->DelayTask(read_commit_timeout_, task); - reader_buffer->timer_id = timer_id; - } - - bool is_instant = false; - for (size_t i = 0; i < reader_list.size(); ++i) { - RowReaderImpl* reader = reader_list[i]; - reader_buffer->row_id_list->push_back(reader->GetId()); - is_instant |= !reader->IsAsync(); - reader->DecRef(); - } - - if (reader_buffer->row_id_list->size() >= commit_size_ || is_instant) { - std::vector* reader_id_list = reader_buffer->row_id_list; - uint64_t timer_id = reader_buffer->timer_id; - const bool non_block_cancel = true; - bool is_running = false; - if (!thread_pool_->CancelTask(timer_id, non_block_cancel, &is_running)) { - CHECK(is_running); // this delay task must be waiting for reader_batch_map_ - } - reader_batch_map_.erase(server_addr); - reader_batch_mutex_.Unlock(); - CommitReadersById(server_addr, *reader_id_list); - delete reader_id_list; - reader_buffer = NULL; - reader_batch_mutex_.Lock(); - } -} - -void TableImpl::ReaderBatchTimeout(std::string server_addr, uint64_t batch_seq) { - std::vector* reader_id_list = NULL; - { - MutexLock lock(&reader_batch_mutex_); - std::map::iterator it = - reader_batch_map_.find(server_addr); - if (it == reader_batch_map_.end()) { - return; - } - TaskBatch* reader_buffer = &it->second; - if (reader_buffer->sequence_num != batch_seq) { - return; - } - reader_id_list = reader_buffer->row_id_list; - reader_batch_map_.erase(it); - } - CommitReadersById(server_addr, *reader_id_list); - delete reader_id_list; -} - -void TableImpl::CommitReadersById(const std::string server_addr, - std::vector& reader_id_list) { - std::vector reader_list; - for (size_t i = 0; i < reader_id_list.size(); ++i) { - int64_t 
reader_id = reader_id_list[i]; - SdkTask* task = task_pool_.GetTask(reader_id); - if (task == NULL) { - VLOG(10) << "reader " << reader_id << " timeout when commit read";; - continue; - } - CHECK_EQ(task->Type(), SdkTask::READ); - RowReaderImpl* reader = (RowReaderImpl*)task; - reader_list.push_back(reader); - } - CommitReaders(server_addr, reader_list); + DistributeTasks(task_list, called_by_user, SdkTask::READ); } void TableImpl::CommitReaders(const std::string server_addr, @@ -1122,7 +884,7 @@ void TableImpl::CommitReaders(const std::string server_addr, row_reader->DecRef(); } VLOG(20) << "commit " << reader_list.size() << " reads to " << server_addr; - request->set_timestamp(common::timer::get_micros()); + request->set_timestamp(get_micros()); std::function done = std::bind(&TableImpl::ReaderCallBack, this, reader_id_list, _1, _2, _3, _4); tabletnode_client_async.ReadTablet(request, response, done); @@ -1132,7 +894,7 @@ void TableImpl::ReaderCallBack(std::vector* reader_id_list, ReadTabletRequest* request, ReadTabletResponse* response, bool failed, int error_code) { - perf_counter_.rpc_r.Add(common::timer::get_micros() - request->timestamp()); + perf_counter_.rpc_r.Add(get_micros() - request->timestamp()); perf_counter_.rpc_r_cnt.Inc(); if (failed) { if (error_code == sofa::pbrpc::RPC_ERROR_SERVER_SHUTDOWN || @@ -1185,9 +947,9 @@ void TableImpl::ReaderCallBack(std::vector* reader_id_list, } else { // err == kSnapshotNotExist row_reader->SetError(ErrorCode::kNotFound, "snapshot not found"); } - int64_t perf_time = common::timer::get_micros(); + int64_t perf_time = get_micros(); row_reader->RunCallback(); - perf_counter_.user_callback.Add(common::timer::get_micros() - perf_time); + perf_counter_.user_callback.Add(get_micros() - perf_time); perf_counter_.user_callback_cnt.Inc(); // only for flow control cur_reader_pending_counter_.Dec(); @@ -1273,25 +1035,161 @@ void TableImpl::ReaderTimeout(SdkTask* task) { ScheduleUpdateMeta(row_reader->RowName(), 
row_reader->GetMetaTimeStamp()); } + + std::string err_reason; if (row_reader->RetryTimes() == 0) { perf_counter_.reader_queue_timeout_cnt.Inc(); - std::string err_reason = StringFormat("commit %lld times, retry 0 times, in %u ms.", - row_reader->GetCommitTimes(), timeout_); - row_reader->SetError(ErrorCode::kTimeout, err_reason); + err_reason = StringFormat("commit %lld times, retry 0 times, in %u ms.", + row_reader->GetCommitTimes(), timeout_); } else { - std::string err_reason = StringFormat("commit %lld times, retry %u times, in %u ms. last error: %s", - row_reader->GetCommitTimes(), row_reader->RetryTimes(), - timeout_, StatusCodeToString(err).c_str()); - row_reader->SetError(ErrorCode::kSystem, err_reason); + err_reason = StringFormat("commit %lld times, retry %u times, in %u ms. last error: %s", + row_reader->GetCommitTimes(), row_reader->RetryTimes(), + timeout_, StatusCodeToString(err).c_str()); } - int64_t perf_time = common::timer::get_micros(); + row_reader->SetError(ErrorCode::kTimeout, err_reason); + int64_t perf_time = get_micros(); row_reader->RunCallback(); - perf_counter_.user_callback.Add(common::timer::get_micros() - perf_time); + perf_counter_.user_callback.Add(get_micros() - perf_time); perf_counter_.user_callback_cnt.Inc(); // only for flow control cur_reader_pending_counter_.Dec(); } +void TableImpl::PackSdkTasks(const std::string& server_addr, + std::vector& task_list, + SdkTask::TYPE task_type) { + Mutex* mutex = NULL; + std::map* task_batch_map = NULL; + SdkTask::TimeoutFunc task; + uint64_t commit_timeout = 10000; + uint32_t commit_size = commit_size_; + if (task_type == SdkTask::MUTATION) { + mutex = &mutation_batch_mutex_; + task_batch_map = &mutation_batch_map_; + commit_timeout = write_commit_timeout_; + } else if (task_type == SdkTask::READ) { + mutex = &reader_batch_mutex_; + task_batch_map = &reader_batch_map_; + commit_timeout = read_commit_timeout_; + } else { + assert(0); + } + + TaskBatch* task_batch = NULL; + bool is_instant 
= false; + MutexLock lock(mutex); + for (size_t i = 0; i < task_list.size(); ++i) { + // find existing batch or create a new batch + if (task_batch == NULL) { + std::map::iterator it = task_batch_map->find(server_addr); + if (it != task_batch_map->end()) { + task_batch = it->second; + } else { + task_batch = new TaskBatch; + task_batch->type = task_type; + task_batch->mutex = mutex; + task_batch->task_batch_map = task_batch_map; + task_batch->byte_size = 0; + task_batch->server_addr = server_addr; + task_batch->row_id_list = new std::vector; + + task_batch->SetId(next_task_id_.Inc()); + (*task_batch_map)[server_addr] = task_batch; + SdkTask::TimeoutFunc task = std::bind(&TableImpl::TaskBatchTimeout, this, _1); + task_pool_.PutTask(task_batch, commit_timeout, task); + task_batch->DecRef(); + } + } + + // put task into the batch + SdkTask* sdk_task = task_list[i]; + task_batch->row_id_list->push_back(sdk_task->GetId()); + task_batch->byte_size += sdk_task->Size(); + is_instant |= !sdk_task->IsAsync(); + sdk_task->DecRef(); + + // commit the batch if: + // 1) batch_byte_size >= max_rpc_byte_size + // for the *LAST* batch, commit it if: + // 2) any mutation is sync (flush == true) + // 3) batch_row_num >= min_batch_row_num + // 4) commit timeout + if (task_batch->byte_size >= kMaxRpcSize || + ((i == task_list.size() - 1) && + (is_instant || + (task_batch->row_id_list->size() >= commit_size)))) { + std::vector* task_id_list = task_batch->row_id_list; + task_batch->row_id_list = NULL; + task_batch_map->erase(server_addr); + mutex->Unlock(); + + CommitTasksById(server_addr, *task_id_list, task_type); + delete task_id_list; + task_batch = NULL; + is_instant = false; + mutex->Lock(); + } + } +} + +void TableImpl::TaskBatchTimeout(SdkTask* task) { + std::vector* task_id_list = NULL; + CHECK_NOTNULL(task); + CHECK_EQ(task->Type(), SdkTask::TASKBATCH); + TaskBatch* task_batch = (TaskBatch*)task; + task_batch->ExcludeOtherRef(); + + const std::string& server_addr = 
task_batch->server_addr; + SdkTask::TYPE task_type = task_batch->type; + Mutex* mutex = task_batch->mutex; + std::map* task_batch_map = task_batch->task_batch_map; + { + MutexLock lock(mutex); + std::map::iterator it = + task_batch_map->find(server_addr); + if (it != task_batch_map->end() && + task_batch->GetId() == it->second->GetId()) { + task_id_list = task_batch->row_id_list; + task_batch->row_id_list = NULL; + task_batch_map->erase(it); + } + } + + if (task_id_list != NULL) { + CommitTasksById(server_addr, *task_id_list, task_type); + delete task_id_list; + } + delete task_batch; +} + +void TableImpl::CommitTasksById(const std::string& server_addr, + std::vector& task_id_list, + SdkTask::TYPE task_type) { + std::vector mutation_list; + std::vector reader_list; + + for (size_t i = 0; i < task_id_list.size(); i++) { + int64_t task_id = task_id_list[i]; + SdkTask* task = task_pool_.GetTask(task_id); + if (task == NULL) { + VLOG(10) << "commit task, type " << task_type << ", id " << task_id << " timeout"; + continue; + } + perf_counter_.total_commit_cnt.Inc(); + CHECK_EQ(task->Type(), task_type); + if (task_type == SdkTask::MUTATION) { + mutation_list.push_back((RowMutationImpl*)task); + } else if (task_type == SdkTask::READ) { + reader_list.push_back((RowReaderImpl*)task); + } + } + if (task_type == SdkTask::MUTATION) { + CommitMutations(server_addr, mutation_list); + } else if (task_type == SdkTask::READ) { + CommitReaders(server_addr, reader_list); + } +} + bool TableImpl::GetTabletMetaForKey(const std::string& key, TabletMeta* meta) { MutexLock lock(&meta_mutex_); TabletMetaNode* node = GetTabletMetaNodeForKey(key); @@ -1486,7 +1384,7 @@ void TableImpl::ScanMetaTableAsync(const std::string& key_start, const std::stri std::function done = std::bind(&TableImpl::ScanMetaTableCallBack, this, key_start, key_end, - expand_key_end, ::common::timer::get_micros(), _1, _2, _3, _4); + expand_key_end, get_micros(), _1, _2, _3, _4); 
tabletnode_client_async.ScanTablet(request, response, done); } @@ -1497,7 +1395,7 @@ void TableImpl::ScanMetaTableCallBack(std::string key_start, ScanTabletRequest* request, ScanTabletResponse* response, bool failed, int error_code) { - perf_counter_.get_meta.Add(::common::timer::get_micros() - start_time); + perf_counter_.get_meta.Add(get_micros() - start_time); perf_counter_.get_meta_cnt.Inc(); if (failed) { if (error_code == sofa::pbrpc::RPC_ERROR_SERVER_SHUTDOWN || @@ -1699,8 +1597,8 @@ void TableImpl::WakeUpPendingRequest(const TabletMetaNode& node) { const std::string& server_addr = node.meta.server_addr(); int64_t meta_timestamp = node.update_time; - std::vector mutation_list; - std::vector reader_list; + std::vector mutation_list; + std::vector reader_list; std::map >::iterator it = pending_task_id_list_.lower_bound(start_key); @@ -1711,6 +1609,7 @@ void TableImpl::WakeUpPendingRequest(const TabletMetaNode& node) { std::list& task_id_list = it->second; for (std::list::iterator itask = task_id_list.begin(); itask != task_id_list.end(); ++itask) { + perf_counter_.meta_update_cnt.Inc(); int64_t task_id = *itask; SdkTask* task = task_pool_.GetTask(task_id); if (task == NULL) { @@ -1721,12 +1620,10 @@ void TableImpl::WakeUpPendingRequest(const TabletMetaNode& node) { switch (task->Type()) { case SdkTask::READ: { - RowReaderImpl* reader = (RowReaderImpl*)task; - reader_list.push_back(reader); + reader_list.push_back(task); } break; case SdkTask::MUTATION: { - RowMutationImpl* mutation = (RowMutationImpl*)task; - mutation_list.push_back(mutation); + mutation_list.push_back(task); } break; case SdkTask::SCAN: { ScanTask* scan_task = (ScanTask*)task; @@ -1743,10 +1640,10 @@ void TableImpl::WakeUpPendingRequest(const TabletMetaNode& node) { } if (mutation_list.size() > 0) { - PackMutations(server_addr, mutation_list); + PackSdkTasks(server_addr, mutation_list, SdkTask::MUTATION); } if (reader_list.size() > 0) { - PackReaders(server_addr, reader_list); + 
PackSdkTasks(server_addr, reader_list, SdkTask::READ); } } @@ -2068,6 +1965,19 @@ void TableImpl::PerfCounter::DoDumpPerfCounterLog(const std::string& log_prefix) << " cost_90: " << hist_read_cost.Percentile(90) << " cost_99: " << hist_read_cost.Percentile(99); hist_read_cost.Clear(); + + LOG(INFO) << log_prefix << "[hist_async_cost]" + << " cost_ave: " << hist_async_cost.Average() + << " cost_50: " << hist_async_cost.Percentile(50) + << " cost_90: " << hist_async_cost.Percentile(90) + << " cost_99: " << hist_async_cost.Percentile(99); + hist_async_cost.Clear(); + + LOG(INFO) << log_prefix << "[total]" + << " meta_sched_cnt: " << meta_sched_cnt.Get() + << " meta_update_cnt: " << meta_update_cnt.Get() + << " total_task_cnt: " << total_task_cnt.Get() + << " total_commit_cnt: " << total_commit_cnt.Get(); } void TableImpl::DelayTaskWrapper(ThreadPool::Task task, int64_t task_id) { @@ -2148,6 +2058,15 @@ void TableImpl::StatUserPerfCounter(enum SdkTask::TYPE op, ErrorCode::ErrorCodeT } } +bool TableImpl::GetTabletLocation(std::vector* tablets, + ErrorCode* err) { + return false; +} + +bool TableImpl::GetDescriptor(TableDescriptor* desc, ErrorCode* err) { + return false; +} + /// 创建事务 Transaction* TableImpl::StartRowTransaction(const std::string& row_key) { return new SingleRowTxn((Table*)this, row_key, thread_pool_); diff --git a/src/sdk/table_impl.h b/src/sdk/table_impl.h index 088a2c206..6e0986b62 100644 --- a/src/sdk/table_impl.h +++ b/src/sdk/table_impl.h @@ -16,7 +16,7 @@ #include "sdk/sdk_task.h" #include "sdk/sdk_zk.h" #include "tera.h" -#include "utils/counter.h" +#include "common/counter.h" namespace tera { @@ -261,10 +261,16 @@ class TableImpl : public Table { Counter user_read_fail; ::leveldb::Histogram hist_read_cost; + ::leveldb::Histogram hist_async_cost; + Counter meta_sched_cnt; + Counter meta_update_cnt; + Counter total_task_cnt; + Counter total_commit_cnt; + void DoDumpPerfCounterLog(const std::string& log_prefix); PerfCounter() { - start_time = 
common::timer::get_micros(); + start_time = get_micros(); } }; private: @@ -274,22 +280,13 @@ class TableImpl : public Table { std::vector* kv_list, ErrorCode* err); - // 将一批mutation根据rowkey分配给各个TS - void DistributeMutations(const std::vector& mu_list, - bool called_by_user); + void DistributeTasks(const std::vector& task_list, + bool called_by_user, + SdkTask::TYPE task_type); void DistributeMutationsById(std::vector* retry_mu_id_list); - // 分配完成后将mutation打包 - void PackMutations(const std::string& server_addr, - std::vector& mu_list); - - // mutation打包不满但到达最大等待时间 - void MutationBatchTimeout(std::string server_addr, uint64_t batch_seq); - // 通过异步RPC将mutation提交至TS - void CommitMutationsById(const std::string& server_addr, - std::vector& mu_id_list); void CommitMutations(const std::string& server_addr, std::vector& mu_list); @@ -306,21 +303,12 @@ class TableImpl : public Table { void DistributeReaders(const std::vector& row_reader_list, bool called_by_user); - void DistributeReadersById(std::vector* reader_id_list); - - // 分配完成后将reader打包 - void PackReaders(const std::string& server_addr, - std::vector& reader_list); - - // reader打包不满但到达最大等待时间 - void ReaderBatchTimeout(std::string server_addr, uint64_t batch_seq); - // 通过异步RPC将reader提交至TS - void CommitReadersById(const std::string server_addr, - std::vector& reader_id_list); void CommitReaders(const std::string server_addr, std::vector& reader_list); + void DistributeReadersById(std::vector* reader_id_list); + // reader RPC回调 void ReaderCallBack(std::vector* reader_id_list, ReadTabletRequest* request, @@ -330,6 +318,14 @@ class TableImpl : public Table { // reader到达用户设置的超时时间但尚未处理完 void ReaderTimeout(SdkTask* sdk_task); + void PackSdkTasks(const std::string& server_addr, + std::vector& task_list, + SdkTask::TYPE task_type); + void TaskBatchTimeout(SdkTask* task); + void CommitTasksById(const std::string& server_addr, + std::vector& task_id_list, + SdkTask::TYPE task_type); + void ScanTabletAsync(ScanTask* scan_task, 
bool called_by_user); void CommitScan(ScanTask* scan_task, const std::string& server_addr); @@ -415,11 +411,22 @@ class TableImpl : public Table { TableImpl(const TableImpl&); void operator=(const TableImpl&); - struct TaskBatch { - uint64_t sequence_num; - uint64_t timer_id; + struct TaskBatch : public SdkTask { uint64_t byte_size; + std::string server_addr; + SdkTask::TYPE type; + Mutex* mutex; + std::map* task_batch_map; std::vector* row_id_list; + + TaskBatch() : SdkTask(SdkTask::TASKBATCH) {} + virtual bool IsAsync() { return false; } + virtual uint32_t Size() { return 0; } + virtual int64_t TimeOut() { return 0; } + virtual void Wait() {} + virtual void SetError(ErrorCode::ErrorCodeType err, + const std::string& reason) {} + virtual const std::string& RowKey() { return server_addr; } }; std::string name_; @@ -432,10 +439,8 @@ class TableImpl : public Table { uint32_t commit_size_; uint64_t write_commit_timeout_; uint64_t read_commit_timeout_; - std::map mutation_batch_map_; - std::map reader_batch_map_; - uint64_t mutation_batch_seq_; - uint64_t reader_batch_seq_; + std::map mutation_batch_map_; + std::map reader_batch_map_; Counter cur_commit_pending_counter_; Counter cur_reader_pending_counter_; int64_t max_commit_pending_num_; diff --git a/src/sdk/tera.cc b/src/sdk/tera.cc index 0003f9a5f..d01bce0fe 100644 --- a/src/sdk/tera.cc +++ b/src/sdk/tera.cc @@ -41,6 +41,42 @@ static const char* strerr(ErrorCode::ErrorCodeType type) { case ErrorCode::kTxnFail: ret = "TransactionFail"; break; + case ErrorCode::kGTxnDataTooLarge: + ret = "GlobalTransactionDataTooLarge"; + break; + case ErrorCode::kGTxnNotSupport: + ret = "GlobalTransactionNotSupport"; + break; + case ErrorCode::kGTxnSchemaError: + ret = "GlobalTransactionSchemaError"; + break; + case ErrorCode::kGTxnOpAfterCommit: + ret = "GlobalTransactionOpAfterCommit"; + break; + case ErrorCode::kGTxnPrimaryLost: + ret = "GlobalTransactionPrimaryLost"; + break; + case ErrorCode::kGTxnWriteConflict: + ret = 
"GlobalTransactionWriteConflict"; + break; + case ErrorCode::kGTxnLockConflict: + ret = "GlobalTransactionLockConflict"; + break; + case ErrorCode::kGTxnOKButAckFailed: + ret = "GlobalTransactionOkButAckFailed"; + break; + case ErrorCode::kGTxnOKButNotifyFailed: + ret = "GlobalTransactionOKButNotifyFailed"; + break; + case ErrorCode::kGTxnPrewriteTimeout: + ret = "GlobalTransactionPrewriteTimeout"; + break; + case ErrorCode::kGTxnPrimaryCommitTimeout: + ret = "GlobalTransactionPrimaryCommitTimeout"; + break; + case ErrorCode::kGTxnTimestampLost: + ret = "GlobalTransactionTimestampLost"; + break; default: ret = "UnkownError"; } diff --git a/src/sdk/tera_easy.cc b/src/sdk/tera_easy.cc index c0758eb1d..6978ad9e5 100644 --- a/src/sdk/tera_easy.cc +++ b/src/sdk/tera_easy.cc @@ -13,8 +13,8 @@ #include "common/thread_pool.h" #include "tera.h" -#include "utils/atomic.h" -#include "utils/counter.h" +#include "common/atomic.h" +#include "common/counter.h" DEFINE_int32(tera_easy_ttl, 90 * 24 * 3600, "ttl(s) of key-value writed by tera_easy"); DEFINE_int32(tera_sdk_rpc_max_pending_num, 1024 * 1024, "max num of pending kv"); diff --git a/src/sdk/test/filter_utils_test.cc b/src/sdk/test/filter_utils_test.cc index 19051ce6c..456d406e7 100644 --- a/src/sdk/test/filter_utils_test.cc +++ b/src/sdk/test/filter_utils_test.cc @@ -40,27 +40,21 @@ TEST(FilterUtils, DefaultValueConverter) { EXPECT_FALSE(DefaultValueConverter("", "", NULL)); in = "8"; - out_p = string("\x80\x0\x0\x0\x0\x0\x0\x7", 8); + out_p = string("\x08\x0\x0\x0\x0\x0\x0\x0", 8); type = "int64"; + EXPECT_TRUE(DefaultValueConverter(in, type, &out)); EXPECT_EQ(out, out_p); in = "-8"; - out_p = string("\x7F\xFF\xFF\xFF\xFF\xFF\xFF\xF7", 8); + out_p = string("\xF8\xFF\xFF\xFF\xFF\xFF\xFF\xFF", 8); type = "int64"; EXPECT_TRUE(DefaultValueConverter(in, type, &out)); EXPECT_EQ(out, out_p); - in = "8"; - out_p = string("\x0\x0\x0\x0\x0\x0\x0\x8", 8); - type = "uint64"; - EXPECT_TRUE(DefaultValueConverter(in, type, &out)); - 
EXPECT_EQ(out, out_p); - in = "-8"; type = "string"; - EXPECT_TRUE(DefaultValueConverter(in, type, &out)); - EXPECT_TRUE(out == "-8"); + EXPECT_FALSE(DefaultValueConverter(in, type, &out)); type = "illegal"; EXPECT_FALSE(DefaultValueConverter(in, type, &out)); diff --git a/src/sdk/test/global_txn_batch_op.cc b/src/sdk/test/global_txn_batch_op.cc new file mode 100644 index 000000000..3e1d14af6 --- /dev/null +++ b/src/sdk/test/global_txn_batch_op.cc @@ -0,0 +1,440 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "tera.h" +#include "version.h" + +DECLARE_string(flagfile); + +using std::vector; +using std::string; +using std::cout; +using std::endl; +using std::pair; +using std::shared_ptr; +using std::unique_ptr; +using std::unordered_map; +using std::function; + +using TxnPtr = shared_ptr; +using RowMutationPtr = shared_ptr; +using ClientPtr = shared_ptr; +using TablePtr = shared_ptr; + +struct RowkeyCfQu{ + RowkeyCfQu()=default; + RowkeyCfQu(string rowkey, string cf, string qu): + rowkey_(rowkey), + cf_(cf), + qu_(qu) + {} + + string rowkey_, cf_, qu_; +}; +//Used for parsing operator string +using OperatorStructure = vector + vector>>; //vector of rowkey-cf-qus in a table + +static unordered_map& GetHelpCommand() { + static unordered_map help_commands; + return help_commands; +} + +static void InitHelpCommand() { + auto& help_commands = GetHelpCommand(); + help_commands["cas"] = "Compare and set old_vals to new_vals across different Tables, Rows, and Columns atomically, usage: \n" + " cas "; + help_commands["get"] = "Get values across different Tables, Rows, and Columns atomically, usage: \n" + " get "; + help_commands["put"] = "Put values across different Tables, Rows, and Columns atomically, usage: \n" + " put "; +} + +static void PrintHelp(const string& str = "") { + auto& help_commands = GetHelpCommand(); + if (str == "" || help_commands.find(str) == help_commands.end()) { + 
for (auto& help_info : help_commands) { + cout << help_info.first << " " << help_info.second << endl; + } + } else { + cout << str << ": " << help_commands[str] << endl; + } +} + +static vector split(const string& str, const char delimiter) { + vector res; + string::size_type pos = 0; + while (pos < str.size()) { + string::size_type new_pos = str.find(delimiter, pos); + if (new_pos == string::npos) { + res.emplace_back(str.begin() + pos, str.end()); + break; + } else { + res.emplace_back(str.begin() + pos, str.begin() + new_pos); + } + pos = new_pos + 1; + } + return res; +} + +static int64_t ParseOperatorStructure(const string& str, OperatorStructure& opst, size_t& num) { + opst.clear(); + num = 0; + vector table_operations = split(str, '#'); + for (auto& table_op : table_operations) { + vector table_rowkey = split(table_op, '-'); + if (table_rowkey.size() != 2) { + return -1; + } + + opst.emplace_back(table_rowkey[0], vector()); + vector row_operations = split(table_rowkey[1], ':'); + for (auto& row_op : row_operations) { + vector rowkey_cf_qu = split(row_op, '.'); + if (rowkey_cf_qu.size() < 2 || + rowkey_cf_qu.size() > 3) { + return -1; + } + + if (rowkey_cf_qu.size() == 3) { + opst.back().second.emplace_back(rowkey_cf_qu[0], rowkey_cf_qu[1], rowkey_cf_qu[2]); + } else { + opst.back().second.emplace_back(rowkey_cf_qu[0], rowkey_cf_qu[1], ""); + } + ++num; + } + } + return 0; +} + +static int64_t OpenTables(ClientPtr client, + const OperatorStructure& opst, + unordered_map& tables) { + tables.clear(); + tera::ErrorCode ec; + for (auto& table : opst) { + string tablename = table.first; + if (tables.find(table.first) == tables.end()) { + tables.emplace(table.first, TablePtr(client->OpenTable(table.first, &ec))); + if (!tables[table.first]) { + cout << "open table: " << table.first << " failed" << endl; + cout << ec.ToString() << endl; + return -1; + } + } + } + return 0; +} + +static int64_t PutOp(ClientPtr client, const vector& args) { + if (args.size() != 4) { + 
cout << "Arguments Error: " << args.size() << ", need 4" << endl; + PrintHelp(args[1]); + return -1; + } + + OperatorStructure opst; + size_t op_num = 0; + if (ParseOperatorStructure(args[2], opst, op_num) != 0) { + cout << "Parse Arguments Error" << endl; + PrintHelp(args[1]); + return -1; + } + + vector val = split(args[3], ':'); + if (op_num != val.size()) { + cout << "op size is not equal to val size" << endl; + return -1; + } + + unordered_map tables; + if (OpenTables(client, opst, tables) != 0) { + return -1; + } + + TxnPtr g_txn(client->NewGlobalTransaction()); + if (!g_txn) { + cout << "open txn failed" << endl; + return -1; + } + + string result; + for (auto& table : opst) { + const string& tablename = table.first; + const auto& row_cf_qu_list = table.second; + for (auto& row_cf_qu : row_cf_qu_list) { + const string& rowkey = row_cf_qu.rowkey_; + const string& cf = row_cf_qu.cf_ ; + const string& qu = row_cf_qu.qu_ ; + + unique_ptr reader(tables[tablename]->NewRowReader(rowkey)); + reader->AddColumn(cf, qu); + g_txn->Get(reader.get()); + if (reader->GetError().GetType() != tera::ErrorCode::kOK && + reader->GetError().GetType() != tera::ErrorCode::kNotFound) { + std::cout << reader->GetError().ToString() << std::endl; + return -1; + } + + if (reader->Done()) { + result += ":"; + } else { + result += reader->Value() + ":"; + } + } + } + + if (!result.empty()) result.pop_back(); + + auto val_iter = val.begin(); + for (auto& table : opst) { + const string& tablename = table.first; + const auto& row_cf_qu_list = table.second; + unordered_map row_mutations; + + for (auto& row_cf_qu : row_cf_qu_list) { + const string& rowkey = row_cf_qu.rowkey_; + const string& cf = row_cf_qu.cf_ ; + const string& qu = row_cf_qu.qu_ ; + + if (row_mutations.find(rowkey) == row_mutations.end()) { + RowMutationPtr row_mutation(tables[tablename]->NewRowMutation(rowkey)); + row_mutations[rowkey] = row_mutation; + } + row_mutations[rowkey]->Put(cf, qu, *(val_iter++)); + } + + for (auto 
mutation : row_mutations) { + g_txn->ApplyMutation(mutation.second.get()); + } + } + + + g_txn->Commit(); + if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << "commit failed: " << g_txn->GetError().ToString() << std::endl; + cout << result << endl; + return -1; + } + std::cout << "commit success" << std::endl; + + return 0; +} + +static int64_t GetOp(ClientPtr client, const vector& args) { + if (args.size() != 3) { + cout << "Arguments Error: " << args.size() << ", need 3" << endl; + PrintHelp(args[1]); + return -1; + } + + OperatorStructure opst; + size_t op_num = 0; + if (ParseOperatorStructure(args[2], opst, op_num) != 0) { + cout << "Parse Arguments Error" << endl; + PrintHelp(args[1]); + return -1; + } + + unordered_map tables; + if (OpenTables(client, opst, tables) != 0) { + return -1; + } + + TxnPtr g_txn(client->NewGlobalTransaction()); + if (!g_txn) { + cout << "open txn failed" << endl; + return -1; + } + + string result; + for (auto& table : opst) { + const string& tablename = table.first; + const auto& row_cf_qu_list = table.second; + for (auto& row_cf_qu : row_cf_qu_list) { + const string& rowkey = row_cf_qu.rowkey_; + const string& cf = row_cf_qu.cf_ ; + const string& qu = row_cf_qu.qu_ ; + + unique_ptr reader(tables[tablename]->NewRowReader(rowkey)); + reader->AddColumn(cf, qu); + g_txn->Get(reader.get()); + if (reader->GetError().GetType() != tera::ErrorCode::kOK && + reader->GetError().GetType() != tera::ErrorCode::kNotFound) { + std::cout << reader->GetError().ToString() << std::endl; + return -1; + } + + if (reader->Done()) { + result += ":"; + } else { + result += reader->Value() + ":"; + } + } + } + + if (!result.empty()) result.pop_back(); + cout << result << endl; + return 0; +} + +static int64_t CasOp(ClientPtr client, const vector& args) { + if (args.size() != 5) { + cout << "Arguments Error: " << args.size() << ", need 5" << endl; + PrintHelp(args[1]); + return -1; + } + + OperatorStructure opst; + size_t op_num = 0; 
+ if (ParseOperatorStructure(args[2], opst, op_num) != 0) { + cout << "Parse Arguments Error" << endl; + PrintHelp(args[1]); + return -1; + } + + unordered_map tables; + if (OpenTables(client, opst, tables) != 0) { + return -1; + } + + TxnPtr g_txn(client->NewGlobalTransaction()); + if (!g_txn) { + cout << "open txn failed" << endl; + return -1; + } + + string cur_val; + const string& old_val = args[3]; + const string& new_val = args[4]; + for (auto& table : opst) { + const string& tablename = table.first; + const auto& row_cf_qu_list = table.second; + for (auto& row_cf_qu : row_cf_qu_list) { + const string& rowkey = row_cf_qu.rowkey_; + const string& cf = row_cf_qu.cf_ ; + const string& qu = row_cf_qu.qu_ ; + + unique_ptr reader(tables[tablename]->NewRowReader(rowkey)); + reader->AddColumn(cf, qu); + g_txn->Get(reader.get()); + if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << g_txn->GetError().ToString() << std::endl; + return -1; + } + + if (reader->Done()) { + cur_val += ":"; + } else { + cur_val += reader->Value() + ":"; + } + } + } + + if (!cur_val.empty()) cur_val.pop_back(); + + if (old_val != cur_val) { + cout << "cas failed: NotEqual" << endl; + return -1; + } + + vector new_val_list = split(new_val, ':'); + if (op_num != new_val_list.size()) { + cout << "op size is not equal to val size" << endl; + return -1; + } + + auto val_iter = new_val_list.begin(); + for (auto& table : opst) { + const string& tablename = table.first; + const auto& row_cf_qu_list = table.second; + unordered_map row_mutations; + + for (auto& row_cf_qu : row_cf_qu_list) { + const string& rowkey = row_cf_qu.rowkey_; + const string& cf = row_cf_qu.cf_ ; + const string& qu = row_cf_qu.qu_ ; + + if (row_mutations.find(rowkey) == row_mutations.end()) { + RowMutationPtr row_mutation(tables[tablename]->NewRowMutation(rowkey)); + row_mutations[rowkey] = row_mutation; + } + + row_mutations[rowkey]->Put(cf, qu, *(val_iter++)); + } + + for (auto mutation : row_mutations) 
{ + g_txn->ApplyMutation(mutation.second.get()); + } + } + + g_txn->Commit(); + if (g_txn->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << "cas failed: " << g_txn->GetError().ToString() << std::endl; + return -1; + } else { + std::cout << "cas success" << endl; + } + + return 0; +} + +static void SignalHandler(int){ + _exit(0); +} + +int main(int argc, char *argv[]) { + signal(SIGINT, SignalHandler); + signal(SIGTERM, SignalHandler); + ::google::ParseCommandLineFlags(&argc, &argv, true); + + vector args(argv, argv + argc); + InitHelpCommand(); + + if (args.size() < 2) { + PrintHelp(); + return 0; + } else if (args[1] == "help") { + if (args.size() > 2) { + PrintHelp(args[2]); + return 0; + } else { + PrintHelp(); + return 0; + } + } else if (args[1] == "version") { + PrintSystemVersion(); + return 0; + } + + unordered_map& args)>> command_table; + command_table["put"] = PutOp; + command_table["get"] = GetOp; + command_table["cas"] = CasOp; + + if (command_table.find(args[1]) == command_table.end()) { + cout << "Wrong Command" << endl; + PrintHelp(); + return -1; + } + + tera::ErrorCode ec; + ClientPtr client(tera::Client::NewClient(FLAGS_flagfile, args[1], &ec)); + if (!client) { + cout << "Create Client Failed: " << ec.ToString() << endl; + return -1; + } + + return command_table[args[1]](client, args); +} diff --git a/src/sdk/test/global_txn_internal_test.cc b/src/sdk/test/global_txn_internal_test.cc new file mode 100644 index 000000000..e3310aa3a --- /dev/null +++ b/src/sdk/test/global_txn_internal_test.cc @@ -0,0 +1,789 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// +// Author: baorenyi@baidu.com + +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "sdk/global_txn_internal.h" +#include "sdk/read_impl.h" +#include "sdk/sdk_zk.h" +#include "sdk/sdk_utils.h" +#include "sdk/table_impl.h" +#include "sdk/test/mock_table.h" +#include "tera.h" + +DECLARE_string(tera_coord_type); +DECLARE_int32(tera_sdk_timeout); +DECLARE_int32(tera_gtxn_all_puts_size_limit); + +namespace tera { + +class GlobalTxnInternalTest : public ::testing::Test { +public: + GlobalTxnInternalTest() + : start_ts_(100), thread_pool_(2), gtxn_internal_(Client::NewClient()) { + gtxn_internal_.SetStartTimestamp(start_ts_); + } + + ~GlobalTxnInternalTest() {} + + Table* OpenTable(const std::string& tablename) { + FLAGS_tera_coord_type = "fake_zk"; + return static_cast(new MockTable(tablename, &thread_pool_)); + } + + void MakeKvPair(const std::string& row, + const std::string& cf, + const std::string& qu, + int64_t ts, + const std::string& val, + RowResult* value_list) { + + value_list->clear_key_values(); + KeyValuePair* kv = value_list->add_key_values(); + kv->set_key(row); + kv->set_column_family(cf); + kv->set_qualifier(qu); + kv->set_timestamp(ts); + kv->set_value(val); + } + + void SetSchema(Table* table, const TableSchema& table_schema) { + TableImpl* table_impl = static_cast(table); + table_impl->table_schema_ = table_schema; + } + + void BuildResult(RowReaderImpl* reader_impl, + const RowResult& value_list, + RowReader::TRow *row) { + + reader_impl->result_.clear_key_values(); + reader_impl->SetResult(value_list); + row->clear(); + reader_impl->ToMap(row); + } + +private: + int64_t start_ts_; + common::ThreadPool thread_pool_; + GlobalTxnInternal gtxn_internal_; +}; + +TEST_F(GlobalTxnInternalTest, CheckTable) { + ErrorCode status; + Table* t1 = OpenTable("t1"); + Table* t2 = OpenTable("t2"); + Table* t3 = OpenTable("t3"); + Table* t4 = OpenTable("t4"); + EXPECT_FALSE(t1 == NULL); + EXPECT_FALSE(t2 == 
NULL); + EXPECT_FALSE(t3 == NULL); + EXPECT_FALSE(t4 == NULL); + + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1, schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1, &status)); + + // table and not exist cf + TableDescriptor desc1("t1"); + desc1.EnableTxn(); + desc1.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd11 = desc1.AddColumnFamily("cf1"); + cfd11->DisableGlobalTransaction(); + + TableSchema schema1; + TableDescToSchema(desc1, &schema1); + SetSchema(t2, schema1); + EXPECT_FALSE(gtxn_internal_.CheckTable(t2, &status)); + + // table and exist cf + TableDescriptor desc2("t1"); + desc2.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd12 = desc2.AddColumnFamily("cf1"); + cfd12->EnableGlobalTransaction(); + + TableSchema schema2; + TableDescToSchema(desc2, &schema2); + SetSchema(t3, schema2); + EXPECT_FALSE(gtxn_internal_.CheckTable(t3, &status)); + + // table and not exist cf + TableDescriptor desc3("t1"); + desc3.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd13 = desc3.AddColumnFamily("cf1"); + cfd13->DisableGlobalTransaction(); + + TableSchema schema3; + TableDescToSchema(desc3, &schema3); + SetSchema(t4, schema3); + EXPECT_FALSE(gtxn_internal_.CheckTable(t4, &status)); + + delete t1; + delete t2; + delete t3; + delete t4; +} + +TEST_F(GlobalTxnInternalTest, IsLockedByOthers) { + Table* t1_ptr = OpenTable("t1"); + + Cell cell1(t1_ptr, "row1", "cf1", "qu1", start_ts_, "val"); + + RowReader* reader = t1_ptr->NewRowReader("row1"); + RowReaderImpl* reader_impl = (RowReaderImpl*)reader; + RowResult value_list; + // exist lock col && ts < start_ts_ + // 12 < 100 less than start_ts + MakeKvPair("row1", "cf1", PackLockName("qu1"), 12, "", &value_list); + RowReader::TRow row; + BuildResult(reader_impl, value_list, &row); 
+ EXPECT_TRUE(gtxn_internal_.IsLockedByOthers(row, cell1)); + + // not exist lock col + value_list.clear_key_values(); + MakeKvPair("row1", "cf1", "qu1", 120, "", &value_list); + BuildResult(reader_impl, value_list, &row); + EXPECT_FALSE(gtxn_internal_.IsLockedByOthers(row, cell1)); + + // exist lock col && ts > start_ts_ + value_list.clear_key_values(); + // 120 > 100 + MakeKvPair("row1", "cf1", PackLockName("qu1"), 120, "", &value_list); + BuildResult(reader_impl, value_list, &row); + + EXPECT_FALSE(gtxn_internal_.IsLockedByOthers(row, cell1)); + delete t1_ptr; +} + +TEST_F(GlobalTxnInternalTest, IsPrimary) { + const std::string t1 = "t1", t2 = "t2", cf2 = "cf2"; + Table* t1_ptr = OpenTable(t1); + EXPECT_FALSE(t1_ptr == NULL); + Cell cell1(t1_ptr, "row1", "cf1", "qu1", start_ts_, "val"); + Cell cell2(t1_ptr, "row1", "cf2", "qu1", start_ts_, "val"); + + PrimaryInfo info2; + info2.set_table_name("t1"); + info2.set_row_key("row1"); + info2.set_column_family("cf1"); + info2.set_qualifier("qu1"); + info2.set_gtxn_start_ts(200); + + EXPECT_TRUE(gtxn_internal_.IsPrimary(cell1, info2)); + EXPECT_FALSE(gtxn_internal_.IsPrimary(cell2, info2)); + + delete t1_ptr; +} + +TEST_F(GlobalTxnInternalTest, FindTable) { + const std::string t1 = "t1", t2 = "t2", cf2 = "cf2"; + + Table* t1_ptr = OpenTable(t1); + EXPECT_FALSE(t1_ptr == NULL); + + TableDescriptor desc(t1); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd = desc.AddColumnFamily(cf2); + cfd->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1_ptr, schema); + + // call CheckTable(t1) + ErrorCode status; + EXPECT_TRUE(gtxn_internal_.CheckTable(t1_ptr, &status)); + + // t1 in tables_ + Table* t1_ptr1 = gtxn_internal_.FindTable(t1); + EXPECT_TRUE(t1_ptr1->GetName() == t1_ptr->GetName()); + + delete t1_ptr; +} + +TEST_F(GlobalTxnInternalTest, ConflictWithOtherWrite) { + Table* t1_ptr = OpenTable("t1"); + RowReader* reader = 
t1_ptr->NewRowReader("row1"); + RowReaderImpl* reader_impl = (RowReaderImpl*)reader; + RowResult value_list; + // 12 < 100 less than start_ts + MakeKvPair("row1", "cf1", "qu1", 12, "", &value_list); + reader_impl->SetResult(value_list); + ErrorCode status; + std::vector ws; + // ws is empty + EXPECT_FALSE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); + + // different row writes + for(int i = 0; i < 3; ++i) { + Cell cell(t1_ptr, "row2", "cf" + std::to_string(i), + "qu" + std::to_string(i), start_ts_, "val"); + Write w(cell); + ws.push_back(w); + } + EXPECT_FALSE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); + + // same row, but not exist target cf + ws.clear(); + for(int i = 0; i < 3; ++i) { + Cell cell(t1_ptr, "row1", "cf0", "qu" + std::to_string(i), start_ts_, "val"); + Write w(cell); + ws.push_back(w); + } + EXPECT_FALSE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); + + // same row,cf, but not exist write_col, lock_col + ws.clear(); + for(int i = 0; i < 3; ++i) { + Cell cell(t1_ptr, "row1", "cf1", "qu" + std::to_string(i), start_ts_, "val"); + Write w(cell); + ws.push_back(w); + } + EXPECT_FALSE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); + + // same row, cf && exist write_col(latest_ts >= start_ts_) + value_list.clear_key_values(); + // 120 > 100 + MakeKvPair("row1", "cf1", PackWriteName("qu1"), 120, "", &value_list); + reader_impl->result_.clear_key_values(); + reader_impl->SetResult(value_list); + + EXPECT_TRUE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); + EXPECT_TRUE(status.GetType() == ErrorCode::kGTxnWriteConflict); + + // same row, cf && exist write_col(latest_ts < start_ts_) + // not exist lock_col + value_list.clear_key_values(); + // 20 < 100 less than start_ts + MakeKvPair("row1", "cf1", PackWriteName("qu1"), 20, "", &value_list); + reader_impl->result_.clear_key_values(); + reader_impl->SetResult(value_list); + + EXPECT_FALSE(gtxn_internal_.ConflictWithOtherWrite(&ws, 
reader, &status)); + + // same row, cf && exist write_col(latest_ts < start_ts_) + // not exist lock_col + value_list.clear_key_values(); + // 20 < 100 less than start_ts + MakeKvPair("row1", "cf1", PackWriteName("qu1"), 20, "", &value_list); + MakeKvPair("row1", "cf1", PackLockName("qu1"), 20, "", &value_list); + reader_impl->result_.clear_key_values(); + reader_impl->SetResult(value_list); + + EXPECT_TRUE(gtxn_internal_.ConflictWithOtherWrite(&ws, reader, &status)); + EXPECT_TRUE(status.GetType() == ErrorCode::kGTxnLockConflict); + + delete t1_ptr; +} + +TEST_F(GlobalTxnInternalTest, IsGTxnColumnFamily) { + const std::string t1 = "t1", t2 = "t2", cf1 = "cf1", cf2 = "cf2"; + + Table* t1_ptr = OpenTable(t1); + EXPECT_FALSE(t1_ptr == NULL); + + TableDescriptor desc(t1); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd = desc.AddColumnFamily(cf1); + cfd->DisableGlobalTransaction(); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily(cf2); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1_ptr, schema); + + // IsGTxnColumnFamily(t1, xxx) must be call after CheckTable(t1) + EXPECT_FALSE(gtxn_internal_.IsGTxnColumnFamily(t1, cf1)); + EXPECT_FALSE(gtxn_internal_.IsGTxnColumnFamily(t1, cf2)); + EXPECT_FALSE(gtxn_internal_.IsGTxnColumnFamily(t2, cf1)); + // call CheckTable(t1) + ErrorCode status; + EXPECT_TRUE(gtxn_internal_.CheckTable(t1_ptr, &status)); + + // call IsGTxnColumnFamily(t1, xxx) cf1 is gtxn=false + EXPECT_FALSE(gtxn_internal_.IsGTxnColumnFamily(t1, cf1)); + + // call IsGTxnColumnFamily(t1, xxx) cf2 is gtxn=true + EXPECT_TRUE(gtxn_internal_.IsGTxnColumnFamily(t1, cf2)); + + // call IsGTxnColumnFamily(t2, xxx) + EXPECT_FALSE(gtxn_internal_.IsGTxnColumnFamily(t2, cf1)); + delete t1_ptr; +} + +TEST_F(GlobalTxnInternalTest, SetInternalSdkTaskTimeout) { + Table* t1_ptr = OpenTable("t1"); + RowReader* reader = t1_ptr->NewRowReader("row1"); + RowReaderImpl* reader_impl = 
(RowReaderImpl*)reader; + + EXPECT_TRUE(gtxn_internal_.terminal_time_ == 0); + gtxn_internal_.SetCommitDuration(1000); + EXPECT_TRUE(gtxn_internal_.terminal_time_ > 1000); + + gtxn_internal_.SetInternalSdkTaskTimeout(reader); + EXPECT_TRUE(reader_impl->TimeOut() == 1000); + + sleep(2); + gtxn_internal_.SetInternalSdkTaskTimeout(reader); + EXPECT_TRUE(reader_impl->TimeOut() == 1); + EXPECT_TRUE(gtxn_internal_.IsTimeOut() == true); + + gtxn_internal_.is_timeout_ = false; + EXPECT_FALSE(gtxn_internal_.terminal_time_ == 0); + gtxn_internal_.SetCommitDuration(1000000); + EXPECT_TRUE(gtxn_internal_.terminal_time_ > 1000000); + + gtxn_internal_.SetInternalSdkTaskTimeout(reader); + EXPECT_TRUE(reader_impl->TimeOut() == FLAGS_tera_sdk_timeout); + EXPECT_TRUE(gtxn_internal_.IsTimeOut() == false); +} + +TEST_F(GlobalTxnInternalTest, VerifyWritesSize0) { + Table* t1_ptr = OpenTable("t1"); + RowMutation* mu = t1_ptr->NewRowMutation("r1"); + int64_t writes_size = 0; + bool ret = gtxn_internal_.VerifyWritesSize(mu, &writes_size); + EXPECT_TRUE(writes_size == 0); + EXPECT_FALSE(ret); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kBadParam); + delete t1_ptr; + delete mu; +} + +TEST_F(GlobalTxnInternalTest, VerifyWritesSize1) { + Table* t1_ptr = OpenTable("t1"); + RowMutation* mu = t1_ptr->NewRowMutation("r1"); + mu->Put("cf0", "qu1", "value", (int64_t)(5)); + mu->Put("cf0", "qu2", "value", (int64_t)(5)); + mu->Put("cf0", "qu3", "value", (int64_t)(5)); + mu->Put("cf0", "qu4", "value", (int64_t)(5)); + mu->DeleteColumns("cf1", "qu5", (int64_t)(5)); + mu->DeleteColumns("cf1", "qu6", (int64_t)(5)); + mu->DeleteColumns("cf1", "qu7", (int64_t)(5)); + + int64_t writes_size = 0; + FLAGS_tera_gtxn_all_puts_size_limit = 10; + bool ret = gtxn_internal_.VerifyWritesSize(mu, &writes_size); + RowMutationImpl* row_mu_impl = static_cast(mu); + EXPECT_TRUE(row_mu_impl->Size() == writes_size); + EXPECT_FALSE(ret); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kGTxnDataTooLarge); + 
delete t1_ptr; + delete mu; +} + +TEST_F(GlobalTxnInternalTest, VerifyWritesSize2) { + Table* t1_ptr = OpenTable("t1"); + RowMutation* mu = t1_ptr->NewRowMutation("r1"); + mu->Put("cf0", "qu1", "value", (int64_t)(5)); + + int64_t writes_size = 0; + FLAGS_tera_gtxn_all_puts_size_limit = 100000; + bool ret = gtxn_internal_.VerifyWritesSize(mu, &writes_size); + RowMutationImpl* row_mu_impl = static_cast(mu); + EXPECT_TRUE(row_mu_impl->Size() == writes_size); + EXPECT_TRUE(ret); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kOK); + delete t1_ptr; + delete mu; +} + +TEST_F(GlobalTxnInternalTest, BadQualifier) { + bool ret = BadQualifier(""); + EXPECT_FALSE(ret); + ret = BadQualifier("aaaaaaaaaaaaaaa"); + EXPECT_FALSE(ret); + ret = BadQualifier("_*_"); + EXPECT_TRUE(ret); + ret = BadQualifier("____*_"); + EXPECT_TRUE(ret); + ret = BadQualifier("______"); + EXPECT_TRUE(ret); + ret = BadQualifier("____NN_"); + EXPECT_FALSE(ret); + ret = BadQualifier("NN_"); + EXPECT_FALSE(ret); +} + +TEST_F(GlobalTxnInternalTest, VerifyUserRowMutation0) { + Table* t1_ptr = OpenTable("t1"); + RowMutation* mu = t1_ptr->NewRowMutation("r1"); + bool ret = gtxn_internal_.VerifyUserRowMutation(mu); + EXPECT_FALSE(ret); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kBadParam); + delete t1_ptr; + delete mu; +} + +TEST_F(GlobalTxnInternalTest, VerifyUserRowMutation1) { + // set a table to tables_ + ErrorCode status; + Table* t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1, schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1, &status)); + + RowMutation* mu = t1->NewRowMutation("r1"); + mu->Put("cf1", "qu1", "value", (int64_t)(5)); + mu->Put("cf1", "qu1_N_", "value", (int64_t)(5)); + mu->Put("cf1", "qu2", "value", (int64_t)(5)); + bool 
ret = gtxn_internal_.VerifyUserRowMutation(mu); + EXPECT_FALSE(ret); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kBadParam); + delete t1; + delete mu; +} + +TEST_F(GlobalTxnInternalTest, VerifyUserRowMutation2) { + // set a table to tables_ + ErrorCode status; + Table* t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1, schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1, &status)); + + RowMutation* mu = t1->NewRowMutation("r1"); + mu->Put("cf0", "qu1", "value", (int64_t)(5)); + mu->Put("cf1", "qu1_N_", "value", (int64_t)(5)); + mu->Put("cf1", "qu2", "value", (int64_t)(5)); + bool ret = gtxn_internal_.VerifyUserRowMutation(mu); + EXPECT_FALSE(ret); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kBadParam); + delete t1; + delete mu; +} + +TEST_F(GlobalTxnInternalTest, VerifyUserRowMutation3) { + // set a table to tables_ + ErrorCode status; + Table* t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1, schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1, &status)); + + RowMutation* mu = t1->NewRowMutation("r1"); + mu->Put("cf1", "qu1", "value", (int64_t)(5)); + mu->DeleteColumns("cf1", "qu1", (int64_t)(5)); + mu->DeleteColumn("cf1", "qu2", (int64_t)(5)); + mu->DeleteFamily("cf1", (int64_t)(5)); + bool ret = gtxn_internal_.VerifyUserRowMutation(mu); + EXPECT_FALSE(ret); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kGTxnNotSupport); + delete t1; + delete mu; +} + +TEST_F(GlobalTxnInternalTest, VerifyUserRowMutation4) { + // set a table to tables_ + 
ErrorCode status; + Table* t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1, schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1, &status)); + + RowMutation* mu = t1->NewRowMutation("r1"); + mu->Put("cf1", "qu1", "value", (int64_t)(5)); + mu->DeleteColumns("cf1", "qu1", (int64_t)(5)); + mu->DeleteColumn("cf1", "qu2", (int64_t)(5)); + bool ret = gtxn_internal_.VerifyUserRowMutation(mu); + EXPECT_TRUE(ret); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kOK); + delete t1; + delete mu; +} + +TEST_F(GlobalTxnInternalTest, VerifyUserRowReader0) { + Table* t1_ptr = OpenTable("t1"); + RowReader* r = t1_ptr->NewRowReader("r1"); + bool ret = gtxn_internal_.VerifyUserRowReader(r); + EXPECT_FALSE(ret); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); + delete t1_ptr; + delete r; +} + +TEST_F(GlobalTxnInternalTest, VerifyUserRowReader1) { + // set a table to tables_ + ErrorCode status; + Table* t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + //cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1, schema); + + EXPECT_FALSE(gtxn_internal_.CheckTable(t1, &status)); + + RowReader* r = t1->NewRowReader("r1"); + r->AddColumn("cf1", "qu"); + bool ret = gtxn_internal_.VerifyUserRowReader(r); + EXPECT_FALSE(ret); + EXPECT_TRUE(r->GetError().GetType() == status.GetType()); + delete t1; + delete r; +} + +TEST_F(GlobalTxnInternalTest, VerifyUserRowReader2) { + // set a table to tables_ + ErrorCode status; + Table* t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + 
desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1, schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1, &status)); + + RowReader* r = t1->NewRowReader("r1"); + r->AddColumn("cf1", "qu"); + r->SetSnapshot(10); + bool ret = gtxn_internal_.VerifyUserRowReader(r); + EXPECT_FALSE(ret); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); + delete t1; + delete r; +} + +TEST_F(GlobalTxnInternalTest, VerifyUserRowReader3) { + // set a table to tables_ + ErrorCode status; + Table* t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1, schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1, &status)); + + RowReader* r = t1->NewRowReader("r1"); + r->AddColumnFamily("cf1"); + bool ret = gtxn_internal_.VerifyUserRowReader(r); + EXPECT_FALSE(ret); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); + delete t1; + delete r; +} + +TEST_F(GlobalTxnInternalTest, VerifyUserRowReader4) { + // set a table to tables_ + ErrorCode status; + Table* t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1, schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1, &status)); + + RowReader* r = t1->NewRowReader("r1"); + r->AddColumn("cf0", "qu"); + bool ret = gtxn_internal_.VerifyUserRowReader(r); + EXPECT_FALSE(ret); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); + delete t1; + delete r; +} + +TEST_F(GlobalTxnInternalTest, 
VerifyUserRowReader5) { + // set a table to tables_ + ErrorCode status; + Table* t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1, schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1, &status)); + + RowReader* r = t1->NewRowReader("r1"); + r->AddColumn("cf1", "qu_*_"); + bool ret = gtxn_internal_.VerifyUserRowReader(r); + EXPECT_FALSE(ret); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); + delete t1; + delete r; +} + +TEST_F(GlobalTxnInternalTest, VerifyUserRowReader6) { + // set a table to tables_ + ErrorCode status; + Table* t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1, schema); + + EXPECT_TRUE(gtxn_internal_.CheckTable(t1, &status)); + + RowReader* r = t1->NewRowReader("r1"); + r->AddColumn("cf1", "qu"); + r->AddColumn("cf1", "q1"); + r->AddColumn("cf1", "q2"); + bool ret = gtxn_internal_.VerifyUserRowReader(r); + EXPECT_TRUE(ret); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kOK); + delete t1; + delete r; +} + +TEST_F(GlobalTxnInternalTest, PrimaryIsLocked1) { + // bad case b. 
read primary lock failed + ErrorCode status; + Table* t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1, schema); + + tera::PrimaryInfo info2; + std::string info2_str; + info2.set_table_name("t1"); + info2.set_row_key("row1"); + info2.set_column_family("cf1"); + info2.set_qualifier("qu1"); + info2.set_gtxn_start_ts(100); + info2.SerializeToString(&info2_str); + EXPECT_TRUE(gtxn_internal_.CheckTable(t1, &status)); + + ErrorCode mock_status; + mock_status.SetFailed(ErrorCode::kSystem,""); + std::vector reader_errs; + reader_errs.push_back(mock_status); + (static_cast(t1))->AddReaderErrors(reader_errs); + + EXPECT_FALSE(gtxn_internal_.PrimaryIsLocked(info2, 12, &status)); + EXPECT_TRUE(status.GetType() == ErrorCode::kSystem); + delete t1; +} + +TEST_F(GlobalTxnInternalTest, PrimaryIsLocked2) { + // bad case a. 
read primary lock notfound + ErrorCode status; + Table* t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1, schema); + + tera::PrimaryInfo info2; + std::string info2_str; + info2.set_table_name("t1"); + info2.set_row_key("row1"); + info2.set_column_family("cf1"); + info2.set_qualifier("qu1"); + info2.set_gtxn_start_ts(100); + info2.SerializeToString(&info2_str); + EXPECT_TRUE(gtxn_internal_.CheckTable(t1, &status)); + + ErrorCode mock_status; + mock_status.SetFailed(ErrorCode::kNotFound,""); + std::vector reader_errs; + reader_errs.push_back(mock_status); + (static_cast(t1))->AddReaderErrors(reader_errs); + + EXPECT_FALSE(gtxn_internal_.PrimaryIsLocked(info2, 12, &status)); + delete t1; +} + +} // namespace tera diff --git a/src/sdk/test/global_txn_test.cc b/src/sdk/test/global_txn_test.cc new file mode 100644 index 000000000..c68e0cd2e --- /dev/null +++ b/src/sdk/test/global_txn_test.cc @@ -0,0 +1,1265 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+//
+// Author: baorenyi@baidu.com
+
+// Unit tests for GlobalTxn. Each test writes GlobalTxn member fields directly
+// (status_, finish_, has_commited_, the *_cnt_ counters, writes_, acks_,
+// notifies_) to stage a state, calls one method and then checks the fields.
+
+#include <atomic>
+#include <functional>
+#include <set>
+#include <string>
+#include <thread>
+#include <utility>
+#include <vector>
+
+#include "gflags/gflags.h"
+#include "glog/logging.h"
+#include "gtest/gtest.h"
+
+#include "sdk/global_txn.h"
+#include "sdk/global_txn_internal.h"
+#include "sdk/read_impl.h"
+#include "sdk/table_impl.h"
+#include "sdk/sdk_zk.h"
+#include "sdk/test/mock_table.h"
+#include "tera.h"
+
+DECLARE_string(tera_coord_type);
+
+namespace tera {
+
+// Fixture shared by all GlobalTxn tests: one 2-thread pool and one GlobalTxn
+// built on a fresh client and a MockTimeoracleClusterFinder.
+class GlobalTxnTest : public ::testing::Test {
+public:
+    GlobalTxnTest()
+        : thread_pool_(2),
+          gtxn_(Client::NewClient(), &thread_pool_,
+                (new sdk::MockTimeoracleClusterFinder(""))) {
+        // Start every test from a kOK status with status_returned_ cleared.
+        gtxn_.status_.SetFailed(ErrorCode::kOK);
+        gtxn_.status_returned_ = false;
+    }
+
+    ~GlobalTxnTest() {}
+
+    // Overwrites the schema held by `table`.
+    // NOTE(review): assumes `table` is really a TableImpl (the MockTable
+    // objects returned by OpenTable() presumably are) -- confirm.
+    void SetSchema(Table* table, const TableSchema& table_schema) {
+        TableImpl* table_impl = static_cast<TableImpl*>(table);
+        table_impl->table_schema_ = table_schema;
+    }
+
+    // Creates a MockTable called `tablename` on the fixture's thread pool,
+    // after switching FLAGS_tera_coord_type to "fake_zk". Returned as a raw
+    // pointer; the tests that clean up do so with `delete`.
+    Table* OpenTable(const std::string& tablename) {
+        FLAGS_tera_coord_type = "fake_zk";
+        return static_cast<Table*>(new MockTable(tablename, &thread_pool_));
+    }
+
+protected:
+    // protected (not private): TEST_F bodies are compiled as subclasses of
+    // the fixture and access both members directly.
+    common::ThreadPool thread_pool_;
+    GlobalTxn gtxn_;
+};
+
+// Synchronous Commit() (no user callback) in three staged states.
+TEST_F(GlobalTxnTest, Commit) {
+    // sync commit ut
+    gtxn_.user_commit_callback_ = NULL;
+
+    // put_fail_cnt_ is non-zero (presumably: some mutation was not applied):
+    // Commit is expected to be refused and has_commited_ to stay false.
+    gtxn_.finish_ = false;
+    gtxn_.status_returned_ = false;
+    gtxn_.put_fail_cnt_.Set(10);
+    gtxn_.has_commited_ = false;
+    EXPECT_TRUE(gtxn_.Commit().GetType() == ErrorCode::kGTxnOpAfterCommit);
+    EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnOpAfterCommit);
+    EXPECT_TRUE(gtxn_.finish_ == true);
+    EXPECT_TRUE(gtxn_.has_commited_ == false);
+
+    // already committed: a second Commit is expected to be refused as well
+    gtxn_.finish_ = false;
+    gtxn_.status_returned_ = false;
+    gtxn_.put_fail_cnt_.Set(0);
+    gtxn_.has_commited_ = true;
+    EXPECT_TRUE(gtxn_.Commit().GetType() == ErrorCode::kGTxnOpAfterCommit);
+    EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnOpAfterCommit);
+    EXPECT_TRUE(gtxn_.finish_ == true);
+    EXPECT_TRUE(gtxn_.has_commited_ == true);
+
+    // legal state with an empty write set: Commit is expected to succeed
+    gtxn_.finish_ = false;
+    gtxn_.status_returned_ = false;
+    gtxn_.writes_.clear();
+    gtxn_.put_fail_cnt_.Set(0);
+    gtxn_.has_commited_ = false;
+    EXPECT_TRUE(gtxn_.Commit().GetType() == ErrorCode::kOK);
+    EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK);
+    EXPECT_TRUE(gtxn_.finish_ == true);
+    EXPECT_TRUE(gtxn_.has_commited_ == true);
+}
+
+// The three DoVerifyPrimaryLockedCallback tests below feed the callback a
+// reader carrying a given error and check the resulting transaction status.
+// NOTE(review): none of them deletes reader_impl or txn; presumably the
+// callback takes ownership -- confirm.
+
+// Reader error kNotFound is expected to become kGTxnPrimaryLost.
+TEST_F(GlobalTxnTest, DoVerifyPrimaryLockedCallback) {
+    RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey");
+    SingleRowTxn* txn = new SingleRowTxn(NULL, "rowkey", NULL);
+    reader_impl->txn_ = txn;
+
+    // not found primary
+    reader_impl->error_code_.SetFailed(ErrorCode::kNotFound, "");
+
+    RowReader* reader = static_cast<RowReader*>(reader_impl);
+    gtxn_.DoVerifyPrimaryLockedCallback(reader);
+    EXPECT_TRUE(gtxn_.finish_ == true);
+    EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrimaryLost);
+}
+
+// Reader error kTimeout is expected to become kGTxnPrimaryCommitTimeout.
+TEST_F(GlobalTxnTest, DoVerifyPrimaryLockedCallback1) {
+    RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey");
+    SingleRowTxn* txn = new SingleRowTxn(NULL, "rowkey", NULL);
+    reader_impl->txn_ = txn;
+
+    // reader timeout
+    reader_impl->error_code_.SetFailed(ErrorCode::kTimeout, "");
+
+    RowReader* reader = static_cast<RowReader*>(reader_impl);
+    gtxn_.DoVerifyPrimaryLockedCallback(reader);
+    EXPECT_TRUE(gtxn_.finish_ == true);
+    EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrimaryCommitTimeout);
+}
+
+// Any other reader error (here kSystem) is expected to be passed through.
+TEST_F(GlobalTxnTest, DoVerifyPrimaryLockedCallback2) {
+    RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey");
+    SingleRowTxn* txn = new SingleRowTxn(NULL, "rowkey", NULL);
+    reader_impl->txn_ = txn;
+
+    // reader other error
+    reader_impl->error_code_.SetFailed(ErrorCode::kSystem, "");
+
+    RowReader* reader = static_cast<RowReader*>(reader_impl);
+    gtxn_.DoVerifyPrimaryLockedCallback(reader);
+    EXPECT_TRUE(gtxn_.finish_ == true);
+    EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kSystem);
+}
+
+// Drives CheckPrimaryStatusAndCommmitSecondaries with a primary row
+// transaction whose mutation buffer reports kTimeout, kSystem and kOK in turn.
+// NOTE(review): the SingleRowTxn objects and the table opened below are never
+// deleted in this test -- confirm who owns them.
+TEST_F(GlobalTxnTest, CheckPrimaryStatusAndCommmitSecondaries) {
+    SingleRowTxn* txn = new SingleRowTxn(NULL, "rowkey", NULL);
+
+    // primary commit timeout
+    gtxn_.finish_ = false;
+    gtxn_.status_returned_ = false;
+    txn->mutation_buffer_.SetError(ErrorCode::kTimeout, "");
+    gtxn_.CheckPrimaryStatusAndCommmitSecondaries(txn);
+    EXPECT_TRUE(gtxn_.finish_ == true);
+    EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrimaryCommitTimeout);
+
+    // primary commit other error
+    gtxn_.finish_ = false;
+    gtxn_.status_returned_ = false;
+    txn = new SingleRowTxn(NULL, "rowkey", NULL);
+    txn->mutation_buffer_.SetError(ErrorCode::kSystem, "");
+    gtxn_.CheckPrimaryStatusAndCommmitSecondaries(txn);
+
+    EXPECT_TRUE(gtxn_.finish_ == true);
+    EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kSystem);
+
+    // primary done run next step
+    gtxn_.finish_ = false;
+    gtxn_.status_returned_ = false;
+    txn = new SingleRowTxn(NULL, "rowkey", NULL);
+    txn->mutation_buffer_.SetError(ErrorCode::kOK, "");
+    gtxn_.writes_.clear();
+    const std::string tablename = "test_t";
+    Table* t = OpenTable(tablename);
+    Cell cell(t, "r1", "cf", "qu", 1, "val");
+    Write w(cell);
+    // insert a 'Write'
+    gtxn_.SaveWrite(tablename, "r1", w);
+
+    gtxn_.acks_.clear();
+    gtxn_.notifies_.clear();
+    gtxn_.CheckPrimaryStatusAndCommmitSecondaries(txn);
+
+    EXPECT_TRUE(gtxn_.finish_ == true);
+    EXPECT_TRUE(gtxn_.status_returned_ == true);
+    EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK);
+}
+
+// SaveWrite keyed by (table, row): saving for the same key again, with the
+// same Write or with a value-less one, must leave exactly one entry in writes_.
+TEST_F(GlobalTxnTest, SaveWrite) {
+    const std::string tablename = "test_t";
+    Table* t = OpenTable(tablename);
+    const std::string row_key = "r1";
+    Cell cell(t, row_key, "cf", "qu", 1, "val");
+    Write w(cell);
+    gtxn_.writes_.clear();
+    // insert a 'Write'
+    gtxn_.SaveWrite(tablename, row_key, w);
+    GlobalTxn::TableWithRowkey twr(tablename, row_key);
+    auto w1 = gtxn_.writes_.find(twr);
+    EXPECT_TRUE(w1 != gtxn_.writes_.end());
+
+    // insert a same 'Write'
+    gtxn_.SaveWrite(tablename, row_key, w);
+    EXPECT_TRUE(gtxn_.writes_.size() == 1);
+
+    // insert a delete type 'Write' at same Cell
+    Cell cell2(t, row_key, "cf", "qu", 1);
+    Write w2(cell2);
+    gtxn_.SaveWrite(tablename, row_key, w2);
+    EXPECT_TRUE(gtxn_.writes_.size() == 1);
+
+    delete t;
+}
+
+// DoAckCallback with acks_cnt_ == 2: finish_ is expected to flip only after
+// the second ack, and only when notifies_cnt_ is zero.
+// NOTE(review): the RowMutation objects are not deleted here; presumably the
+// callback disposes of them -- confirm.
+TEST_F(GlobalTxnTest, DoAckCallback) {
+    const std::string tablename = "test_t1", tablename5 = "test_t5";
+    Table* t1 = OpenTable(tablename);
+    Table* t5 = OpenTable(tablename5);
+
+    // test acks cnt = 2 && not notify
+    RowMutation* mu1 = t1->NewRowMutation("r1");
+    RowMutation* mu5 = t5->NewRowMutation("r1");
+    gtxn_.finish_ = false;
+    gtxn_.ack_done_cnt_.Set(0);
+    gtxn_.acks_cnt_.Set(2);
+    gtxn_.notifies_cnt_.Set(0);
+    gtxn_.DoAckCallback(mu1);
+    EXPECT_TRUE(gtxn_.finish_ == false);
+    gtxn_.DoAckCallback(mu5);
+    EXPECT_TRUE(gtxn_.finish_ == true);
+
+    // test acks cnt = 2 && notify cnt > 0
+    RowMutation* mu11 = t1->NewRowMutation("r1");
+    RowMutation* mu55 = t5->NewRowMutation("r1");
+    gtxn_.finish_ = false;
+    gtxn_.ack_done_cnt_.Set(0);
+    gtxn_.acks_cnt_.Set(2);
+    gtxn_.notifies_cnt_.Set(1);
+
+    gtxn_.DoAckCallback(mu11);
+    EXPECT_TRUE(gtxn_.finish_ == false);
+    gtxn_.DoAckCallback(mu55);
+    EXPECT_TRUE(gtxn_.finish_ == false);
+
+    delete t1;
+    delete t5;
+}
+
+// DoNotifyCallback with notifies_cnt_ == 2 and all_task_pushed_ set: finish_
+// is expected to flip only after the second notify callback.
+TEST_F(GlobalTxnTest, DoNotifyCallback) {
+    const std::string tablename = "test_t11", tablename5 = "test_t55";
+    Table* t11 = OpenTable(tablename);
+    Table* t55 = OpenTable(tablename5);
+
+    // test notifies cnt = 2
+    RowMutation* mu1 = t11->NewRowMutation("r1");
+    RowMutation* mu5 = t55->NewRowMutation("r1");
+    gtxn_.finish_ = false;
+    gtxn_.notify_done_cnt_.Set(0);
+    gtxn_.notifies_cnt_.Set(2);
+    gtxn_.all_task_pushed_ = true;
+    gtxn_.DoNotifyCallback(mu1);
+    EXPECT_TRUE(gtxn_.finish_ == false);
+    gtxn_.DoNotifyCallback(mu5);
+    EXPECT_TRUE(gtxn_.finish_ == true);
+    delete t11;
+    delete t55;
+}
+
+// Free-function wrapper around GlobalTxn::Notify, used as the std::thread
+// entry point in the Notify test.
+void NotifyWarpper(GlobalTxn* gtxn,
+                   Table* t,
+                   const std::string& row_key,
+                   const std::string& column_family,
+                   const std::string& qualifier) {
+    gtxn->Notify(t, row_key, column_family, qualifier);
+}
+
+// Calls Notify from 30 threads at once and checks notifies_ / notifies_cnt_
+// for three inputs: NULL table, one shared row, one row per thread.
+// NOTE(review): t1 is never deleted in this test -- confirm that is intended.
+TEST_F(GlobalTxnTest, Notify) {
+    const size_t notify_thread_cnt = 30;
+    std::vector<std::thread> threads;
+    threads.reserve(notify_thread_cnt);
+    // Waits for every spawned thread, then empties the vector for reuse.
+    auto join_all = [&threads]() {
+        for (auto& worker : threads) {
+            worker.join();
+        }
+        threads.clear();
+    };
+
+    // all Table* is NULL: nothing is expected to be recorded
+    gtxn_.notifies_.clear();
+    gtxn_.notifies_cnt_.Set(0);
+    EXPECT_TRUE(0 == gtxn_.notifies_.size());
+    EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == 0);
+    Table* t0 = NULL;
+    for (size_t i = 0; i < notify_thread_cnt; ++i) {
+        threads.emplace_back(NotifyWarpper, &gtxn_, t0, "", "", "");
+    }
+    join_all();
+    EXPECT_TRUE(0 == gtxn_.notifies_.size());
+    EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == 0);
+
+    // same table and same row: one key holding one entry per thread
+    gtxn_.notifies_.clear();
+    gtxn_.notifies_cnt_.Set(0);
+    EXPECT_TRUE(0 == gtxn_.notifies_.size());
+    EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == 0);
+    Table* t1 = OpenTable("t1");
+    threads.reserve(notify_thread_cnt);
+    for (size_t i = 0; i < notify_thread_cnt; ++i) {
+        threads.emplace_back(NotifyWarpper, &gtxn_, t1, "r1", "", "");
+    }
+    join_all();
+    EXPECT_TRUE(1 == gtxn_.notifies_.size());
+    EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == 1);
+    GlobalTxn::TableWithRowkey twr("t1", "r1");
+    EXPECT_TRUE(gtxn_.notifies_[twr].size() == notify_thread_cnt);
+
+    // same table and diff row: one key per thread, one entry each
+    gtxn_.notifies_.clear();
+    gtxn_.notifies_cnt_.Set(0);
+    EXPECT_TRUE(0 == gtxn_.notifies_.size());
+    EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == 0);
+    for (size_t i = 0; i < notify_thread_cnt; ++i) {
+        threads.emplace_back(NotifyWarpper, &gtxn_, t1, "r" + std::to_string(i), "", "");
+    }
+    join_all();
+    EXPECT_TRUE(notify_thread_cnt == gtxn_.notifies_.size());
+    EXPECT_TRUE(gtxn_.notifies_cnt_.Get() == notify_thread_cnt);
+
+    for (size_t i = 0; i < notify_thread_cnt; ++i) {
+        GlobalTxn::TableWithRowkey twr1("t1", "r" + std::to_string(i));
+        EXPECT_TRUE(gtxn_.notifies_[twr1].size() == 1);
+    }
+}
+
+// Free-function wrapper around GlobalTxn::Ack, used as the std::thread entry
+// point in the Ack test.
+void AckWarpper(GlobalTxn* gtxn, Table* t,
+                const std::string& row_key,
+                const std::string& column_family,
+                const std::string& qualifier) {
+    gtxn->Ack(t, row_key, column_family, qualifier);
+}
+
+// Calls Ack from 30 threads at once and checks acks_ / acks_cnt_ for three
+// inputs: NULL table, one shared row, one row per thread.
+// NOTE(review): t1 is never deleted in this test -- confirm that is intended.
+TEST_F(GlobalTxnTest, Ack) {
+    const size_t ack_thread_cnt = 30;
+    std::vector<std::thread> threads;
+    threads.reserve(ack_thread_cnt);
+    // Waits for every spawned thread, then empties the vector for reuse.
+    auto join_all = [&threads]() {
+        for (auto& worker : threads) {
+            worker.join();
+        }
+        threads.clear();
+    };
+
+    // all Table* is NULL: nothing is expected to be recorded
+    gtxn_.acks_.clear();
+    gtxn_.acks_cnt_.Set(0);
+    EXPECT_TRUE(0 == gtxn_.acks_.size());
+    EXPECT_TRUE(gtxn_.acks_cnt_.Get() == 0);
+    Table* t0 = NULL;
+    for (size_t i = 0; i < ack_thread_cnt; ++i) {
+        threads.emplace_back(AckWarpper, &gtxn_, t0, "", "", "");
+    }
+    join_all();
+    EXPECT_TRUE(0 == gtxn_.acks_.size());
+    EXPECT_TRUE(gtxn_.acks_cnt_.Get() == 0);
+
+    // same table and same row: one key holding one entry per thread
+    gtxn_.acks_.clear();
+    gtxn_.acks_cnt_.Set(0);
+    EXPECT_TRUE(0 == gtxn_.acks_.size());
+    EXPECT_TRUE(gtxn_.acks_cnt_.Get() == 0);
+    Table* t1 = OpenTable("t1");
+    threads.reserve(ack_thread_cnt);
+    for (size_t i = 0; i < ack_thread_cnt; ++i) {
+        threads.emplace_back(AckWarpper, &gtxn_, t1, "r1", "", "");
+    }
+    join_all();
+    EXPECT_TRUE(1 == gtxn_.acks_.size());
+    EXPECT_TRUE(gtxn_.acks_cnt_.Get() == 1);
+    GlobalTxn::TableWithRowkey twr("t1", "r1");
+    EXPECT_TRUE(gtxn_.acks_[twr].size() == ack_thread_cnt);
+
+    // same table and diff row: one key per thread, one entry each
+    gtxn_.acks_.clear();
+    gtxn_.acks_cnt_.Set(0);
+    EXPECT_TRUE(0 == gtxn_.acks_.size());
+    EXPECT_TRUE(gtxn_.acks_cnt_.Get() == 0);
+    for (size_t i = 0; i < ack_thread_cnt; ++i) {
+        threads.emplace_back(AckWarpper, &gtxn_, t1, "r" + std::to_string(i), "", "");
+    }
+    join_all();
+    EXPECT_TRUE(ack_thread_cnt == gtxn_.acks_.size());
+    EXPECT_TRUE(gtxn_.acks_cnt_.Get() == ack_thread_cnt);
+
+    for (size_t i = 0; i < ack_thread_cnt; ++i) {
+        GlobalTxn::TableWithRowkey twr1("t1", "r" + std::to_string(i));
+        EXPECT_TRUE(gtxn_.acks_[twr1].size() == 1);
+    }
+}
+
+TEST_F(GlobalTxnTest, DoCommitSecondariesCallback0) { + // mutation error is kOK will finish + std::vector threads; + size_t secondaries_thread_cnt = 10; + gtxn_.all_task_pushed_ = true; + gtxn_.status_.SetFailed(ErrorCode::kOK); + gtxn_.acks_cnt_.Set(0); + gtxn_.ack_done_cnt_.Set(0); + gtxn_.notifies_cnt_.Set(0); + gtxn_.notify_done_cnt_.Set(0); + gtxn_.writes_cnt_.Set(secondaries_thread_cnt); + for (int i = 0; i < secondaries_thread_cnt; ++i) { + RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); + mu_impl->error_code_.SetFailed(ErrorCode::kOK, ""); + RowMutation* mu = static_cast(mu_impl); + auto func = std::bind(&GlobalTxn::DoCommitSecondariesCallback, >xn_, mu); + threads.emplace_back(std::thread(func)); + } + for (int i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + EXPECT_TRUE(gtxn_.finish_ == true); +} + +TEST_F(GlobalTxnTest, DoCommitSecondariesCallback1) { + // mutation error is kOK not last one + size_t secondaries_thread_cnt = 50; + std::vector threads; + threads.reserve(secondaries_thread_cnt); + gtxn_.status_.SetFailed(ErrorCode::kOK); + gtxn_.acks_cnt_.Set(0); + gtxn_.ack_done_cnt_.Set(0); + gtxn_.notifies_cnt_.Set(0); + gtxn_.notify_done_cnt_.Set(0); + gtxn_.writes_cnt_.Set(secondaries_thread_cnt + 1); + for (int i = 0; i < secondaries_thread_cnt; ++i) { + RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); + mu_impl->error_code_.SetFailed(ErrorCode::kOK, ""); + RowMutation* mu = static_cast(mu_impl); + auto func = std::bind(&GlobalTxn::DoCommitSecondariesCallback, >xn_, mu); + threads.emplace_back(std::thread(func)); + } + for (int i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + EXPECT_TRUE(gtxn_.finish_ == false); +} + +TEST_F(GlobalTxnTest, DoCommitSecondariesCallback2) { + // mutation error is not kOK but status_ is not changed + size_t secondaries_thread_cnt = 10; + std::vector threads; + threads.reserve(secondaries_thread_cnt); + gtxn_.all_task_pushed_ = 
true; + gtxn_.status_.SetFailed(ErrorCode::kOK); + gtxn_.acks_cnt_.Set(0); + gtxn_.ack_done_cnt_.Set(0); + gtxn_.notifies_cnt_.Set(0); + gtxn_.notify_done_cnt_.Set(0); + gtxn_.writes_cnt_.Set(secondaries_thread_cnt); + for (int i = 0; i < secondaries_thread_cnt; ++i) { + RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); + mu_impl->error_code_.SetFailed(ErrorCode::kSystem, ""); + RowMutation* mu = static_cast(mu_impl); + auto func = std::bind(&GlobalTxn::DoCommitSecondariesCallback, >xn_, mu); + threads.emplace_back(std::thread(func)); + } + for (int i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK); + EXPECT_TRUE(gtxn_.finish_ == true); +} + +TEST_F(GlobalTxnTest, DoVerifyPrimaryLockedCallback3) { + // mutation error is not kOK but status_ is not changed + size_t secondaries_thread_cnt = 30; + std::vector threads; + + threads.reserve(secondaries_thread_cnt); + gtxn_.status_.SetFailed(ErrorCode::kOK); + gtxn_.acks_cnt_.Set(10); + gtxn_.ack_done_cnt_.Set(9); + gtxn_.notifies_cnt_.Set(10); + gtxn_.notify_done_cnt_.Set(10); + gtxn_.writes_cnt_.Set(secondaries_thread_cnt); + for (int i = 0; i < secondaries_thread_cnt; ++i) { + RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); + mu_impl->error_code_.SetFailed(ErrorCode::kOK, ""); + RowMutation* mu = static_cast(mu_impl); + auto func = std::bind(&GlobalTxn::DoCommitSecondariesCallback, >xn_, mu); + threads.emplace_back(std::thread(func)); + } + for (int i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK); + EXPECT_TRUE(gtxn_.finish_ == false); + +} + +std::atomic g_callback_run_cnt(0); + +static void EmptyMutationCallback(RowMutation* mu) { + LOG(INFO) << "run empty mutation callback"; + ++g_callback_run_cnt; +} + +// has_commited == true && status_returned_ == false && set mutation callback +TEST_F(GlobalTxnTest, ApplyMutation0) 
{ + g_callback_run_cnt = 0; + gtxn_.has_commited_ = true; + gtxn_.status_returned_ = false; + + RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); + RowMutation* mu = static_cast(mu_impl); + mu->SetCallBack(EmptyMutationCallback); + gtxn_.ApplyMutation(mu); + thread_pool_.Stop(true); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kGTxnOpAfterCommit); + EXPECT_TRUE(gtxn_.status_returned_ == true); + EXPECT_TRUE(gtxn_.put_fail_cnt_.Get() == 0); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnOpAfterCommit); + EXPECT_TRUE(g_callback_run_cnt == 1); +} + +// has_commited == true && status_returned_ == false && don't set mutation callback +TEST_F(GlobalTxnTest, ApplyMutation1) { + g_callback_run_cnt = 0; + gtxn_.has_commited_ = true; + gtxn_.status_returned_ = false; + + RowMutationImpl* mu_impl = new RowMutationImpl(NULL, "rowkey"); + RowMutation* mu = static_cast(mu_impl); + gtxn_.ApplyMutation(mu); + thread_pool_.Stop(true); + EXPECT_TRUE(mu->GetError().GetType() == ErrorCode::kGTxnOpAfterCommit); + EXPECT_TRUE(gtxn_.status_returned_ == true); + EXPECT_TRUE(gtxn_.put_fail_cnt_.Get() == 0); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnOpAfterCommit); + EXPECT_TRUE(g_callback_run_cnt == 0); +} + +TEST_F(GlobalTxnTest, SetReaderStatusAndRunCallback0) { + RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey"); + ErrorCode status; + status.SetFailed(ErrorCode::kSystem, ""); + gtxn_.SetReaderStatusAndRunCallback(reader_impl,&status); + RowReader* r = static_cast(reader_impl); + thread_pool_.Stop(true); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kSystem); + EXPECT_TRUE(r->IsFinished()); + delete r; +} + +TEST_F(GlobalTxnTest, SetReaderStatusAndRunCallback1) { + RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey"); + reader_impl->SetCallBack([](RowReader* r) { + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kSystem); + delete r; + }); + ErrorCode status; + status.SetFailed(ErrorCode::kSystem, ""); + 
gtxn_.SetReaderStatusAndRunCallback(reader_impl,&status); + thread_pool_.Stop(true); +} + +TEST_F(GlobalTxnTest, Get0) { + gtxn_.has_commited_ = true; + RowReaderImpl* reader_impl = new RowReaderImpl(NULL, "rowkey"); + RowReader* r = static_cast(reader_impl); + EXPECT_TRUE(gtxn_.Get(r).GetType() == ErrorCode::kGTxnOpAfterCommit); + thread_pool_.Stop(true); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kGTxnOpAfterCommit); + EXPECT_TRUE(r->IsFinished()); + delete r; +} + +TEST_F(GlobalTxnTest, Get1) { + // set a table to tables_ + ErrorCode status; + Table* t1 = OpenTable("t1"); + // table and exist cf + TableDescriptor desc("t1"); + desc.EnableTxn(); + desc.AddLocalityGroup("lg0"); + ColumnFamilyDescriptor* cfd1 = desc.AddColumnFamily("cf1"); + cfd1->EnableGlobalTransaction(); + + TableSchema schema; + TableDescToSchema(desc, &schema); + SetSchema(t1, schema); + + EXPECT_TRUE(gtxn_.gtxn_internal_->CheckTable(t1, &status)); + + RowReader* r = t1->NewRowReader("r1"); + bool ret = gtxn_.gtxn_internal_->VerifyUserRowReader(r); + EXPECT_FALSE(ret); + + gtxn_.has_commited_ = false; + EXPECT_TRUE(gtxn_.Get(r).GetType() == ErrorCode::kBadParam); + thread_pool_.Stop(true); + EXPECT_TRUE(r->GetError().GetType() == ErrorCode::kBadParam); + EXPECT_TRUE(r->IsFinished()); + delete r; + delete t1; +} + +TEST_F(GlobalTxnTest, DoGetCellReaderCallback0) { + Table* t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + InternalReaderContext* ctx = new InternalReaderContext(2, r_impl, >xn_); + r->SetContext(ctx); + std::vector cells; + cells.push_back(new Cell(t1, "r1", "cf1", "qu")); + cells.push_back(new Cell(t1, "r1", "cf2", "qu")); + for(auto& cell : cells) { + ctx->cell_map[cell] = 0; + } + RowReader* inter_r = t1->NewRowReader("r1"); + inter_r->SetContext(new CellReaderContext(cells[0], ctx)); + RowReaderImpl* inter_r_impl = static_cast(inter_r); + inter_r_impl->error_code_.SetFailed(ErrorCode::kNotFound, ""); + 
gtxn_.DoGetCellReaderCallback(inter_r); + EXPECT_TRUE(ctx->not_found_cnt == 1); + EXPECT_TRUE(ctx->fail_cell_cnt == 0); + EXPECT_TRUE(ctx->active_cell_cnt == 1); + thread_pool_.Stop(true); + EXPECT_FALSE(r_impl->IsFinished()); +} + +TEST_F(GlobalTxnTest, DoGetCellReaderCallback1) { + Table* t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + InternalReaderContext* ctx = new InternalReaderContext(2, r_impl, >xn_); + r->SetContext(ctx); + std::vector cells; + cells.push_back(new Cell(t1, "r1", "cf1", "qu")); + cells.push_back(new Cell(t1, "r1", "cf2", "qu")); + for(auto& cell : cells) { + ctx->cell_map[cell] = 0; + } + RowReader* inter_r = t1->NewRowReader("r1"); + inter_r->SetContext(new CellReaderContext(cells[0], ctx)); + RowReaderImpl* inter_r_impl = static_cast(inter_r); + inter_r_impl->error_code_.SetFailed(ErrorCode::kOK, ""); + gtxn_.DoGetCellReaderCallback(inter_r); + EXPECT_TRUE(ctx->fail_cell_cnt == 0); + EXPECT_TRUE(ctx->not_found_cnt == 1); + EXPECT_TRUE(ctx->active_cell_cnt == 1); + thread_pool_.Stop(true); + EXPECT_FALSE(r_impl->IsFinished()); +} + +TEST_F(GlobalTxnTest, DoGetCellReaderCallback2) { + Table* t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + InternalReaderContext* ctx = new InternalReaderContext(2, r_impl, >xn_); + r->SetContext(ctx); + std::vector cells; + cells.push_back(new Cell(t1, "r1", "cf1", "qu")); + cells.push_back(new Cell(t1, "r1", "cf2", "qu")); + for(auto& cell : cells) { + ctx->cell_map[cell] = 0; + } + RowReader* inter_r = t1->NewRowReader("r1"); + inter_r->SetContext(new CellReaderContext(cells[0], ctx)); + RowReaderImpl* inter_r_impl = static_cast(inter_r); + inter_r_impl->error_code_.SetFailed(ErrorCode::kSystem, ""); + gtxn_.DoGetCellReaderCallback(inter_r); + EXPECT_TRUE(ctx->fail_cell_cnt == 1); + EXPECT_TRUE(ctx->not_found_cnt == 0); + EXPECT_TRUE(ctx->active_cell_cnt == 1); + thread_pool_.Stop(true); + 
EXPECT_FALSE(r_impl->IsFinished()); +} + +TEST_F(GlobalTxnTest, DoGetCellReaderCallback3) { + Table* t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + InternalReaderContext* ctx = new InternalReaderContext(1, r_impl, >xn_); + r->SetContext(ctx); + std::vector cells; + cells.push_back(new Cell(t1, "r1", "cf1", "qu")); + for(auto& cell : cells) { + ctx->cell_map[cell] = 0; + } + RowReader* inter_r = t1->NewRowReader("r1"); + inter_r->SetContext(new CellReaderContext(cells[0], ctx)); + RowReaderImpl* inter_r_impl = static_cast(inter_r); + inter_r_impl->error_code_.SetFailed(ErrorCode::kSystem, ""); + gtxn_.DoGetCellReaderCallback(inter_r); + thread_pool_.Stop(true); + EXPECT_TRUE(r_impl->IsFinished()); +} + +TEST_F(GlobalTxnTest, MergeCellToRow) { + Table* t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + InternalReaderContext* ctx = new InternalReaderContext(1, r_impl, >xn_); + r->SetContext(ctx); + std::vector cells; + cells.push_back(new Cell(t1, "r1", "cf1", "qu")); + for(auto& cell : cells) { + ctx->cell_map[cell] = 0; + } + RowReader* inter_r = t1->NewRowReader("r1"); + inter_r->SetContext(new CellReaderContext(cells[0], ctx)); + ErrorCode status; + status.SetFailed(ErrorCode::kSystem, ""); + gtxn_.MergeCellToRow(inter_r, status); + thread_pool_.Stop(true); + EXPECT_TRUE(r_impl->IsFinished()); +} + +TEST_F(GlobalTxnTest, GetCellCallback) { + Table* t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + InternalReaderContext* ctx = new InternalReaderContext(1, r_impl, >xn_); + r->SetContext(ctx); + std::vector cells; + cells.push_back(new Cell(t1, "r1", "cf1", "qu")); + for(auto& cell : cells) { + ctx->cell_map[cell] = 0; + } + RowReader* inter_r = t1->NewRowReader("r1"); + inter_r->SetContext(new CellReaderContext(cells[0], ctx)); + RowReaderImpl* inter_r_impl = static_cast(inter_r); + 
inter_r_impl->error_code_.SetFailed(ErrorCode::kSystem, ""); + gtxn_.GetCellCallback((CellReaderContext*)inter_r->GetContext()); + thread_pool_.Stop(true); + EXPECT_TRUE(r_impl->IsFinished()); +} + +TEST_F(GlobalTxnTest, RollForward) { + // can't find primary write cell + Table* t1 = OpenTable("t1"); + Cell cell(t1, "r1", "cf1", "qu"); + tera::PrimaryInfo primary; + primary.set_table_name("t1"); + primary.set_row_key("r1"); + primary.set_column_family("cf1"); + primary.set_qualifier("qu"); + primary.set_gtxn_start_ts(12); + ErrorCode status; + + std::set gtxn_cfs; + gtxn_.gtxn_internal_->tables_["t1"] = + std::pair>(t1, gtxn_cfs); + ErrorCode mock_status; + mock_status.SetFailed(ErrorCode::kNotFound,""); + std::vector reader_errs; + reader_errs.push_back(mock_status); + (static_cast(t1))->AddReaderErrors(reader_errs); + gtxn_.RollForward(cell, primary, 0, &status); + EXPECT_TRUE(ErrorCode::kGTxnPrimaryLost == status.GetType()); +} + +TEST_F(GlobalTxnTest, CleanLock0) { + // cell same as primary + Table* t1 = OpenTable("t1"); + Cell cell(t1, "r1", "cf1", "qu"); + tera::PrimaryInfo primary; + primary.set_table_name("t1"); + primary.set_row_key("r1"); + primary.set_column_family("cf1"); + primary.set_qualifier("qu"); + primary.set_gtxn_start_ts(12); + // init status is OK + ErrorCode status; + status.SetFailed(ErrorCode::kOK); + std::set gtxn_cfs; + gtxn_.gtxn_internal_->tables_["t1"] = + std::pair>(t1, gtxn_cfs); + // only this cell will call mutation + ErrorCode mock_status1; + mock_status1.SetFailed(ErrorCode::kSystem,""); + std::vector mu_errs; + mu_errs.push_back(mock_status1); + (static_cast(t1))->AddMutationErrors(mu_errs); + // run test + gtxn_.CleanLock(cell, primary, &status); + EXPECT_TRUE(mock_status1.GetType() == status.GetType()); +} + +TEST_F(GlobalTxnTest, CleanLock1) { + // cell diff with primary + Table* t1 = OpenTable("t1"); + Cell cell(t1, "r1", "cf1", "qu"); + tera::PrimaryInfo primary; + primary.set_table_name("t1"); + primary.set_row_key("r2"); 
// diff row + primary.set_column_family("cf1"); + primary.set_qualifier("qu"); + primary.set_gtxn_start_ts(12); + // init status is OK + ErrorCode status; + status.SetFailed(ErrorCode::kOK); + std::set gtxn_cfs; + gtxn_.gtxn_internal_->tables_["t1"] = + std::pair>(t1, gtxn_cfs); + // mock primary return kSystem but cell kOK + // will get kSystem + ErrorCode mock_status1; + ErrorCode mock_status2; + mock_status1.SetFailed(ErrorCode::kSystem,""); + mock_status2.SetFailed(ErrorCode::kOK,""); + std::vector mu_errs; + mu_errs.push_back(mock_status1); + mu_errs.push_back(mock_status2); + (static_cast(t1))->AddMutationErrors(mu_errs); + // run test + gtxn_.CleanLock(cell, primary, &status); + EXPECT_TRUE(mock_status1.GetType() == status.GetType()); + EXPECT_TRUE(mock_status2.GetType() != status.GetType()); +} + +TEST_F(GlobalTxnTest, CleanLock2) { + // cell diff with primary + Table* t1 = OpenTable("t1"); + Cell cell(t1, "r1", "cf1", "qu"); + tera::PrimaryInfo primary; + primary.set_table_name("t1"); + primary.set_row_key("r2"); // diff row + primary.set_column_family("cf1"); + primary.set_qualifier("qu"); + primary.set_gtxn_start_ts(12); + // init status is OK + ErrorCode status; + status.SetFailed(ErrorCode::kOK); + std::set gtxn_cfs; + gtxn_.gtxn_internal_->tables_["t1"] = + std::pair>(t1, gtxn_cfs); + // mock primary return kOk but cell kSystem + // will get kSystem + ErrorCode mock_status1; + ErrorCode mock_status2; + mock_status1.SetFailed(ErrorCode::kOK,""); + mock_status2.SetFailed(ErrorCode::kSystem,""); + std::vector mu_errs; + mu_errs.push_back(mock_status1); + mu_errs.push_back(mock_status2); + (static_cast(t1))->AddMutationErrors(mu_errs); + // run test + gtxn_.CleanLock(cell, primary, &status); + EXPECT_TRUE(mock_status1.GetType() != status.GetType()); + EXPECT_TRUE(mock_status2.GetType() == status.GetType()); +} + +TEST_F(GlobalTxnTest, CleanLock3) { + // cell diff with primary + Table* t1 = OpenTable("t1"); + Cell cell(t1, "r1", "cf1", "qu"); + 
tera::PrimaryInfo primary; + primary.set_table_name("t1"); + primary.set_row_key("r2"); // diff row + primary.set_column_family("cf1"); + primary.set_qualifier("qu"); + primary.set_gtxn_start_ts(12); + // init status is OK + ErrorCode status; + status.SetFailed(ErrorCode::kOK); + std::set gtxn_cfs; + gtxn_.gtxn_internal_->tables_["t1"] = + std::pair>(t1, gtxn_cfs); + // mock primary return kTimeout but cell kSystem + // will get kSystem, the latest error will return + ErrorCode mock_status1; + ErrorCode mock_status2; + mock_status1.SetFailed(ErrorCode::kTimeout,""); + mock_status2.SetFailed(ErrorCode::kSystem,""); + std::vector mu_errs; + mu_errs.push_back(mock_status1); + mu_errs.push_back(mock_status2); + (static_cast(t1))->AddMutationErrors(mu_errs); + // run test + gtxn_.CleanLock(cell, primary, &status); + EXPECT_TRUE(mock_status1.GetType() != status.GetType()); + EXPECT_TRUE(mock_status2.GetType() == status.GetType()); +} + +void AddKeyValueToResult(const std::string& key, const std::string& cf, + const std::string& qu, int64_t timestamp, + const std::string& value, RowResult* result) { + KeyValuePair* kv = result->add_key_values(); + kv->set_key(key); + kv->set_column_family(cf); + kv->set_qualifier(qu); + kv->set_timestamp(timestamp); + kv->set_value(value); +} + +TEST_F(GlobalTxnTest, EncodeWriteValue) { + std::string ret = EncodeWriteValue(1, 100); + int type; + int64_t ts; + DecodeWriteValue(ret, &type, &ts); + + EXPECT_TRUE(type == 1); + EXPECT_TRUE(ts == 100); +} + +TEST_F(GlobalTxnTest, DecodeWriteValue) { + // a int bigger than mutaion type + std::string ret = EncodeWriteValue(99, 1000000); + int type; + int64_t ts; + DecodeWriteValue(ret, &type, &ts); + + EXPECT_TRUE(type == 99); + EXPECT_TRUE(ts == 1000000); +} + +TEST_F(GlobalTxnTest, FindValueFromResultRow0) { + // the success case + Table* t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + + // build RowReader::TRow + // cf must exist 
before call FindValueFromResultRow + RowResult result; + gtxn_.start_ts_ = 14; + AddKeyValueToResult("r1", "cf1", "qu1", 9, "v1", &result); + AddKeyValueToResult("r1", "cf1", "qu1", 13, "v2", &result); + + AddKeyValueToResult("r1", "cf1", "qu1_W_", 15, EncodeWriteValue(0, 13), &result); + AddKeyValueToResult("r1", "cf1", "qu1_W_", 12, EncodeWriteValue(0, 9), &result); + r_impl->SetResult(result); + RowReader::TRow row; + r->ToMap(&row); + + for (auto& cf : row) { + std::cout << cf.first << "\n"; + for (auto& qu : cf.second) { + std::cout << "\t" << qu.first << "\n"; + for (auto& v : qu.second) { + std::cout << "\t\tts=" << v.first << ",v=" << v.second << "\n"; + } + } + } + + // build target_cell + Cell target_cell(t1, "r1", "cf1", "qu1"); + + // run test + EXPECT_TRUE(gtxn_.FindValueFromResultRow(row, &target_cell)); + EXPECT_TRUE(target_cell.Timestamp() == 9); + EXPECT_TRUE(target_cell.Value() == "v1"); + + delete t1; + delete r; +} + +TEST_F(GlobalTxnTest, FindValueFromResultRow1) { + // the not found + Table* t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + + // build RowReader::TRow + // cf must exist before call FindValueFromResultRow + RowResult result; + r_impl->SetResult(result); + gtxn_.start_ts_ = 11; + RowReader::TRow row; + r->ToMap(&row); + + // build target_cell + Cell target_cell(t1, "r1", "cf1", "qu1"); + + // run test + EXPECT_FALSE(gtxn_.FindValueFromResultRow(row, &target_cell)); + + delete t1; + delete r; +} + +TEST_F(GlobalTxnTest, FindValueFromResultRow2) { + // the not found write col + Table* t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + + // build RowReader::TRow + // cf must exist before call FindValueFromResultRow + RowResult result; + gtxn_.start_ts_ = 11; + + AddKeyValueToResult("r1", "cf1", "qu1", 9, "v1", &result); + AddKeyValueToResult("r1", "cf1", "qu1", 13, "v2", &result); + r_impl->SetResult(result); + + 
RowReader::TRow row; + r->ToMap(&row); + + // build target_cell + Cell target_cell(t1, "r1", "cf1", "qu1"); + + // run test + EXPECT_FALSE(gtxn_.FindValueFromResultRow(row, &target_cell)); + + delete t1; + delete r; +} + +TEST_F(GlobalTxnTest, FindValueFromResultRow3) { + // the not found rigth version + Table* t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + + // build RowReader::TRow + // cf must exist before call FindValueFromResultRow + RowResult result; + gtxn_.start_ts_ = 11; + + AddKeyValueToResult("r1", "cf1", "qu1", 9, "v1", &result); + AddKeyValueToResult("r1", "cf1", "qu1", 13, "v2", &result); + + AddKeyValueToResult("r1", "cf1", "qu1_W_", 15, EncodeWriteValue(0, 13), &result); + // make ts = 9 v1 is deleted before this function called + AddKeyValueToResult("r1", "cf1", "qu1_W_", 12, EncodeWriteValue(1, 9), &result); + r_impl->SetResult(result); + RowReader::TRow row; + r->ToMap(&row); + + // build target_cell + Cell target_cell(t1, "r1", "cf1", "qu1"); + // run test + EXPECT_FALSE(gtxn_.FindValueFromResultRow(row, &target_cell)); + + delete t1; + delete r; +} + +TEST_F(GlobalTxnTest, FindValueFromResultRow4) { + // the not found rigth version + Table* t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + + // build RowReader::TRow + // cf must exist before call FindValueFromResultRow + RowResult result; + gtxn_.start_ts_ = 11; + + AddKeyValueToResult("r1", "cf1", "qu1", 9, "v1", &result); + AddKeyValueToResult("r1", "cf1", "qu1", 13, "v2", &result); + + // maybe other older version clean by gc, before this function called + AddKeyValueToResult("r1", "cf1", "qu1_W_", 15, EncodeWriteValue(0, 13), &result); + r_impl->SetResult(result); + RowReader::TRow row; + r->ToMap(&row); + + // build target_cell + Cell target_cell(t1, "r1", "cf1", "qu1"); + // run test + EXPECT_FALSE(gtxn_.FindValueFromResultRow(row, &target_cell)); + + delete t1; + delete r; 
+} + +TEST_F(GlobalTxnTest, FindValueFromResultRow5) { + // the not found rigth version + Table* t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + + // build RowReader::TRow + // cf must exist before call FindValueFromResultRow + RowResult result; + gtxn_.start_ts_ = 11; + + // maybe version 1 was clean by gc, before this function called + AddKeyValueToResult("r1", "cf1", "qu1", 13, "v2", &result); + + AddKeyValueToResult("r1", "cf1", "qu1_W_", 15, EncodeWriteValue(0, 13), &result); + AddKeyValueToResult("r1", "cf1", "qu1_W_", 12, EncodeWriteValue(0, 9), &result); + r_impl->SetResult(result); + RowReader::TRow row; + r->ToMap(&row); + + // build target_cell + Cell target_cell(t1, "r1", "cf1", "qu1"); + // run test + EXPECT_FALSE(gtxn_.FindValueFromResultRow(row, &target_cell)); + + delete t1; + delete r; +} + +TEST_F(GlobalTxnTest, SetLastStatus) { + ErrorCode status; + status.SetFailed(ErrorCode::kOK, ""); + gtxn_.status_returned_ = false; + gtxn_.SetLastStatus(&status); + EXPECT_TRUE(gtxn_.status_returned_); + EXPECT_TRUE(gtxn_.status_.GetType() == status.GetType()); + + status.SetFailed(ErrorCode::kTimeout, ""); + gtxn_.status_returned_ = true; + gtxn_.SetLastStatus(&status); + EXPECT_TRUE(gtxn_.status_returned_); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK); +} + +static bool g_callback_run_flag = false; + +TEST_F(GlobalTxnTest, RunUserCallback0) { + g_callback_run_flag = false; + gtxn_.SetCommitCallback([](Transaction* t) {g_callback_run_flag = true;}); + gtxn_.RunUserCallback(); + EXPECT_TRUE(g_callback_run_flag); +} + +static void WaitWapper(GlobalTxn* gtxn) { + gtxn->WaitForComplete(); + g_callback_run_flag = true; +} + +TEST_F(GlobalTxnTest, RunUserCallback1) { + g_callback_run_flag = false; + thread_pool_.AddTask(std::bind(&WaitWapper, >xn_)); + gtxn_.RunUserCallback(); + EXPECT_TRUE(gtxn_.finish_); + thread_pool_.Stop(true); + EXPECT_TRUE(g_callback_run_flag); +} + 
+TEST_F(GlobalTxnTest, BackoffAndMaybeCleanupLock0) { + bool try_clean = false; + ErrorCode status; + // make sure have lock_ts < start_ts + // can't found primary + Table* t1 = OpenTable("t1"); + RowReader* r = t1->NewRowReader("r1"); + RowReaderImpl* r_impl = static_cast(r); + + // build RowReader::TRow + // cf must exist before call FindValueFromResultRow + RowResult result; + gtxn_.start_ts_ = 11; + + // start_ts > lock ts and primary info is bad for parse + AddKeyValueToResult("r1", "cf1", "qu1_L_", 9, "primary info", &result); + r_impl->SetResult(result); + RowReader::TRow row; + r->ToMap(&row); + + // build target_cell + Cell target_cell(t1, "r1", "cf1", "qu1"); + // run test + gtxn_.BackoffAndMaybeCleanupLock(row, target_cell, try_clean, &status); + EXPECT_TRUE(status.GetType() == ErrorCode::kGTxnPrimaryLost); + delete t1; + delete r; +} + +TEST_F(GlobalTxnTest, RunAfterPrewriteFailed0) { + Table* t = OpenTable("t1"); + Cell cell(t, "r1", "cf", "qu", 1, "val"); + Write w(cell); + std::vector ws; + ws.push_back(w); + PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, w.TableName(), w.RowKey()); + ctx->status.SetFailed(ErrorCode::kOK, ""); + gtxn_.RunAfterPrewriteFailed(ctx); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kOK); +} + +TEST_F(GlobalTxnTest, RunAfterPrewriteFailed1) { + Table* t = OpenTable("t1"); + Cell cell(t, "r1", "cf", "qu", 1, "val"); + Write w(cell); + std::vector ws; + ws.push_back(w); + PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, w.TableName(), w.RowKey()); + ctx->status.SetFailed(ErrorCode::kTimeout, ""); + gtxn_.RunAfterPrewriteFailed(ctx); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrewriteTimeout); +} + +TEST_F(GlobalTxnTest, RunAfterPrewriteFailed2) { + Table* t = OpenTable("t1"); + Cell cell(t, "r1", "cf", "qu", 1, "val"); + Write w(cell); + std::vector ws; + ws.push_back(w); + PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, w.TableName(), w.RowKey()); + gtxn_.gtxn_internal_->is_timeout_ = 
true; + gtxn_.RunAfterPrewriteFailed(ctx); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrewriteTimeout); + delete t; +} + +TEST_F(GlobalTxnTest, DoPrewriteCallback0) { + // case a. global timeout + Table* t = OpenTable("t1"); + Transaction* txn = t->StartRowTransaction("r1"); + SingleRowTxn* stxn = static_cast(txn); + Cell cell(t, "r1", "cf", "qu", 1, "val"); + Write w(cell); + std::vector ws; + ws.push_back(w); + PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, w.TableName(), w.RowKey()); + stxn->SetContext(ctx); + gtxn_.gtxn_internal_->is_timeout_ = true; + gtxn_.DoPrewriteCallback(stxn); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrewriteTimeout); + delete t; +} + +TEST_F(GlobalTxnTest, DoPrewriteCallback1) { + // case b. this operator timeout + Table* t = OpenTable("t1"); + Transaction* txn = t->StartRowTransaction("r1"); + SingleRowTxn* stxn = static_cast(txn); + Cell cell(t, "r1", "cf", "qu", 1, "val"); + Write w(cell); + std::vector ws; + ws.push_back(w); + PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, w.TableName(), w.RowKey()); + stxn->SetContext(ctx); + stxn->mutation_buffer_.SetError(ErrorCode::kTimeout,""); + gtxn_.gtxn_internal_->is_timeout_ = false; + gtxn_.DoPrewriteCallback(stxn); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrewriteTimeout); + delete t; +} + +TEST_F(GlobalTxnTest, DoPrewriteCallback2) { + // case b. 
this operator error + Table* t = OpenTable("t1"); + Transaction* txn = t->StartRowTransaction("r1"); + SingleRowTxn* stxn = static_cast(txn); + Cell cell(t, "r1", "cf", "qu", 1, "val"); + Write w(cell); + std::vector ws; + ws.push_back(w); + PrewriteContext* ctx = new PrewriteContext(&ws, >xn_, w.TableName(), w.RowKey()); + stxn->SetContext(ctx); + stxn->mutation_buffer_.SetError(ErrorCode::kSystem,""); + gtxn_.gtxn_internal_->is_timeout_ = false; + gtxn_.DoPrewriteCallback(stxn); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kSystem); + delete t; +} + +TEST_F(GlobalTxnTest, VerifyPrimaryLocked) { + Table* t = OpenTable("t1"); + Cell cell(t, "r1", "cf", "qu", 1, "val"); + Write w(cell); + gtxn_.primary_write_ = &w; + + ErrorCode mock_status; + mock_status.SetFailed(ErrorCode::kNotFound,""); + std::vector reader_errs; + reader_errs.push_back(mock_status); + (static_cast(t))->AddReaderErrors(reader_errs); + + gtxn_.VerifyPrimaryLocked(); + EXPECT_TRUE(gtxn_.status_.GetType() == ErrorCode::kGTxnPrimaryLost); +} + + +} // namespace tera diff --git a/src/sdk/test/global_txn_test_tool.cc b/src/sdk/test/global_txn_test_tool.cc new file mode 100644 index 000000000..889e442fa --- /dev/null +++ b/src/sdk/test/global_txn_test_tool.cc @@ -0,0 +1,754 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// +// Author: baorenyi@baidu.com + +#include "sdk/test/global_txn_test_tool.h" + +#include +#include +#include +#include + +#include +#include + +#include "common/base/string_ext.h" +#include "common/file/file_path.h" +#include "sdk/sdk_utils.h" +#include "sdk/client_impl.h" +#include "utils/config_utils.h" +#include "common/timer.h" +#include "version.h" + +DECLARE_string(tera_gtxn_test_flagfile); +DEFINE_string(gtxn_test_conf_dir, "../conf/", "gtxn test conf dir"); +DEFINE_string(gtxn_test_case_dir, "../cases/", "gtxn test cases dir"); +DEFINE_string(case_number, "", "gtxn test case number"); +DEFINE_bool(ignore_bad_case, false, "gtxn test ignore bad case"); +DEFINE_bool(gtxn_test_async_mode, false, "gtxn test async mode"); +DEFINE_bool(gtxn_test_debug_opened, false, "gtxn test debug opened"); +DEFINE_int32(gtxn_test_thread_pool_size, 20, "gtxn test thread pool size"); +DEFINE_bool(gtxn_test_drop_table_before, true, "gtxn test set drop tables before test"); + +namespace tera { +/** + * cases/ directory format + * + * CONF_ROOR/cases/1/schemas/table_1 [table schema file] + * .... 
+ * CONF_ROOR/cases/1/schemas/table_x + * + * CONF_ROOR/cases/1/T_1/op_list [operations list] + * Format of op_list: + * + * TABLES:table_1,table_2,table_3 + * GET table_1 r1 cf1 qu1 + * PUT table_2 r2 cf2 qu2 valuex + * DEL table_3 r3 cf3 qu3 + * + * CONF_ROOR/cases/1/T_1/gtxn.flag [option] + * CONF_ROOR/cases/1/T_1/result_list [set result list] + * + * CONF_ROOR/cases/1/T_2/op_list + * CONF_ROOR/cases/1/T_1/gtxn.flag [option] + * CONF_ROOR/cases/1/T_2/result_list + * + **/ +bool GlobalTxnTestTool::LoadTestConf() { + // list cases + const std::string case_dir = FLAGS_gtxn_test_case_dir; + std::vector file_list; + if (IsEmpty(case_dir) || !ListCurrentDir(case_dir, &file_list)) { + LOG(ERROR) << "list cases failed, dir:" << case_dir; + return false; + } + for (auto it = file_list.begin(); it != file_list.end(); ++it) { + if (FLAGS_case_number != "" && (*it) != FLAGS_case_number) { + continue; + } + const std::string& dir_name = case_dir + (*it); + + if (!IsDir(dir_name)) { + continue; + } + + int case_num = atoi((*it).c_str()); + if (case_num <= 0) { + LOG(ERROR) << "load case failed, dir:" << dir_name; + return false; + } + // list cases/x/schemas/ + std::vector schema_files; + const std::string& schema_dir = dir_name + "/schemas/"; + if (IsEmpty(schema_dir) || !ListCurrentDir(schema_dir, &schema_files)) { + LOG(ERROR) << "list case(" << dir_name << ") schemas failed"; + return false; + } + int schema_cnt = 0; + for (auto sit = schema_files.begin(); sit != schema_files.end(); ++sit) { + const std::string& schema_file = schema_dir + (*sit); + if (IsDir(schema_file)) { + continue; + } + // load schemas + TableDescriptor* desc = new TableDescriptor(); + if (LoadDescriptor(schema_file, desc)) { + if (case_desc_map_.find(case_num) == case_desc_map_.end()) { + case_desc_map_[case_num] = std::vector(); + } + case_desc_map_[case_num].push_back(desc); + ++schema_cnt; + } else { + delete desc; + LOG(ERROR) << "load schema failed, schema_file:" << schema_file; + break; + } + 
} + if (schema_cnt == 0) { + LOG(ERROR) << "schemafile not found"; + return false; + } + + // mark cases/x/T_xx/ + std::vector txn_list; + if (!ListCurrentDir(dir_name, &txn_list)) { + LOG(ERROR) << "find txn dir failed, dir:" << dir_name; + return false; + } + int reg_cnt = 0; + for(auto it = txn_list.begin(); it != txn_list.end(); ++it) { + if (!IsDir(dir_name + "/" + (*it)) || *it == "schemas") { + continue; + } + if ((*it).find("T_") != std::string::npos) { + // find transaction + int gtxn_id = atoi(((*it).substr(2)).c_str()); + if (gtxn_id <= 0) { + LOG(ERROR) << "mark gtxn conf failed, dir:" + << case_dir << "/" << dir_name; + return false; + } else { + CaseRegister(case_num, gtxn_id); + ++reg_cnt; + } + } + } + if (reg_cnt == 0) { + LOG(ERROR) << "transaction not found"; + return false; + } + } + return true; +} + +void GlobalTxnTestTool::CaseRegister(const int case_num, const int gtxn_id) { + CasePair case_pair(case_num, gtxn_id); + case_list_.push_back(case_pair); +} + +bool GlobalTxnTestTool::LoadDescriptor(const std::string& schema_file, + TableDescriptor* table_desc) { + ErrorCode err; + if (!ParseTableSchemaFile(schema_file, table_desc, &err)) { + LOG(ERROR) << "fail to parse input table schema." 
<< schema_file; + return false; + } + //ShowTableDescriptor(*table_desc, true); + return true; +} + +GlobalTxnTestTool::GlobalTxnTestTool(Client* client): + thread_pool_(FLAGS_gtxn_test_thread_pool_size), + client_(client) { +} + +void GlobalTxnTestTool::RunTest(tera::Client* client, int case_number) { + do_cnt_.Set(0); + done_cnt_.Set(0); + done_fail_cnt_.Set(0); + for (auto it = case_list_.begin(); it != case_list_.end(); ++it) { + CasePair case_pair = *it; + int case_num = case_pair.first; + if (case_number != -1 && case_num != case_number) { + continue; + } + int gtxn_id = case_pair.second; + + const std::string case_dir = FLAGS_gtxn_test_case_dir; + const std::string conf_dir = case_dir + std::to_string(case_num) + + "/T_" + std::to_string(gtxn_id); + const std::string& op_list_file = conf_dir + "/op_list"; + std::vector op_list; + std::ifstream ifile(op_list_file); + std::string line; + int cnt = 0; + while (std::getline(ifile, line)) { + if (cnt == 0) { + std::size_t found = line.find("TABLES:"); + if (found!=std::string::npos) { + std::vector tables; + SplitString(line.substr(found + 7), ",", &tables); + if (!OpenTestTables(tables)) { + return; + } + } + } else { + op_list.push_back(line); + } + ++cnt; + } + ifile.close(); + if (cnt < 1) { + LOG(ERROR) << "no operations in op_list"; + } + do_cnt_.Inc(); + ThreadPool::Task task = std::bind(&GlobalTxnTestTool::RunTestInternal, + this, client, case_num, gtxn_id, op_list); + thread_pool_.AddTask(task); + } +} + +void GlobalTxnTestTool::RunTestInternal(tera::Client* client, const int case_num, const int gtxn_id, + const std::vector& op_list) { + const std::string case_dir = FLAGS_gtxn_test_case_dir; + const std::string conf_dir = case_dir + std::to_string(case_num) + + "/T_" + std::to_string(gtxn_id); + + // make sure flagfile only service for this transaction + tera::Transaction* gtxn = nullptr; + { + MutexLock lock(&mu_); + FLAGS_tera_gtxn_test_flagfile = conf_dir + "/gtxn.flag"; + gtxn = 
client->NewGlobalTransaction(); + } + + if (!FLAGS_gtxn_test_async_mode) { + std::vector result; + for (auto it = op_list.begin(); it != op_list.end(); ++it) { + const std::string& op_str = *it; + VLOG(12) << "OPERATION:" << op_str; + OpType op_type; + std::vector op_args; + if (!ParseOp(op_str, &op_type, &op_args) + || !DoOp(gtxn, op_type, op_args, &result)) { + LOG(ERROR) << gtxn->GetError().ToString(); + delete gtxn; + done_cnt_.Inc(); + return; + } + } + gtxn->Commit(); + result.push_back(std::to_string(gtxn->GetError().GetType())); + if(!CheckResult(case_num, gtxn_id, result)) { + done_fail_cnt_.Inc(); + } + delete gtxn; + done_cnt_.Inc(); + } else { + if (op_list.size() > 0) { + GTxnTestContext* ctx = new GTxnTestContext(); + ctx->tool = this; + ctx->gtxn = gtxn; + ctx->op_list = op_list; + ctx->case_num = case_num; + ctx->gtxn_id = gtxn_id; + ctx->it = ctx->op_list.begin(); + const std::string& op_str = *(ctx->it); + VLOG(12) << "OPERATION:" << op_str; + OpType op_type; + std::vector op_args; + if (!ParseOp(op_str, &op_type, &op_args)) { + LOG(ERROR) << "parse op failed"; + delete ctx->gtxn; + delete ctx; + done_cnt_.Inc(); + return; + } + DoOpAsync(ctx, op_type, op_args); + } else { + LOG(ERROR) << "not set operators"; + delete gtxn; + done_cnt_.Inc(); + } + } +} + +bool GlobalTxnTestTool::OpenTestTables(const std::vector& tables) { + ErrorCode err; + MutexLock lock(&mu_); + for(auto it = tables.begin(); it != tables.end(); ++it) { + const std::string tablename = *it; + if (tables_.find(tablename) == tables_.end()) { + Table* table = client_->OpenTable(tablename, &err); + if (table == NULL) { + return false; + } + tables_[tablename] = table; + } + } + return true; +} + +void GlobalTxnTestTool::DoOpAsync(GTxnTestContext* ctx, + const OpType& op_type, + const std::vector& op_args) { + if (op_args.size() < 4) { + return; + } + Table* table = nullptr; + const std::string tablename = op_args[0]; + auto table_it = tables_.find(tablename); + if (table_it != 
tables_.end()) { + table = table_it->second; + } else { + return; + } + const std::string row = op_args[1]; + const std::string cf = op_args[2]; + const std::string qu = op_args[3]; + if (op_type == OpType::PUT && op_args.size() == 5) { + const std::string value = op_args[4]; + tera::RowMutation* m = table->NewRowMutation(row); + m->Put(cf, qu, value); + ctx->gtxn->ApplyMutation(m); + ctx->result.push_back("PUT: " + std::to_string(ctx->gtxn->GetError().GetType())); + delete m; + } else if (op_type == OpType::GET && op_args.size() == 4) { + tera::RowReader* r = table->NewRowReader(row); + r->AddColumn(cf, qu); + r->SetCallBack([] (RowReader* r) { + ((GTxnTestContext*)r->GetContext())->tool->DoOpAsyncCallback(r); + }); + r->SetContext(ctx); + ctx->gtxn->Get(r); + return; + } else if (op_type == OpType::DEL && op_args.size() == 4) { + tera::RowMutation* m = table->NewRowMutation(row); + m->DeleteColumns(cf, qu); + ctx->gtxn->ApplyMutation(m); + ctx->result.push_back("DEL: " + std::to_string(ctx->gtxn->GetError().GetType())); + delete m; + } + + // this operation is muation , run next operation + if (op_type == OpType::PUT || op_type == OpType::DEL) { + if (++ctx->it != ctx->op_list.end()) { + const std::string& op_str = *(ctx->it); + VLOG(12) << "OPERATION:" << op_str; + OpType next_op_type; + std::vector next_op_args; + if (!ParseOp(op_str, &next_op_type, &next_op_args)) { + LOG(ERROR) << "parse op failed"; + delete ctx->gtxn; + delete ctx; + done_cnt_.Inc(); + return; + } + DoOpAsync(ctx, next_op_type, next_op_args); + } else { + ctx->gtxn->SetCommitCallback([] (Transaction* t) { + ((GTxnTestContext*)t->GetContext())->tool->DoCommitCallback(t); + }); + ctx->gtxn->SetContext(ctx); + ctx->gtxn->Commit(); + } + } +} + +void GlobalTxnTestTool::DoOpAsyncCallback(RowReader* r) { + GTxnTestContext* ctx = (GTxnTestContext*)r->GetContext(); + if (r->GetError().GetType() == ErrorCode::kOK) { + while (!r->Done()) { + const std::string& result_item = "GET: " + + 
std::to_string(r->GetError().GetType()) + " " + + std::to_string(r->Timestamp()) + ":" + r->Value(); + ctx->result.push_back(result_item); + r->Next(); + } + } else if (r->GetError().GetType() == ErrorCode::kNotFound) { + ctx->result.push_back("GET: " + std::to_string(r->GetError().GetType())); + } else { + ctx->result.push_back("GET: " + std::to_string(r->GetError().GetType())); + } + delete r; + // if not last, call next operation + if (++ctx->it != ctx->op_list.end()) { + const std::string& op_str = *(ctx->it); + VLOG(12) << "OPERATION:" << op_str; + OpType next_op_type; + std::vector next_op_args; + if (!ParseOp(op_str, &next_op_type, &next_op_args)) { + LOG(ERROR) << "parse op failed"; + delete ctx->gtxn; + delete ctx; + done_cnt_.Inc(); + return; + } + DoOpAsync(ctx, next_op_type, next_op_args); + } else { + ctx->gtxn->SetCommitCallback([] (Transaction* t) { + ((GTxnTestContext*)t->GetContext())->tool->DoCommitCallback(t); + }); + ctx->gtxn->SetContext(ctx); + ctx->gtxn->Commit(); + } +} + +void GlobalTxnTestTool::DoCommitCallback(Transaction* t) { + GTxnTestContext* ctx = (GTxnTestContext*)t->GetContext(); + + ctx->result.push_back(std::to_string(t->GetError().GetType())); + if (!CheckResult(ctx->case_num, ctx->gtxn_id, ctx->result)) { + done_fail_cnt_.Inc(); + } + delete ctx; + delete t; + done_cnt_.Inc(); +} + +bool GlobalTxnTestTool::DoOp(tera::Transaction* gtxn, + const OpType& op_type, + const std::vector& op_args, + std::vector* result) { + if (op_args.size() < 4) { + return false; + } + Table* table = nullptr; + const std::string tablename = op_args[0]; + auto table_it = tables_.find(tablename); + if (table_it != tables_.end()) { + table = table_it->second; + } else { + return false; + } + const std::string row = op_args[1]; + const std::string cf = op_args[2]; + const std::string qu = op_args[3]; + if (op_type == OpType::PUT && op_args.size() == 5) { + const std::string value = op_args[4]; + std::unique_ptr m(table->NewRowMutation(row)); + m->Put(cf, 
qu, value); + gtxn->ApplyMutation(m.get()); + result->push_back("PUT: " + std::to_string(gtxn->GetError().GetType())); + return true; + } else if (op_type == OpType::GET && op_args.size() == 4) { + std::unique_ptr r(table->NewRowReader(row)); + r->AddColumn(cf, qu); + gtxn->Get(r.get()); + if (r->GetError().GetType() == ErrorCode::kOK) { + while (!r->Done()) { + const std::string& result_item = "GET: " + + std::to_string(r->GetError().GetType()) + " " + + std::to_string(r->Timestamp()) + ":" + r->Value(); + result->push_back(result_item); + r->Next(); + } + return true; + } else if (r->GetError().GetType() == ErrorCode::kNotFound) { + result->push_back("GET: " + std::to_string(r->GetError().GetType())); + return true; + } else { + result->push_back("GET: " + std::to_string(r->GetError().GetType())); + } + } else if (op_type == OpType::DEL && op_args.size() == 4) { + std::unique_ptr m(table->NewRowMutation(row)); + m->DeleteColumns(cf, qu); + gtxn->ApplyMutation(m.get()); + result->push_back("DEL: " + std::to_string(gtxn->GetError().GetType())); + return true; + } + return false; +} + +bool GlobalTxnTestTool::ParseOp(const std::string& op_str, + OpType* op_type, std::vector* op_args) { + std::vector args; + SplitString(op_str, " ", &args); + if (TrimString(args[0]) == "PUT") { + *op_type = OpType::PUT; + } else if (TrimString(args[0]) == "GET") { + *op_type = OpType::GET; + } else if (TrimString(args[0]) == "DEL") { + *op_type = OpType::DEL; + } else { + LOG(ERROR) << "operation type not support :[" << TrimString(args[0]) << "]"; + return false; + } + for (size_t i = 1; i < args.size(); ++i) { + op_args->push_back(TrimString(args[i])); + } + return true; +} + +void GlobalTxnTestTool::DebugOpList(const std::string& op_list_file) { + std::vector op_list; + std::ifstream ofile(op_list_file); + std::string line; + int cnt = 0; + while (std::getline(ofile, line)) { + op_list.push_back(line); + ++cnt; + } + ofile.close(); + if (cnt < 1) { + LOG(ERROR) << "no operators in 
op_list"; + } + std::cout << "OpList:" << std::endl; + for (auto l : op_list) { + std::cout << l < flag_list; + std::ifstream ofile(flag_file); + std::string line; + int cnt = 0; + while (std::getline(ofile, line)) { + flag_list.push_back(line); + ++cnt; + } + ofile.close(); + if (cnt < 1) { + LOG(ERROR) << "no flags in gtxn.flag"; + } + std::cout << "FLAGS:" << std::endl; + for (auto f : flag_list) { + std::string flag = TrimString(f); + if (flag.length() > 0 && flag[0] == '#') { + continue; + } + std::cout << flag <& result) { + MutexLock lock(&mu_); + const std::string case_dir = FLAGS_gtxn_test_case_dir; + const std::string conf_dir = case_dir + std::to_string(case_num) + + "/T_" + std::to_string(gtxn_id); + std::cout << "===========================================" << std::endl; + std::cout << "CASE:" << case_num << " GTXN_ID:" << gtxn_id << std::endl; + if (FLAGS_gtxn_test_debug_opened) { + const std::string& op_list_file = conf_dir + "/op_list"; + const std::string& flag_file = conf_dir + "/gtxn.flag"; + DebugOpList(op_list_file); + DebugFlagFile(flag_file); + std::cout << "Result Printing:" << std::endl; + for (auto it = result.begin(); it != result.end(); ++it) { + std::cout << "RESULT:" << *it << std::endl; + } + std::cout << "-------------------------------------------" << std::endl; + } + + VLOG(12) << "case:" << case_num + << " gtxn_id:" << gtxn_id << " Printing"; + for (auto it = result.begin(); it != result.end(); ++it) { + VLOG(12) << "RESULT:" << *it; + } + + const std::string& result_list_file = conf_dir + "/result_list"; + std::vector result_list; + std::ifstream ofile(result_list_file); + std::string line; + int cnt = 0; + while (std::getline(ofile, line)) { + result_list.push_back(line); + ++cnt; + } + ofile.close(); + if (cnt < 1) { + LOG(ERROR) << "no results in result_list"; + return false; + } + + if (result_list.size() != result.size()) { + std::cout << "\tERROR[expect_line_count: " << result_list.size() << " actual_line_count: " << 
result.size() << "]\n"; + return false; + } else { + int have_diff = 0; + for (size_t i = 0; i < result.size(); ++i) { + const std::string& ret = result[i]; + const std::string& default_ret = result_list[i]; + if (TrimString(ret) != TrimString(default_ret)) { + std::cout << "\tERROR[expect: (" << default_ret << ") actual: (" << ret << ")]\n"; + ++have_diff; + } + } + if (have_diff > 0) { + std::cout << "FAILED :" << have_diff << std::endl; + return false; + } + } + std::cout << "SUCCEED" << std::endl; + return true; +} + +bool GlobalTxnTestTool::InitTestTables(int case_num) { + ErrorCode err; + std::unordered_map table_map; + for (auto it = case_desc_map_.begin(); it != case_desc_map_.end(); ++it) { + if (case_num != -1 && case_num != it->first) { + continue; + } + std::vector& desc_list = it->second; + for (auto dit = desc_list.begin(); dit != desc_list.end(); ++dit) { + TableDescriptor* desc = (*dit); + const std::string& tablename = desc->TableName(); + if (table_map.find(tablename) == table_map.end()) { + table_map[tablename] = desc; + } + } + } + + for (auto& table : table_map) { + if (client_->CreateTable(*(table.second), &err) && err.GetType() == ErrorCode::kOK) { + VLOG(12) << "create table " << table.first << " ok"; + } else { + LOG(ERROR) << "create table " << table.first << " failed"; + return false; + } + } + return true; +} + +bool GlobalTxnTestTool::DropTestTables(int case_num) { + ErrorCode err; + std::unordered_map table_map; + for (auto it = case_desc_map_.begin(); it != case_desc_map_.end(); ++it) { + if (case_num != -1 && case_num != it->first) { + continue; + } + std::vector& desc_list = it->second; + for (auto dit = desc_list.begin(); dit != desc_list.end(); ++dit) { + TableDescriptor* desc = (*dit); + const std::string& tablename = desc->TableName(); + if (table_map.find(tablename) == table_map.end()) { + table_map[tablename] = desc; + } + } + } + + for (auto& table : table_map) { + const std::string& tablename = table.first; + if 
(!client_->DisableTable(tablename, &err)) { + LOG(ERROR) << "disable table failed, table: " << tablename; + return false; + } + TableMeta table_meta; + TabletMetaList tablet_list; + tera::ClientImpl* client_impl = static_cast(client_); + if (!client_impl->ShowTablesInfo(tablename, &table_meta, &tablet_list, &err)) { + LOG(ERROR) << "table not exist: " << tablename; + return false; + } + + uint64_t tablet_num = tablet_list.meta_size(); + while (true) { + if (!client_impl->ShowTablesInfo(tablename, &table_meta, &tablet_list, &err)) { + LOG(ERROR) << "table not exist: " << tablename; + return false; + } + uint64_t tablet_cnt = 0; + for (int32_t i = 0; i < tablet_list.meta_size(); ++i) { + const TabletMeta& tablet = tablet_list.meta(i); + if (tablet.status() == kTabletDisable || tablet.status() == kTableOffLine) { + tablet_cnt++; + } + } + if (tablet_cnt == tablet_num) { + // disable finish + break; + } + sleep(1); + } + + if (!client_->DropTable(tablename, &err)) { + LOG(ERROR) << "drop table " << tablename << " failed"; + return false; + } + } + return true; +} + +void GlobalTxnTestTool::Wait() { + while(do_cnt_.Get() > done_cnt_.Get()) { + sleep(1); + } +} + +void GlobalTxnTestTool::RunCaseOneByOne() { + std::set cases; + for (auto it = case_list_.begin(); it != case_list_.end(); ++it) { + CasePair case_pair = *it; + int case_num = case_pair.first; + cases.insert(case_num); + } + for (auto& case_num : cases) { + LOG(INFO) << "GlobalTxnTest Case " << case_num << " Begin"; + // drop table + if (FLAGS_gtxn_test_drop_table_before) { + DropTestTables(case_num); + } + + if (!InitTestTables(case_num)) { + LOG(ERROR) << "GlobalTxnTest Case " << case_num + << " InitTestTables Failed"; + if (FLAGS_ignore_bad_case == true) { + continue; + } else { + break; + } + } + RunTest(client_, case_num); + Wait(); + LOG(INFO) << "GlobalTxnTest Case " << case_num << " Finish"; + if (done_fail_cnt_.Get() > 0) { + if (FLAGS_ignore_bad_case == true) { + continue; + } else { + break; + } + } 
+ } +} + +} // namespace tera + + +int main(int argc, char *argv[]){ + ::google::ParseCommandLineFlags(&argc, &argv, true); + + if (argc > 1 && std::string(argv[1]) == "version") { + PrintSystemVersion(); + return 0; + } + if (FLAGS_gtxn_test_conf_dir == "") { + LOG(ERROR) << "not set \"--gtxn_test_conf_dir\""; + return -1; + } + if (FLAGS_gtxn_test_case_dir == "") { + LOG(ERROR) << "not set \"--gtxn_test_case_dir\""; + return -1; + } + + tera::ErrorCode error_code; + tera::Client* client = tera::Client::NewClient(FLAGS_gtxn_test_conf_dir + "/tera.flag", + &error_code); + if (client == NULL) { + return -1; + } + + tera::GlobalTxnTestTool gtxn_test_tool(client); + // init table + if (!gtxn_test_tool.LoadTestConf()) { + return -1; + } + gtxn_test_tool.RunCaseOneByOne(); + return 0; +} diff --git a/src/sdk/test/global_txn_test_tool.h b/src/sdk/test/global_txn_test_tool.h new file mode 100644 index 000000000..7acf12644 --- /dev/null +++ b/src/sdk/test/global_txn_test_tool.h @@ -0,0 +1,95 @@ +// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+
+#ifndef TERA_SDK_TEST_GLOBAL_TXN_TEST_TOOL_H_
+#define TERA_SDK_TEST_GLOBAL_TXN_TEST_TOOL_H_
+
+#include
+
+#include "common/thread_pool.h"
+#include "common/counter.h"
+#include "tera.h"
+
+namespace tera {
+// Operation kinds: ParseOp() fills one in, DoOp()/DoOpAsync() receive it.
+enum OpType { GET, PUT, DEL };
+
+class GlobalTxnTestTool;
+
+// State bundle for one transaction; DoOpAsync() takes a pointer to one.
+struct GTxnTestContext {
+    GlobalTxnTestTool* tool;   // back-pointer to the test tool
+    tera::Transaction* gtxn;   // transaction handle
+    std::vector op_list;       // operation strings -- presumably the case's op_list lines; TODO confirm
+    std::vector result;        // result lines -- presumably what CheckResult() compares; TODO confirm
+    std::vector::iterator it;  // presumably the cursor into op_list; TODO confirm
+    int case_num;              // test case number
+    int gtxn_id;               // transaction id within the case
+};
+// Runs file-described global-transaction test cases against a Client.
+class GlobalTxnTestTool {
+public:
+    GlobalTxnTestTool(Client* client);
+    ~GlobalTxnTestTool(){}
+    // Loads the test configuration; main() gives up when this returns false.
+    bool LoadTestConf();
+    // Creates each distinct table described for case_num (-1: every case); false on the first failure.
+    bool InitTestTables(int case_num = -1);
+    // Disables each table of case_num (-1: every case), polls until all tablets are disabled/offline, then drops it.
+    bool DropTestTables(int case_num = -1);
+    // NOTE(review): definition not visible here; RunCaseOneByOne() calls it and then Wait().
+    void RunTest(tera::Client* client, int case_num = -1);
+    // Sleeps in one-second steps while do_cnt_ is still greater than done_cnt_.
+    void Wait();
+    // Per distinct case number: optional drop, InitTestTables, RunTest, Wait; stops on failure unless FLAGS_ignore_bad_case.
+    void RunCaseOneByOne();
+private:
+    void RunTestInternal(tera::Client* client, const int case_num, const int gtxn_id,
+                         const std::vector& op_list);
+
+    void CaseRegister(const int case_num, const int gtxn_id);
+
+    bool LoadDescriptor(const std::string& schema_file, TableDescriptor* schema);
+    // Prints the lines of op_list_file to stdout under an "OpList:" heading.
+    void DebugOpList(const std::string& op_list_file);
+    // Prints the lines of flag_file to stdout under a "FLAGS:" heading, skipping lines that start with '#'.
+    void DebugFlagFile(const std::string& flag_file);
+    // Compares result (trimmed, line by line) with <case_dir><case_num>/T_<gtxn_id>/result_list; true when identical.
+    bool CheckResult(const int case_num, const int gtxn_id,
+                     const std::vector& result);
+
+    bool ParseOp(const std::string& op_str,
+                 OpType* op_type, std::vector* op_args);
+
+    bool DoOp(tera::Transaction* gtxn,
+              const OpType& op_type,
+              const std::vector& op_args,
+              std::vector* result);
+
+    void DoOpAsync(GTxnTestContext* ctx, const OpType& op_type,
+                   const std::vector& op_args);
+
+    void DoOpAsyncCallback(tera::RowReader* r);
+
+    void DoCommitCallback(tera::Transaction* t);
+
+    bool OpenTestTables(const std::vector& tables);
+
+private:
+    typedef std::pair CasePair;     // .first is the case number (see RunCaseOneByOne)
+    std::vector case_list_;
+    typedef std::map> CaseDescMap;  // case number -> list of TableDescriptor*
+    CaseDescMap case_desc_map_;
+    std::map tables_;
+    mutable Mutex mu_;               // held for the whole of CheckResult()
+    common::ThreadPool thread_pool_;
+    Client* client_;                 // used for CreateTable/DisableTable/DropTable
+    Counter do_cnt_;                 // Wait() returns once done_cnt_ catches up with this
+    Counter done_cnt_;
+    Counter done_fail_cnt_;          // non-zero makes RunCaseOneByOne() treat the case as failed
+};
+
+}
// namespace tera
+
+#endif  // TERA_SDK_TEST_GLOBAL_TXN_TEST_TOOL_H_
diff --git a/src/sdk/test/global_txn_testutils.cc b/src/sdk/test/global_txn_testutils.cc
new file mode 100644
index 000000000..c615489d7
--- /dev/null
+++ b/src/sdk/test/global_txn_testutils.cc
@@ -0,0 +1,178 @@
+// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include
+#include
+#include "common/base/string_ext.h"
+#include "common/this_thread.h"
+#include "sdk/test/global_txn_testutils.h"
+#include "utils/config_utils.h"
+#include "common/timer.h"
+
+DEFINE_bool(tera_gtxn_test_opened, false, "for test gtxn opened");
+DEFINE_bool(tera_gtxn_test_isolation_snapshot, true, "true means Snapshot, false means ReadCommitedSnapshot");
+DEFINE_string(tera_gtxn_test_flagfile, "", "gtxn test flagfile");
+DEFINE_int64(start_ts, 1, "start ts");
+DEFINE_int64(begin_commit_ts, 0, "time to wait before begin commit");
+DEFINE_int64(begin_prewrite_ts, 0, "time to wait before prewrite");
+DEFINE_int64(end_prewrite_ts, 0, "time to wait from before prewrite");
+DEFINE_int64(commit_ts, 1, "time to wait from end prewrite");
+DEFINE_int64(begin_primary_commit_ts, 0, "time to wait before primary commit");
+DEFINE_int64(end_primary_commit_ts, 0, "time to wait from primary commit");
+DEFINE_int64(begin_other_commit_ts, 0, "time to wait before other commit");
+DEFINE_string(get_wait_ts_list, "", "timestamp list for wait to get");
+
+namespace tera {
+
+constexpr int64_t kMillisPerSecond = 1000L;
+// Records the flag-file path and the creation time; ts_ is only filled in by LoadTxnConf().
+GlobalTxnTestHelper::GlobalTxnTestHelper(const std::string& conffile) :
+    pos_(0), get_pos_(0), conf_file_(conffile),
+    start_ts_(0), prewrite_start_ts_(0), commit_ts_(0),
+    helper_create_time_(get_millis()) {
+}
+// Loads conf_file_ as a flag file, copies the step delays into ts_, derives the timestamps, then runs step 0.
+void GlobalTxnTestHelper::LoadTxnConf() {
+    utils::LoadFlagFile(conf_file_);
+    ts_[0] = FLAGS_start_ts;
+    start_ts_ = FLAGS_start_ts;
+    ts_[1] = FLAGS_begin_commit_ts;
+    ts_[2] = FLAGS_begin_prewrite_ts;
+    ts_[3] = FLAGS_end_prewrite_ts;
+    ts_[4] = FLAGS_commit_ts;
+    ts_[5] = FLAGS_begin_primary_commit_ts;
+    ts_[6] = FLAGS_end_primary_commit_ts;
+    ts_[7] = FLAGS_begin_other_commit_ts;
+    VLOG(13) << "split get wait ts list begin...";
+    SplitString(FLAGS_get_wait_ts_list, ",", &get_ts_list_);
+    for (auto item : get_ts_list_) {
+        VLOG(13) << item;
+    }
+    VLOG(13) << "split get wait ts list done";
+    // if isolation_level == ReadCommitedSnapshot
+    if (!FLAGS_tera_gtxn_test_isolation_snapshot) {
+        prewrite_start_ts_ = FLAGS_start_ts + FLAGS_begin_commit_ts + FLAGS_begin_prewrite_ts;
+    } else {
+        prewrite_start_ts_ = start_ts_;
+    }
+    commit_ts_ = FLAGS_start_ts + FLAGS_begin_commit_ts + FLAGS_begin_prewrite_ts +
+                 FLAGS_end_prewrite_ts + FLAGS_commit_ts;
+    if (commit_ts_ <= prewrite_start_ts_) {  // commit must come strictly after prewrite start
+        commit_ts_ = prewrite_start_ts_ + 1;
+    }
+    Wait(ts_[0]);
+}
+// Accessors for the timestamps derived by LoadTxnConf().
+int64_t GlobalTxnTestHelper::GetStartTs() {
+    return start_ts_;
+}
+
+int64_t GlobalTxnTestHelper::GetPrewriteStartTs() {
+    return prewrite_start_ts_;
+}
+
+int64_t GlobalTxnTestHelper::GetCommitTs() {
+    return commit_ts_;
+}
+// Delays the next GET until its --get_wait_ts_list entry (seconds after construction) is due.
+void GlobalTxnTestHelper::GetWait(int64_t start_ts) {
+    if (get_ts_list_.size() == 0) {
+        // don't wait
+        VLOG(13) << "[gtxn_helper] [" << start_ts << "] will do get operater immediate";
+    } else {
+        // GETs that have an entry in 'get_ts_list' wait for that entry;
+        // GETs issued after the list is exhausted run immediately
+        if (get_pos_ < get_ts_list_.size()) {
+            int64_t now_millis = tera::get_millis();
+            int64_t def_wait_time = stol(get_ts_list_[get_pos_]) * kMillisPerSecond;
+            int64_t wait_time = helper_create_time_ + def_wait_time - now_millis;
+            VLOG(13) << "get_pos_:" << get_pos_
+                     << " now_millis:" << now_millis
+                     << " def_wait_time:" << def_wait_time
+                     << " size:" << get_ts_list_.size()
+                     << " wait_time:" << wait_time;
+            if (wait_time > 0) {
+                VLOG(13) << "[gtxn_helper] [" << start_ts << "] will do get operater("
+                         << (get_pos_ + 1) << ") after " << wait_time << " ms.";
+                ThisThread::Sleep(wait_time);
+            } else {
+                VLOG(13) << "[gtxn_helper] [" << start_ts << "] will do get operater("
+                         << (get_pos_ + 1) << ") immediate";
+            }
+        } else {
+            VLOG(13) << "[gtxn_helper] [" << start_ts << "] will do get operater("
+                     << (get_pos_ + 1) << ") immediate";
+        }
+        get_pos_++;
+    }
+}
+// Runs schedule step pos_: waits ts_[pos_] seconds past the previous step's finish time, logs, stores its own finish time.
+void GlobalTxnTestHelper::Wait(int64_t start_ts) {
+    int wait_position = pos_++;
+    if (wait_position < 0 || wait_position > 7) { LOG(ERROR) << "overflow position"; _Exit(0); }  // ts_ has 8 slots; never index past them
+    int64_t* info = ts_;
+    int64_t now_micros = tera::get_micros();
+    if (wait_position == 0) {
+        PrintLog(start_ts, "begin txn", info[wait_position + 1]);
+    } else {
+        if (info[wait_position] == -1) {  // -1 delay means: stop the process at this step
+            ExitNow(start_ts, wait_position);
+        }
+        int64_t should_wait = info[wait_position] * 1000000L + info[wait_position - 1];  // due time in micros
+        if (should_wait - now_micros > 10) {
+            ThisThread::Sleep((should_wait - now_micros) / 1000L);
+        } else if (info[wait_position] == 0) {
+            // nothing to do
+        } else if (should_wait < now_micros) {
+            LOG(ERROR) << "[gtxn_helper] [" << start_ts << "] txn run timeout, exited";
+            _Exit(0);
+        }
+        switch (wait_position) {
+        case 1:
+            PrintLog(start_ts, "begin commit", info[wait_position + 1]);
+            break;
+        case 2:
+            PrintLog(start_ts, "begin prewrite", info[wait_position + 1]);
+            break;
+        case 3:
+            PrintLog(start_ts, "end prewrite", info[wait_position + 1]);
+            break;
+        case 4:
+            PrintLog(start_ts, "begin real commit", info[wait_position + 1]);
+            break;
+        case 5:
+            PrintLog(start_ts, "begin primary commit", info[wait_position + 1]);
+            break;
+        case 6:
+            PrintLog(start_ts, "end primary commit", info[wait_position + 1]);
+            break;
+        case 7:
+            PrintLog(start_ts, "begin other commit");
+            break;
+        default:
+            LOG(ERROR) << "overflow position";
+            _Exit(0);
+        }
+    }
+    info[wait_position] = tera::get_micros();  // the next step measures its delay from here
+}
+// Logs the position and terminates the process with _Exit(0).
+void GlobalTxnTestHelper::ExitNow(int64_t start_ts, int position) {
+    VLOG(13) << "[gtxn_helper] [" << start_ts << "] exit @ position=" << position;
+    _Exit(0); // simulates a gtxn that stops at an arbitrary step
+}
+// Logs log_str for start_ts plus the delay before the next step (-1: there is no next step).
+void GlobalTxnTestHelper::PrintLog(int64_t start_ts,
+                                   const std::string& log_str,
+                                   int64_t next_wait_time) {
+    if (next_wait_time == -1) {
+        VLOG(13) << "[gtxn_helper] [" << start_ts << "] " << log_str << ", txn will be done.";
+    } else {
+        VLOG(13) << "[gtxn_helper] [" << start_ts << "] " << log_str
+                 << ", next step will begin after [" << next_wait_time << "s]";
+    }
+}
+
+} // namespace tera
+
diff --git a/src/sdk/test/global_txn_testutils.h b/src/sdk/test/global_txn_testutils.h
new file mode 100644
index 000000000..278ef8e68
--- /dev/null
+++ b/src/sdk/test/global_txn_testutils.h
@@ -0,0 +1,41 @@
+// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef TERA_SDK_TEST_GLOBAL_TXN_TESTUTILS_H_
+#define TERA_SDK_TEST_GLOBAL_TXN_TESTUTILS_H_
+
+#include
+
+namespace tera {
+// Test-only helper that paces a global transaction through timed steps read from a flag file.
+class GlobalTxnTestHelper {
+public:
+    GlobalTxnTestHelper(const std::string& conffile);
+    ~GlobalTxnTestHelper(){}
+    int64_t GetStartTs();
+    int64_t GetPrewriteStartTs();
+    int64_t GetCommitTs();
+    void Wait(int64_t start_ts);     // advance to the next schedule step; start_ts is only used in log lines
+    void GetWait(int64_t start_ts);  // pace the next GET; start_ts is only used in log lines
+    void LoadTxnConf();              // must run before Wait(): it is what fills ts_
+private:
+
+    void ExitNow(int64_t start_ts, int position);
+    void PrintLog(int64_t start_ts,
+                  const std::string& log_str,
+                  int64_t next_wait_time = -1);
+    int pos_;                     // next schedule step for Wait(), 0..7
+    size_t get_pos_;              // next index into get_ts_list_
+    std::string conf_file_;
+    int64_t start_ts_;
+    int64_t prewrite_start_ts_;
+    int64_t commit_ts_;
+    int64_t ts_[8];               // per step: delay in seconds, overwritten with the finish time (micros) once run
+    std::vector get_ts_list_;     // split form of --get_wait_ts_list
+    int64_t helper_create_time_;  // get_millis() at construction
+};
+
+} // namespace tera
+
+#endif // TERA_SDK_TEST_GLOBAL_TXN_TESTUTILS_H_
diff --git a/src/sdk/test/mock_table.h b/src/sdk/test/mock_table.h
new file mode 100644
index 000000000..5d1a75e3b
--- /dev/null
+++ b/src/sdk/test/mock_table.h
@@ -0,0 +1,78 @@
+// Copyright (c) 2015-2017, Baidu.com, Inc. All Rights Reserved
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// Author: baorenyi@baidu.com
+
+#ifndef TERA_SDK_TEST_MOCK_TABLE_H_
+#define TERA_SDK_TEST_MOCK_TABLE_H_
+
+#include
+#include
+#include
+
+#include "gflags/gflags.h"
+#include "glog/logging.h"
+
+#include "sdk/read_impl.h"
+#include "sdk/mutate_impl.h"
+#include "sdk/table_impl.h"
+
+namespace tera {
+// One canned read outcome: the row result to hand back plus the status to report.
+struct MockReaderResult {
+    RowResult result;
+    ErrorCode status;
+};
+// TableImpl stand-in: completes each mutation/read inline from outcomes the test queued beforehand.
+class MockTable: public TableImpl {
+public:
+    MockTable(const std::string& table_name,
+              common::ThreadPool* thread_pool,
+              sdk::ClusterFinder* cluster = NULL)
+        : TableImpl(table_name, thread_pool, cluster) {
+        reader_err_.clear();
+        mu_err_.clear();
+        reader_pos_ = 0;
+        mu_pos_ = 0;
+    }
+    void ApplyMutation(RowMutation* row_mu) {  // reports the next queued mutation error, then runs the callback
+        RowMutationImpl* mu = static_cast(row_mu);
+        mu->SetError(mu_err_.at(mu_pos_++).GetType(),"");  // .at(): an exhausted queue fails loudly, not out of bounds
+        mu->RunCallback();
+    }
+    // Serves the next queued result if any results were added, otherwise the next queued reader error.
+    void Get(RowReader* reader) {
+        RowReaderImpl* r = static_cast(reader);
+        if (reader_result_.size() > 0) {
+            r->SetResult(reader_result_.at(reader_pos_).result);
+            r->SetError(reader_result_.at(reader_pos_++).status.GetType(), "");
+        } else {
+            r->SetError(reader_err_.at(reader_pos_++).GetType(), "");
+        }
+        r->RunCallback();
+    }
+    // Appends canned results; once any exist, Get() uses them instead of reader_err_.
+    void AddReaderResult(const std::vector& results) {
+        reader_result_.insert(reader_result_.end(),
+                              results.begin(), results.end());
+    }
+    // Appends statuses for Get() to report, one per call.
+    void AddReaderErrors(const std::vector& errs) {
+        reader_err_.insert(reader_err_.end(), errs.begin(), errs.end());
+    }
+    // Appends statuses for ApplyMutation() to report, one per call.
+    void AddMutationErrors(const std::vector& errs) {
+        mu_err_.insert(mu_err_.end(), errs.begin(), errs.end());
+    }
+private:
+    std::vector reader_err_;
+    std::vector mu_err_;
+    std::vector reader_result_;
+    int reader_pos_;  // next entry Get() consumes (shared by reader_result_ and reader_err_)
+    int mu_pos_;      // next entry ApplyMutation() consumes
+};
+
+} // namespace tera
+
+#endif // TERA_SDK_TEST_MOCK_TABLE_H_
diff --git a/src/sdk/test/scan_impl_test.cc b/src/sdk/test/scan_impl_test.cc
index abef2d305..475e2ff1c 100644
--- a/src/sdk/test/scan_impl_test.cc
+++ b/src/sdk/test/scan_impl_test.cc
@@ -49,21 +49,6 @@ class ScanDescImplTest : public ::testing::Test, public
ScanDescImpl { TableSchema table_schema_; }; -TEST_F(ScanDescImplTest, GetCfType) { - string cf_name, type; - - cf_name = "cf0"; - EXPECT_TRUE(GetCfType(cf_name, &type)); - EXPECT_EQ(type, "int32"); - - cf_name = "cf2"; - EXPECT_TRUE(GetCfType(cf_name, &type)); - EXPECT_EQ(type, "binary"); - - cf_name = "cf100"; - EXPECT_FALSE(GetCfType(cf_name, &type)); -} - TEST_F(ScanDescImplTest, ParseValueCompareFilter) { string filter_str; Filter filter; @@ -76,21 +61,19 @@ TEST_F(ScanDescImplTest, ParseValueCompareFilter) { filter_str = "qualifier10"; EXPECT_FALSE(ParseValueCompareFilter(filter_str, &filter)); - filter_str = "cf0==-10"; + filter_str = "int64cf0==-10"; EXPECT_TRUE(ParseValueCompareFilter(filter_str, &filter)); EXPECT_EQ(filter.type(), BinComp); EXPECT_EQ(filter.bin_comp_op(), EQ); EXPECT_EQ(filter.field(), ValueFilter); EXPECT_EQ(filter.content(), "cf0"); - filter_str = "cf1>1"; + filter_str = "int64cf1>1"; EXPECT_TRUE(ParseValueCompareFilter(filter_str, &filter)); EXPECT_EQ(filter.bin_comp_op(), GT); filter_str = "cf2==hello"; - EXPECT_TRUE(ParseValueCompareFilter(filter_str, &filter)); - EXPECT_EQ(filter.bin_comp_op(), EQ); - EXPECT_EQ(filter.ref_value(), "hello"); + EXPECT_FALSE(ParseValueCompareFilter(filter_str, &filter)); } TEST_F(ScanDescImplTest, ParseSubFilterString) { @@ -104,33 +87,15 @@ TEST_F(ScanDescImplTest, ParseSubFilterString) { filter_str = "qual@ifier10"; EXPECT_FALSE(ParseSubFilterString(filter_str, &filter)); - filter_str = "cf0 == -10"; + filter_str = "int64cf0 == -10"; EXPECT_TRUE(ParseSubFilterString(filter_str, &filter)); EXPECT_EQ(filter.type(), BinComp); EXPECT_EQ(filter.bin_comp_op(), EQ); EXPECT_EQ(filter.field(), ValueFilter); EXPECT_EQ(filter.content(), "cf0"); - filter_str = "cf1 > 1"; + filter_str = "int64cf1 > 1"; EXPECT_TRUE(ParseSubFilterString(filter_str, &filter)); EXPECT_EQ(filter.bin_comp_op(), GT); } - -TEST_F(ScanDescImplTest, ParseFilterString) { - string filter_str; - - filter_str = "cf0 < 10 AND cf1 >100 AND cf2 
== world"; - SetFilterString(filter_str); - EXPECT_TRUE(ParseFilterString()); - EXPECT_EQ(filter_list_.filter_size(), 3); - - filter_str = "cf < 10 AND cf1 >100 AND cf2 == world"; - SetFilterString(filter_str); - EXPECT_FALSE(ParseFilterString()); - - filter_str = "cf0 < 10 OR cf1 >100 AND cf2 == world"; - SetFilterString(filter_str); - EXPECT_FALSE(ParseFilterString()); -} - } // namespace tera diff --git a/src/sdk/test/sdk_test.cc b/src/sdk/test/sdk_test.cc new file mode 100644 index 000000000..7177bdc3a --- /dev/null +++ b/src/sdk/test/sdk_test.cc @@ -0,0 +1,16 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "gtest/gtest.h" +DECLARE_bool(tera_sdk_tso_client_enabled); +DECLARE_bool(tera_sdk_client_for_gtxn); + +int main(int argc, char* argv[]) { + FLAGS_tera_sdk_client_for_gtxn = true; + FLAGS_tera_sdk_tso_client_enabled = false; + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/src/sdk/test/sdk_timeout_manager_test.cc b/src/sdk/test/sdk_timeout_manager_test.cc new file mode 100644 index 000000000..84ea5a4c1 --- /dev/null +++ b/src/sdk/test/sdk_timeout_manager_test.cc @@ -0,0 +1,244 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+
+#include
+#include "gflags/gflags.h"
+#include "gtest/gtest.h"
+
+#include "sdk_task.h"
+#include "common/counter.h"
+#include "common/timer.h"
+
+using std::string;
+using namespace std::placeholders;
+DEFINE_int32(thread_num, 10, "thread number for TimeoutManager ThreadPool");
+DEFINE_int32(perf_test_thead_num, 10, "thread number of put/pop");
+DEFINE_int32(perf_test_duration, 2, "seconds for performance test");
+namespace tera {
+
+#define YELLOW "\033[33m" /* Yellow */
+
+static Counter callback_called_times = Counter();  // bumped once per TimeoutFunc call
+static Counter task_counter = Counter();           // id source for the perf tests
+// Minimal SdkTask subclass with stub member functions; the tests only rely on its id.
+class TestTask : public SdkTask {
+public:
+    std::string dummy_key;
+
+    TestTask() : SdkTask(SdkTask::READ) {}
+    virtual ~TestTask() {}
+
+    bool IsAsync() { return false; }
+    uint32_t Size() { return 0; }
+    int64_t TimeOut() { return 0; }
+    void Wait() {}
+    void SetError(ErrorCode::ErrorCodeType err,
+                  const std::string& reason) {}
+    const std::string& RowKey() { return dummy_key; }
+};
+// Fixture: a fresh SdkTimeoutManager for every test, with both global counters cleared.
+class SdkTimeoutManagerTest : public ::testing::Test {
+public:
+    SdkTimeoutManagerTest() : thread_pool_(FLAGS_thread_num), timeout_manager_(NULL) {}
+
+    virtual void SetUp() {
+        timeout_manager_ = new SdkTimeoutManager(&thread_pool_);
+        ASSERT_TRUE(timeout_manager_ != NULL);
+        callback_called_times.Clear();
+        task_counter.Clear();
+    }
+    virtual void TearDown() {
+        delete timeout_manager_;
+    }
+
+private:
+    common::ThreadPool thread_pool_;
+    SdkTimeoutManager* timeout_manager_ = NULL;
+};
+// Timeout callback: only counts how often it has been invoked.
+static void TimeoutFunc(SdkTask* task) {
+    callback_called_times.Add(1);
+}
+
+static SdkTask::TimeoutFunc timeout_func = std::bind(TimeoutFunc, _1);
+// Puts LOOP_CNT tasks, checks that both per-shard maps agree, pops them all and prints single-thread throughput.
+TEST_F(SdkTimeoutManagerTest, PutTaskPopTaskTest) {
+    const int32_t LOOP_CNT = 10000;
+    int64_t put_start_time = get_micros();
+    bool succ = true;
+    for (int32_t i = 0; i < LOOP_CNT; ++i) {
+        TestTask* sdk_task = new TestTask();
+        sdk_task->SetId(LOOP_CNT - i);
+        succ &= timeout_manager_->PutTask(sdk_task, 5000, timeout_func);
+    }
+    EXPECT_TRUE(succ);
+    int64_t put_done_time = get_micros();
+
+    uint32_t task_cnt = 0;
+    for (uint32_t i = 0; i < SdkTimeoutManager::kShardNum; ++i) {
+        uint32_t shard_due_cnt = timeout_manager_->map_shard_[i].due_time_map.size();
+        EXPECT_EQ(shard_due_cnt, timeout_manager_->map_shard_[i].id_hash_map.size());
+        task_cnt += shard_due_cnt;
+    }
+    EXPECT_EQ(task_cnt, LOOP_CNT);
+
+    int64_t pop_start_time = get_micros();
+    for (uint32_t shard_idx = 0; shard_idx < SdkTimeoutManager::kShardNum; ++shard_idx) {
+        SdkTimeoutManager::DueTimeMap& due_time_map =
+            timeout_manager_->map_shard_[shard_idx].due_time_map;
+        uint32_t shard_task_cnt = due_time_map.size();
+        uint32_t shard_pop_cnt = 0;
+        while (!due_time_map.empty()) {
+            SdkTask* task = timeout_manager_->PopTask((*due_time_map.begin())->GetId());
+            EXPECT_TRUE(task != NULL);
+            shard_pop_cnt += 1;
+            delete static_cast(task);
+        }
+        EXPECT_EQ(shard_pop_cnt, shard_task_cnt);
+    }
+    int64_t pop_done_time = get_micros();
+
+    std::cout << YELLOW << "SdkTimeoutManager performance(single thread): "
+        << "\n\t\tPutTask: " << int(LOOP_CNT / ((put_done_time - put_start_time + 1) / 1000000.0))
+        << "\n\t\tPopTask: " << int(LOOP_CNT / ((pop_done_time - pop_start_time + 1) / 1000000.0))
+        << std::endl;
+}
+// Every queued task must time out exactly once; re-using an id that is still pending must be rejected.
+TEST_F(SdkTimeoutManagerTest, CheckTimeout) {
+    const int32_t LOOP_CNT = 10000;
+    std::vector tasks;
+    tasks.reserve(LOOP_CNT);
+    bool succ = true;
+    for (int32_t i = 0; i < LOOP_CNT; ++i) {
+        TestTask* sdk_task = new TestTask();
+        sdk_task->SetId(i + 1);
+        succ &= timeout_manager_->PutTask(sdk_task, 500, timeout_func);
+        tasks.push_back(sdk_task);
+    }
+    EXPECT_TRUE(succ);  // every PutTask above used a distinct id and must have succeeded
+    // wait until every SdkTask has been checked for timeout and its TimeoutFunc handed to the thread pool
+    for (uint32_t shard = 0; shard < SdkTimeoutManager::kShardNum; ++shard) {
+        while (!timeout_manager_->map_shard_[shard].due_time_map.empty()){
+            usleep(timeout_manager_->timeout_precision_);
+        }
+    }
+    // wait another 250ms so that every TimeoutFunc queued in thread_pool has finished
+    usleep(250000);
+    EXPECT_EQ(callback_called_times.Get(), LOOP_CNT);
+
+    TestTask* sdk_task = new TestTask();
+    sdk_task->SetId(100);
+    EXPECT_TRUE(timeout_manager_->PutTask(sdk_task, 500, timeout_func));
+    tasks.push_back(sdk_task);
+    EXPECT_FALSE(timeout_manager_->PutTask(sdk_task, 500, timeout_func));
+
+    sdk_task = new TestTask();
+    sdk_task->SetId(100);
+    EXPECT_FALSE(timeout_manager_->PutTask(sdk_task, 500, timeout_func));
+    tasks.push_back(sdk_task);
+
+    usleep(1000);
+    sdk_task = new TestTask();
+    sdk_task->SetId(100);
+    EXPECT_FALSE(timeout_manager_->PutTask(sdk_task, 500, timeout_func));
+    tasks.push_back(sdk_task);
+    // wait until every SdkTask has been checked for timeout and its TimeoutFunc handed to the thread pool
+    for (uint32_t shard = 0; shard < SdkTimeoutManager::kShardNum; ++shard) {
+        while (!timeout_manager_->map_shard_[shard].due_time_map.empty()){
+            usleep(timeout_manager_->timeout_precision_);
+        }
+    }
+    // wait another 250ms so that every TimeoutFunc queued in thread_pool has finished
+    usleep(250000);
+    EXPECT_EQ(callback_called_times.Get(), 1 + LOOP_CNT);
+    for (std::size_t i = 0; i < tasks.size(); ++i) {
+        delete tasks[i];
+    }
+}
+// Producer loop for the perf tests: keeps putting fresh tasks until add_task_run is cleared.
+static bool add_task_run = true;  // NOTE(review): written by the test thread, read by producers, no synchronization
+static void AddTaskFunc(SdkTimeoutManager* mgr, int64_t timeout) {
+    while (add_task_run) {
+        SdkTask* task = new TestTask();
+        task->SetId(task_counter.Add(1));
+        mgr->PutTask(task, timeout, timeout_func);
+    }
+}
+// Consumer loop: counts task_counter down, popping and freeing the task for each id until it is exhausted.
+static void PopTaskFunc(SdkTimeoutManager* mgr) {
+    int64_t task_id;
+    while ((task_id = task_counter.Sub(1) + 1) > 0) {
+        SdkTask* task = mgr->PopTask(task_id);
+        delete static_cast(task);
+    }
+}
+
+TEST_F(SdkTimeoutManagerTest, PutPopPerformance) {
+    std::vector threads;
+    threads.reserve(FLAGS_perf_test_thead_num);
+    add_task_run = true;
+    int64_t timeout = FLAGS_perf_test_duration * 1000 + 1000;
+    for (int32_t i = 0; i < FLAGS_perf_test_thead_num; ++i) {
+        threads.emplace_back(std::thread(std::bind(&AddTaskFunc, timeout_manager_, timeout)));
+    }
+
sleep(FLAGS_perf_test_duration); + add_task_run = false; + for (std::size_t i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + int64_t task_cnt = task_counter.Get(); + + int64_t pop_start_time = get_micros(); + for (int i = 0; i < FLAGS_perf_test_thead_num; ++i) { + threads.emplace_back(std::thread(std::bind(PopTaskFunc, timeout_manager_))); + } + for (std::size_t i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + int64_t pop_end_time = get_micros(); + std::cout << YELLOW + << "SdkTimeoutManager performance(" << FLAGS_perf_test_thead_num <<" put/pop threads): " + << "\n\t\tPutTask: " << task_cnt / FLAGS_perf_test_duration + << "\n\t\tPopTask: " << int(task_cnt / ((pop_end_time - pop_start_time) / 1000000.0)) + << std::endl; +} + +TEST_F(SdkTimeoutManagerTest, CheckTimeoutPerformance) { + common::ThreadPool thread_pool(FLAGS_thread_num); + SdkTimeoutManager* timeout_mgr = new SdkTimeoutManager(&thread_pool); + + std::vector threads; + threads.reserve(FLAGS_perf_test_thead_num); + add_task_run = true; + // timeout set to 1us + int64_t timeout = 1; + int64_t start_time = get_micros(); + for (int32_t i = 0; i < FLAGS_perf_test_thead_num; ++i) { + threads.emplace_back(std::thread(std::bind(&AddTaskFunc, timeout_mgr, timeout))); + } + sleep(FLAGS_perf_test_duration); + add_task_run = false; + int64_t end_time = get_micros(); + for (std::size_t i = 0; i < threads.size(); ++i) { + threads[i].join(); + } + threads.clear(); + int64_t callback_run_cnt = callback_called_times.Get(); + int64_t pending_cnt = task_counter.Get() - callback_run_cnt; + delete timeout_mgr; + + std::cout << YELLOW + << "SdkTimeoutManager performance@CheckTimeout(" + << FLAGS_perf_test_thead_num <<" put threads, " + << FLAGS_thread_num << "TimeoutFunc run threads): " + << "\n\t\tPutTask: " << task_counter.Get() / FLAGS_perf_test_duration + << "\n\t\tPending: " << pending_cnt / FLAGS_perf_test_duration + << "\n\t\tCheckTimeout: " <DisableNotify(); + 
tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + cfd2->DisableNotify(); + auto before_num = schema.LocalityGroupNum(); + EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); + EXPECT_TRUE(schema.LocalityGroupNum() == before_num); } -TEST(SdkUtils, ParseCfNameType) { - string in, name, type; - - in = "cf"; - ASSERT_TRUE(ParseCfNameType(in, &name, &type)); - ASSERT_TRUE(name == "cf"); - ASSERT_TRUE(type == ""); - ASSERT_TRUE(ParseCfNameType(in, NULL, &type)); - - in = ""; - ASSERT_TRUE(ParseCfNameType(in, &name, &type)); - ASSERT_TRUE(name == ""); - ASSERT_TRUE(type == ""); - ASSERT_TRUE(ParseCfNameType(in, &name, NULL)); - - in = "cf"; - ASSERT_TRUE(ParseCfNameType(in, &name, &type)); - ASSERT_TRUE(name == "cf"); - ASSERT_TRUE(type == "int"); - ASSERT_TRUE(ParseCfNameType(in, NULL, NULL)); - - in = ""; - ASSERT_TRUE(ParseCfNameType(in, &name, &type)); - ASSERT_TRUE(name == ""); - ASSERT_TRUE(type == "int"); - - in = "cf<"; - ASSERT_FALSE(ParseCfNameType(in, &name, &type)); - - in = "cf1int>"; - ASSERT_FALSE(ParseCfNameType(in, &name, &type)); - - in = "<>"; - ASSERT_FALSE(ParseCfNameType(in, &name, &type)); -} - -TEST(SdkUtils, CommaInBracket) { - string test; - - test = "0123,{67,90,23},6,89,1{3,567,}01{345}789,12"; - EXPECT_TRUE(CommaInBracket(test, 8)); - EXPECT_TRUE(CommaInBracket(test, 13)); - EXPECT_TRUE(CommaInBracket(test, 23)); - EXPECT_TRUE(CommaInBracket(test, 27)); - EXPECT_TRUE(CommaInBracket(test, 34)); - - EXPECT_FALSE(CommaInBracket(test, 2)); - EXPECT_FALSE(CommaInBracket(test, 4)); - EXPECT_FALSE(CommaInBracket(test, 15)); - EXPECT_FALSE(CommaInBracket(test, 20)); - EXPECT_FALSE(CommaInBracket(test, 37)); -} - -TEST(SdkUtils, SplitCfSchema) { - string schema; - std::vector cfs; - - schema = "cf1"; - SplitCfSchema(schema, &cfs); - EXPECT_EQ(cfs.size(), 1); - - schema = "cf1,cf2,cf3"; - SplitCfSchema(schema, &cfs); - EXPECT_EQ(cfs.size(), 3); - - schema = "cf2{prop1,prop2}"; - SplitCfSchema(schema, &cfs); - EXPECT_EQ(cfs.size(), 1); 
- - schema = "cf1,cf2{prop1,prop2},cf3{prop2}"; - SplitCfSchema(schema, &cfs); - EXPECT_EQ(cfs.size(), 3); - - schema = "cf1{prop1,prop2,prop3},cf2,cf3{prop1,prop2,prop3}"; - SplitCfSchema(schema, &cfs); - EXPECT_EQ(cfs.size(), 3); - - schema = "cf1{prop1,prop2,prop3},cf2{prop1,prop2,prop3},cf3"; - SplitCfSchema(schema, &cfs); - EXPECT_EQ(cfs.size(), 3); - - schema = "cf1,cf2{prop1,prop2,prop3},cf3{prop1,prop2,prop3}"; - SplitCfSchema(schema, &cfs); - EXPECT_EQ(cfs.size(), 3); - - schema = "cf1{prop1,prop2,prop3},cf2{prop1,prop2,prop3},cf3{prop1,prop2,prop3}"; - SplitCfSchema(schema, &cfs); - EXPECT_EQ(cfs.size(), 3); +TEST(SdkUtilsTest, ExtendNotifyLgToDescriptor1) { + // some disable notify + tera::TableDescriptor schema("t1"); + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); + cfd1->EnableNotify(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + cfd2->DisableNotify(); + auto before_num = schema.LocalityGroupNum(); + EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); + EXPECT_TRUE(schema.LocalityGroupNum() == before_num + 1); } -TEST(SdkUtils, ParseProperty) { - string schema; - PropertyList prop_list; - string name; - - schema = "name{prop1,prop2=value2,prop3=value3}"; - ASSERT_TRUE(ParseProperty(schema, &name, &prop_list)); - ASSERT_TRUE(name == "name"); - ASSERT_EQ(prop_list.size(), 3); - ASSERT_TRUE(prop_list[0].first == "prop1"); - ASSERT_TRUE(prop_list[0].second == ""); - ASSERT_TRUE(prop_list[1].first == "prop2"); - ASSERT_TRUE(prop_list[1].second == "value2"); - ASSERT_TRUE(prop_list[2].first == "prop3"); - ASSERT_TRUE(prop_list[2].second == "value3"); - - schema = "{prop1,prop2=value2}"; - ASSERT_TRUE(ParseProperty(schema, &name, &prop_list)); - ASSERT_TRUE(name == ""); - ASSERT_EQ(prop_list.size(), 2); - ASSERT_TRUE(prop_list[0].first == "prop1"); - ASSERT_TRUE(prop_list[0].second == ""); - ASSERT_TRUE(prop_list[1].first == "prop2"); - ASSERT_TRUE(prop_list[1].second == 
"value2"); - - schema = "name"; - ASSERT_TRUE(ParseProperty(schema, &name, &prop_list)); - ASSERT_TRUE(name == "name"); - ASSERT_EQ(prop_list.size(), 0); - - schema = ""; - ASSERT_TRUE(ParseProperty(schema, &name, &prop_list)); - ASSERT_TRUE(name == ""); - ASSERT_EQ(prop_list.size(), 0); - - schema = "nameprop1,prop2=value2,prop3=value3}"; - ASSERT_FALSE(ParseProperty(schema, &name, &prop_list)); - - schema = "name{prop1,pr'op2=value2,prop3=value3}"; - ASSERT_FALSE(ParseProperty(schema, &name, &prop_list)); - - schema = "name{0prop1,prop2=value2,prop3=value3}"; - ASSERT_FALSE(ParseProperty(schema, &name, &prop_list)); +TEST(SdkUtilsTest, ExtendNotifyLgToDescriptor2) { + // some disable notify + tera::TableDescriptor schema("t1"); + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); + cfd1->DisableNotify(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + cfd2->EnableNotify(); + auto before_num = schema.LocalityGroupNum(); + EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); + EXPECT_TRUE(schema.LocalityGroupNum() == before_num + 1); } -TEST(SdkUtils, ParseScanSchema) { - ScanDescriptor desc("row1"); - ScanDescImpl* impl; - string schema; - - schema = "SELECT cf0,cf1:qu2"; - ASSERT_TRUE(ParseScanSchema(schema, &desc)); - impl = desc.GetImpl(); - ASSERT_EQ(impl->GetSizeofColumnFamilyList(), 2); - ASSERT_TRUE(impl->GetFilterString() == ""); - - schema = "SELECT cf0,cf1:qu2 WHERE cf0 < 10 AND cf1 > 23"; - ASSERT_TRUE(ParseScanSchema(schema, &desc)); - impl = desc.GetImpl(); - ASSERT_EQ(impl->GetSizeofColumnFamilyList(), 2); - ASSERT_TRUE(impl->GetFilterString() == "cf0 < 10 AND cf1 > 23"); +TEST(SdkUtilsTest, ExtendNotifyLgToDescriptor3) { + // all enable notify + tera::TableDescriptor schema("t1"); + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1"); + cfd1->EnableNotify(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + 
cfd2->EnableNotify(); + auto before_num = schema.LocalityGroupNum(); + EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); + EXPECT_TRUE(schema.LocalityGroupNum() == before_num + 1); } -TEST(SdkUtils, BuildSchema) { - string schema = "lg0:cf1,cf2|lg3:cf3,cf4,cf5"; - - TableDescriptor table_desc("unittest"); - ParseSchema(schema, &table_desc); - - string schema_t; - BuildSchema(&table_desc, &schema_t); - EXPECT_TRUE(schema == schema_t); +TEST(SdkUtilsTest, ExtendNotifyLgToDescriptor4) { + // have lg named 'notify' but not set any cf 'notify=on' + tera::TableDescriptor schema("t1"); + schema.AddLocalityGroup("notify"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1", "notify"); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2", "notify"); + auto before_num = schema.LocalityGroupNum(); + EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); + EXPECT_TRUE(schema.LocalityGroupNum() == before_num); } -TEST(SdkUtils, HasInvalidCharInSchema) { - EXPECT_FALSE(HasInvalidCharInSchema("")); - EXPECT_FALSE(HasInvalidCharInSchema("table:splitsize=3,lg0:compress=none")); - - EXPECT_TRUE(HasInvalidCharInSchema("\n \t`~!@#$%^&*()-+{}[]\\|;\"'.<>?/")); - EXPECT_TRUE(HasInvalidCharInSchema("table:splitsize=3;lg0:compress=none")); +TEST(SdkUtilsTest, ExtendNotifyLgToDescriptor5) { + // have lg named 'notify' and set some cf 'notify=on' + tera::TableDescriptor schema("t1"); + schema.AddLocalityGroup("notify"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("cf1", "notify"); + cfd1->EnableNotify(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2", "notify"); + auto before_num = schema.LocalityGroupNum(); + EXPECT_FALSE(ExtendNotifyLgToDescriptor(&schema)); + EXPECT_TRUE(schema.LocalityGroupNum() == before_num); } -TEST(SdkUtils, PrefixType) { - EXPECT_TRUE(PrefixType("compress") == "lg"); - EXPECT_TRUE(PrefixType("storage") == "lg"); - EXPECT_TRUE(PrefixType("blocksize") == "lg"); - EXPECT_TRUE(PrefixType("ttl") == "cf"); 
- EXPECT_TRUE(PrefixType("maxversions") == "cf"); - EXPECT_TRUE(PrefixType("minversions") == "cf"); - EXPECT_TRUE(PrefixType("diskquota") == "cf"); - EXPECT_TRUE(PrefixType("splitsize") == "unknown"); // only support lg && cf - EXPECT_TRUE(PrefixType("anythingother") == "unknown"); +TEST(SdkUtilsTest, ExtendNotifyLgToDescriptor6) { + // have cf named '_N_' but not set any cf 'notify=on' + tera::TableDescriptor schema("t1"); + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("_N_"); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + auto before_num = schema.LocalityGroupNum(); + EXPECT_TRUE(ExtendNotifyLgToDescriptor(&schema)); + EXPECT_TRUE(schema.LocalityGroupNum() == before_num); } -TEST(SdkUtils, ParsePrefixPropertyValue) { - string prefix; - string property; - string value; - EXPECT_TRUE(ParsePrefixPropertyValue("lg123:compress=none", prefix, property, value)); - - EXPECT_FALSE(ParsePrefixPropertyValue(":ttl=3", prefix, property, value)); - EXPECT_FALSE(ParsePrefixPropertyValue("cf123:=3", prefix, property, value)); - EXPECT_FALSE(ParsePrefixPropertyValue("cf123:ttl=", prefix, property, value)); - EXPECT_FALSE(ParsePrefixPropertyValue("ttl", prefix, property, value)); - EXPECT_FALSE(ParsePrefixPropertyValue("cf123:ttl", prefix, property, value)); - EXPECT_FALSE(ParsePrefixPropertyValue("cf123:ttl:3", prefix, property, value)); +TEST(SdkUtilsTest, ExtendNotifyLgToDescriptor7) { + // have cf named '_N_' but some set cf 'notify=on' + tera::TableDescriptor schema("t1"); + schema.AddLocalityGroup("lg0"); + tera::ColumnFamilyDescriptor* cfd1 = schema.AddColumnFamily("_N_"); + cfd1->EnableNotify(); + tera::ColumnFamilyDescriptor* cfd2 = schema.AddColumnFamily("cf2"); + auto before_num = schema.LocalityGroupNum(); + EXPECT_FALSE(ExtendNotifyLgToDescriptor(&schema)); + EXPECT_TRUE(schema.LocalityGroupNum() == before_num); } -} // namespace sdk } // namespace tera diff --git 
a/src/sdk/timeoracle_client_impl.cc b/src/sdk/timeoracle_client_impl.cc new file mode 100644 index 000000000..7f0e16b6e --- /dev/null +++ b/src/sdk/timeoracle_client_impl.cc @@ -0,0 +1,118 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "sdk/timeoracle_client_impl.h" +#include +#include + +#include "common/timer.h" + +namespace tera { +namespace timeoracle { + +TimeoracleClientImpl::TimeoracleClientImpl(ThreadPool* thread_pool, + sdk::ClusterFinder* cluster_finder, + int32_t rpc_timeout) : + RpcClient(cluster_finder->TimeoracleAddr()), + thread_pool_(thread_pool), + rpc_timeout_(rpc_timeout), + update_timestamp_(0), + cluster_finder_(cluster_finder) {} + +void TimeoracleClientImpl::refresh_timeoracle_address(int64_t last_timestamp) { + std::unique_lock lock_guard(mutex_); + if (last_timestamp > 0 && last_timestamp < update_timestamp_) { + return; + } + + LOG(INFO) << "TimeoracleClientImpl try to update cluster, before is " << GetConnectAddr(); + std::string addr = cluster_finder_->TimeoracleAddr(true); + ResetClient(addr); + LOG(INFO) << "TimeoracleClientImpl update cluster, current is " << GetConnectAddr(); + update_timestamp_ = get_micros(); +} + +int64_t TimeoracleClientImpl::GetTimestamp(uint32_t count) { + GetTimestampRequest request; + GetTimestampResponse response; + + request.set_count(count); + + std::function done; + + if (SendMessageWithRetry(&TimeoracleServer::Stub::GetTimestamp, + &request, + &response, + done, + "GetTimestamp", + rpc_timeout_, + thread_pool_)) { + int code = response.status(); + if (code != kTimeoracleOk) { + // Internal Error + return 0; + } + return response.start_timestamp(); + } + + // Rpc Failed + refresh_timeoracle_address(0); + return 0; +} + +bool TimeoracleClientImpl::GetTimestamp(uint32_t count, std::function callback) { + auto request = new GetTimestampRequest(); + auto response = new
GetTimestampResponse(); + request->set_count(count); + int64_t start_time = get_micros(); + + std::function done + = std::bind(&TimeoracleClientImpl::OnRpcFinished, this, start_time, callback, + std::placeholders::_1, std::placeholders::_2, + std::placeholders::_3, std::placeholders::_4); + + if (SendMessageWithRetry(&TimeoracleServer::Stub::GetTimestamp, + request, + response, + done, + "GetTimestamp", + rpc_timeout_, + thread_pool_)) { + return true; + } + + // Rpc Failed + refresh_timeoracle_address(0); + return false; +} + +void TimeoracleClientImpl::OnRpcFinished(int64_t start_time, + std::function callback, + const GetTimestampRequest* request, + GetTimestampResponse* response, + bool rpc_error, + int error_code){ + std::unique_ptr req_hold(request); + std::unique_ptr res_hold(response); + + if (rpc_error) { + LOG(ERROR) << "RpcRequest failed for GetTimestamp, errno=" << error_code; + callback(0); + refresh_timeoracle_address(start_time); + return ; + } + + int64_t ts = response->start_timestamp(); + + int code = response->status(); + + if (code != kTimeoracleOk) { + ts = 0; + } + + callback(ts); +} + +} // namespace timeoracle +} // namespace tera diff --git a/src/sdk/timeoracle_client_impl.h b/src/sdk/timeoracle_client_impl.h new file mode 100644 index 000000000..e47fe9995 --- /dev/null +++ b/src/sdk/timeoracle_client_impl.h @@ -0,0 +1,56 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_SDK_TIMEORACLE_CLIENT_IMPL_H_ +#define TERA_SDK_TIMEORACLE_CLIENT_IMPL_H_ + +#include +#include +#include +#include + +#include "proto/timeoracle_rpc.pb.h" +#include "proto/rpc_client.h" +#include "sdk/sdk_zk.h" + +DECLARE_int32(tera_rpc_timeout_period); + +namespace tera { +namespace timeoracle { + +class TimeoracleClientImpl : public RpcClient { +public: + TimeoracleClientImpl(ThreadPool* thread_pool, + sdk::ClusterFinder* cluster_finder, + int32_t rpc_timeout = FLAGS_tera_rpc_timeout_period); + + ~TimeoracleClientImpl() {} + + int64_t GetTimestamp(uint32_t count); + + bool GetTimestamp(uint32_t count, std::function callback); + +private: + void refresh_timeoracle_address(int64_t last_timestamp); + + void OnRpcFinished(int64_t start_time, + std::function callback, + const GetTimestampRequest* request, + GetTimestampResponse* response, + bool rpc_error, + int error_code); + +private: + ThreadPool* thread_pool_; + int32_t rpc_timeout_; + + std::mutex mutex_; + int64_t update_timestamp_; + sdk::ClusterFinder* cluster_finder_; +}; + +} // namespace timeoracle +} // namespace tera + +#endif // TERA_SDK_TIMEORACLE_CLIENT_IMPL_H_ diff --git a/src/tabletnode/remote_tabletnode.cc b/src/tabletnode/remote_tabletnode.cc index 2d95a0e5a..87f1a71de 100644 --- a/src/tabletnode/remote_tabletnode.cc +++ b/src/tabletnode/remote_tabletnode.cc @@ -5,14 +5,18 @@ #include "tabletnode/remote_tabletnode.h" #include +#include #include "gflags/gflags.h" #include "glog/logging.h" +#include "common/metric/metric_counter.h" +#include "common/metric/ratio_subscriber.h" +#include "common/metric/prometheus_subscriber.h" #include "tabletnode/tabletnode_impl.h" -#include "utils/counter.h" +#include "tabletnode/tabletnode_metric_name.h" #include "utils/network_utils.h" -#include "utils/timer.h" +#include "common/timer.h" DECLARE_int32(tera_tabletnode_ctrl_thread_num); DECLARE_int32(tera_tabletnode_write_thread_num); @@ -22,14 +26,93 @@ 
DECLARE_int32(tera_tabletnode_manual_compact_thread_num); DECLARE_int32(tera_request_pending_limit); DECLARE_int32(tera_scan_request_pending_limit); -extern tera::Counter read_pending_counter; -extern tera::Counter write_pending_counter; -extern tera::Counter scan_pending_counter; -extern tera::Counter compact_pending_counter; - namespace tera { namespace tabletnode { +// Add SubscriberType::SUM for calculating SLA +tera::MetricCounter read_request_counter(kRequestCountMetric, kApiLabelRead, + {SubscriberType::QPS, SubscriberType::SUM}); +tera::MetricCounter write_request_counter(kRequestCountMetric, kApiLabelWrite, + {SubscriberType::QPS, SubscriberType::SUM}); +tera::MetricCounter scan_request_counter(kRequestCountMetric, kApiLabelScan, {SubscriberType::QPS}); + +tera::MetricCounter read_pending_counter(kPendingCountMetric, kApiLabelRead, {SubscriberType::LATEST}, false); +tera::MetricCounter write_pending_counter(kPendingCountMetric, kApiLabelWrite, {SubscriberType::LATEST}, false); +tera::MetricCounter scan_pending_counter(kPendingCountMetric, kApiLabelScan, {SubscriberType::LATEST}, false); +tera::MetricCounter compact_pending_counter(kPendingCountMetric, kApiLabelCompact, {SubscriberType::LATEST}, false); + +// Add SubscriberType::SUM for calculating SLA +tera::MetricCounter read_reject_counter(kRejectCountMetric, kApiLabelRead, + {SubscriberType::QPS, SubscriberType::SUM}); +tera::MetricCounter write_reject_counter(kRejectCountMetric, kApiLabelWrite, + {SubscriberType::QPS, SubscriberType::SUM}); +tera::MetricCounter scan_reject_counter(kRejectCountMetric, kApiLabelScan, {SubscriberType::QPS}); + +tera::MetricCounter finished_read_request_counter(kFinishedRequestCountMetric, kApiLabelRead, {SubscriberType::QPS}); +tera::MetricCounter finished_write_request_counter(kFinishedRequestCountMetric, kApiLabelWrite, {SubscriberType::QPS}); +tera::MetricCounter finished_scan_request_counter(kFinishedRequestCountMetric, kApiLabelScan, {SubscriberType::QPS}); + +//These
three metrics are not auto registered with a subscriber, they are used for ratio subscriber. +tera::MetricCounter read_delay(kRequestDelayMetric, kApiLabelRead, {}); +tera::MetricCounter write_delay(kRequestDelayMetric, kApiLabelWrite, {}); +tera::MetricCounter scan_delay(kRequestDelayMetric, kApiLabelScan, {}); + +tera::AutoSubscriberRegister rand_read_delay_per_request(std::unique_ptr(new tera::RatioSubscriber( + MetricId("tera_ts_read_delay_us_per_request"), + std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRequestDelayMetric, kApiLabelRead), SubscriberType::SUM)), + std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kFinishedRequestCountMetric, kApiLabelRead), SubscriberType::SUM))))); + +tera::AutoSubscriberRegister write_delay_per_request(std::unique_ptr(new tera::RatioSubscriber( + MetricId("tera_ts_write_delay_us_per_request"), + std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRequestDelayMetric, kApiLabelWrite), SubscriberType::SUM)), + std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kFinishedRequestCountMetric, kApiLabelWrite), SubscriberType::SUM))))); + +tera::AutoSubscriberRegister scan_delay_per_request(std::unique_ptr(new tera::RatioSubscriber( + MetricId("tera_ts_scan_delay_us_per_request"), + std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kRequestDelayMetric, kApiLabelScan), SubscriberType::SUM)), + std::unique_ptr(new tera::PrometheusSubscriber(MetricId(kFinishedRequestCountMetric, kApiLabelScan), SubscriberType::SUM))))); + +void ReadDoneWrapper::Run() { + if (response_->has_detail()) { + int64_t now_us = get_micros(); + int64_t used_us = now_us - start_micros_; + if (used_us <= 0) { + LOG(ERROR) << "now us: "<< now_us << " start_us: "<< start_micros_; + } + finished_read_request_counter.Add(response_->detail().status_size()); + read_delay.Add(used_us); + } + delete this; +} + +void WriteDoneWrapper::Run() { + if (response_->row_status_list_size() != 0) { + int64_t now_us = get_micros(); + int64_t used_us = 
now_us - start_micros_; + if (used_us <= 0) { + LOG(ERROR) << "now us: "<< now_us << " start_us: "<< start_micros_; + } + + finished_write_request_counter.Add(response_->row_status_list_size()); + write_delay.Add(used_us); + } + delete this; +} + +void ScanDoneWrapper::Run() { + if (response_->has_results()) { + int64_t now_us = get_micros(); + int64_t used_us = now_us - start_micros_; + if (used_us <= 0) { + LOG(ERROR) << "now us: "<< now_us << " start_us: "<< start_micros_; + } + + finished_scan_request_counter.Add(response_->results().key_values_size()); + scan_delay.Add(used_us); + } + delete this; +} + enum RpcType { RPC_READ = 1, RPC_SCAN = 2 @@ -105,11 +188,16 @@ void RemoteTabletNode::ReadTablet(google::protobuf::RpcController* controller, const ReadTabletRequest* request, ReadTabletResponse* response, google::protobuf::Closure* done) { + int64_t start_micros = get_micros(); + done = ReadDoneWrapper::NewInstance(start_micros, response, done); VLOG(8) << "accept RPC (ReadTablet): [" << request->tablet_name() << "] " << tera::utils::GetRemoteAddress(controller); static uint32_t last_print = time(NULL); + int32_t row_num = request->row_info_list_size(); + read_request_counter.Add(row_num); if (read_pending_counter.Get() > FLAGS_tera_request_pending_limit) { response->set_sequence_id(request->sequence_id()); response->set_status(kTabletNodeIsBusy); + read_reject_counter.Add(row_num); done->Run(); uint32_t now_time = time(NULL); if (now_time > last_print) { @@ -118,9 +206,7 @@ void RemoteTabletNode::ReadTablet(google::protobuf::RpcController* controller, } VLOG(8) << "finish RPC (ReadTablet)"; } else { - int32_t row_num = request->row_info_list_size(); read_pending_counter.Add(row_num); - int64_t start_micros = get_micros(); ReadRpcTimer* timer = new ReadRpcTimer(request, response, done, start_micros); RpcTimerList::Instance()->Push(timer); @@ -136,11 +222,16 @@ void RemoteTabletNode::WriteTablet(google::protobuf::RpcController* controller, const 
WriteTabletRequest* request, WriteTabletResponse* response, google::protobuf::Closure* done) { + int64_t start_micros = get_micros(); + done = WriteDoneWrapper::NewInstance(start_micros, response, done); VLOG(8) << "accept RPC (WriteTablet): [" << request->tablet_name() << "] " << tera::utils::GetRemoteAddress(controller); static uint32_t last_print = time(NULL); + int32_t row_num = request->row_list_size(); + write_request_counter.Add(row_num); if (write_pending_counter.Get() > FLAGS_tera_request_pending_limit) { response->set_sequence_id(request->sequence_id()); response->set_status(kTabletNodeIsBusy); + write_reject_counter.Add(row_num); done->Run(); uint32_t now_time = time(NULL); if (now_time > last_print) { @@ -149,9 +240,7 @@ void RemoteTabletNode::WriteTablet(google::protobuf::RpcController* controller, } VLOG(8) << "finish RPC (WriteTablet)"; } else { - int32_t row_num = request->row_list_size(); write_pending_counter.Add(row_num); - int64_t start_micros = get_micros(); WriteRpcTimer* timer = new WriteRpcTimer(request, response, done, start_micros); RpcTimerList::Instance()->Push(timer); ThreadPool::Task callback = @@ -165,10 +254,13 @@ void RemoteTabletNode::ScanTablet(google::protobuf::RpcController* controller, const ScanTabletRequest* request, ScanTabletResponse* response, google::protobuf::Closure* done) { + done = ScanDoneWrapper::NewInstance(get_micros(), response, done); VLOG(8) << "accept RPC (ScanTablet): [" << request->table_name() << "] " << tera::utils::GetRemoteAddress(controller); + scan_request_counter.Inc(); if (scan_pending_counter.Get() > FLAGS_tera_scan_request_pending_limit) { response->set_sequence_id(request->sequence_id()); response->set_status(kTabletNodeIsBusy); + scan_reject_counter.Inc(); done->Run(); VLOG(8) << "finish RPC (ScanTablet)"; } else { @@ -254,6 +346,18 @@ void RemoteTabletNode::SplitTablet(google::protobuf::RpcController* controller, ctrl_thread_pool_->AddTask(callback); } +void 
RemoteTabletNode::ComputeSplitKey(google::protobuf::RpcController* controller, + const SplitTabletRequest* request, + SplitTabletResponse* response, + google::protobuf::Closure* done) { + uint64_t id = request->sequence_id(); + LOG(INFO) << "accept RPC (ComputeSplitKey) id: " << id << ", src: " << tera::utils::GetRemoteAddress(controller); + ThreadPool::Task callback = + std::bind(&RemoteTabletNode::DoComputeSplitKey, this, controller, + request, response, done); + ctrl_thread_pool_->AddTask(callback); +} + void RemoteTabletNode::CompactTablet(google::protobuf::RpcController* controller, const CompactTabletRequest* request, CompactTabletResponse* response, @@ -322,7 +426,7 @@ void RemoteTabletNode::DoReadTablet(google::protobuf::RpcController* controller, int64_t read_timeout = request->client_timeout_ms() * 1000; // ms -> us int64_t detal = get_micros() - start_micros; if (detal > read_timeout) { - VLOG(5) << "timeout, drop read request for:" << request->tablet_name() + LOG(WARNING) << "timeout, drop read request for:" << request->tablet_name() << ", detal(in us):" << detal << ", read_timeout(in us):" << read_timeout; is_read_timeout = true; @@ -335,6 +439,7 @@ void RemoteTabletNode::DoReadTablet(google::protobuf::RpcController* controller, response->set_sequence_id(request->sequence_id()); response->set_success_num(0); response->set_status(kTableIsBusy); + read_reject_counter.Inc(); done->Run(); } @@ -431,6 +536,16 @@ void RemoteTabletNode::DoSplitTablet(google::protobuf::RpcController* controller LOG(INFO) << "finish RPC (SplitTablet) id: " << id; } +void RemoteTabletNode::DoComputeSplitKey(google::protobuf::RpcController* controller, + const SplitTabletRequest* request, + SplitTabletResponse* response, + google::protobuf::Closure* done) { + uint64_t id = request->sequence_id(); + LOG(INFO) << "run RPC (ComputeSplitKey) id: " << id; + tabletnode_impl_->ComputeSplitKey(request, response, done); + LOG(INFO) << "finish RPC (ComputeSplitKey) id: " << id; +} + void 
RemoteTabletNode::DoCompactTablet(google::protobuf::RpcController* controller, const CompactTabletRequest* request, CompactTabletResponse* response, diff --git a/src/tabletnode/remote_tabletnode.h b/src/tabletnode/remote_tabletnode.h index 93e692121..936a3ff12 100644 --- a/src/tabletnode/remote_tabletnode.h +++ b/src/tabletnode/remote_tabletnode.h @@ -7,6 +7,7 @@ #include "common/base/scoped_ptr.h" #include "common/thread_pool.h" +#include "common/request_done_wrapper.h" #include "proto/tabletnode_rpc.pb.h" #include "tabletnode/rpc_schedule.h" @@ -17,6 +18,82 @@ namespace tabletnode { class TabletNodeImpl; + +class ReadDoneWrapper final : public RequestDoneWrapper { +public: + static google::protobuf::Closure* NewInstance(int64_t start_micros, + ReadTabletResponse* response, + google::protobuf::Closure* done) { + return new ReadDoneWrapper(start_micros, response, done); + } + + virtual void Run() override; + + virtual ~ReadDoneWrapper() {} + +protected: + //Just Can Create on Heap; + ReadDoneWrapper(int64_t start_micros, + ReadTabletResponse* response, + google::protobuf::Closure* done): + RequestDoneWrapper(done), + start_micros_(start_micros), + response_(response) { } + + int64_t start_micros_; + ReadTabletResponse* response_; +}; + +class WriteDoneWrapper final : public RequestDoneWrapper { +public: + static google::protobuf::Closure* NewInstance(int64_t start_micros, + WriteTabletResponse* response, + google::protobuf::Closure* done) { + return new WriteDoneWrapper(start_micros, response, done); + } + + virtual void Run() override; + + virtual ~WriteDoneWrapper() {} + +protected: + //Just Can Create on Heap; + WriteDoneWrapper(int64_t start_micros, + WriteTabletResponse* response, + google::protobuf::Closure* done): + RequestDoneWrapper(done), + start_micros_(start_micros), + response_(response) { } + + int64_t start_micros_; + WriteTabletResponse* response_; +}; + +class ScanDoneWrapper final : public RequestDoneWrapper { +public: + static 
google::protobuf::Closure* NewInstance(int64_t start_micros, + ScanTabletResponse* response, + google::protobuf::Closure* done) { + return new ScanDoneWrapper(start_micros, response, done); + } + + virtual void Run() override; + + virtual ~ScanDoneWrapper() {} + +protected: + //Just Can Create on Heap; + ScanDoneWrapper(int64_t start_micros, + ScanTabletResponse* response, + google::protobuf::Closure* done): + RequestDoneWrapper(done), + start_micros_(start_micros), + response_(response) { } + + int64_t start_micros_; + ScanTabletResponse* response_; +}; + class RemoteTabletNode : public TabletNodeServer { public: explicit RemoteTabletNode(TabletNodeImpl* tabletnode_impl); @@ -72,6 +149,11 @@ class RemoteTabletNode : public TabletNodeServer { SplitTabletResponse* response, google::protobuf::Closure* done); + void ComputeSplitKey(google::protobuf::RpcController* controller, + const SplitTabletRequest* request, + SplitTabletResponse* response, + google::protobuf::Closure* done); + void CompactTablet(google::protobuf::RpcController* controller, const CompactTabletRequest* request, CompactTabletResponse* response, @@ -139,6 +221,10 @@ class RemoteTabletNode : public TabletNodeServer { const SplitTabletRequest* request, SplitTabletResponse* response, google::protobuf::Closure* done); + void DoComputeSplitKey(google::protobuf::RpcController* controller, + const SplitTabletRequest* request, + SplitTabletResponse* response, + google::protobuf::Closure* done); void DoMergeTablet(google::protobuf::RpcController* controller, const MergeTabletRequest* request, diff --git a/src/tabletnode/rpc_schedule_policy.cc b/src/tabletnode/rpc_schedule_policy.cc index 2c43156ab..99a897dee 100644 --- a/src/tabletnode/rpc_schedule_policy.cc +++ b/src/tabletnode/rpc_schedule_policy.cc @@ -8,7 +8,7 @@ #include "glog/logging.h" -#include "utils/timer.h" +#include "common/timer.h" namespace tera { namespace tabletnode { diff --git a/src/tabletnode/tabletnode_entry.cc 
b/src/tabletnode/tabletnode_entry.cc index 37ac8409f..a81628b14 100644 --- a/src/tabletnode/tabletnode_entry.cc +++ b/src/tabletnode/tabletnode_entry.cc @@ -9,6 +9,7 @@ #include "common/base/string_ext.h" #include "common/base/string_number.h" +#include "common/metric/collector_report.h" #include "common/net/ip_address.h" #include "common/this_thread.h" #include "common/thread_attributes.h" @@ -19,20 +20,21 @@ #include "proto/tabletnode.pb.h" #include "tabletnode/remote_tabletnode.h" #include "tabletnode/tabletnode_impl.h" -#include "utils/counter.h" +#include "common/counter.h" #include "utils/rpc_timer_list.h" -#include "utils/timer.h" +#include "common/timer.h" #include "utils/utils_cmd.h" DECLARE_string(tera_tabletnode_port); DECLARE_int32(tera_garbage_collect_period); -DECLARE_bool(tera_zk_enabled); DECLARE_bool(tera_tabletnode_cpu_affinity_enabled); DECLARE_string(tera_tabletnode_cpu_affinity_set); DECLARE_bool(tera_tabletnode_hang_detect_enabled); DECLARE_int32(tera_tabletnode_hang_detect_threshold); DECLARE_int32(tera_tabletnode_rpc_server_max_inflow); DECLARE_int32(tera_tabletnode_rpc_server_max_outflow); +DECLARE_bool(tera_metric_http_server_enable); +DECLARE_int32(tera_metric_http_server_listen_port); std::string GetTeraEntryName() { return "tabletnode"; @@ -47,7 +49,8 @@ namespace tabletnode { TabletNodeEntry::TabletNodeEntry() : tabletnode_impl_(NULL), - remote_tabletnode_(NULL) { + remote_tabletnode_(NULL), + metric_http_server_(new tera::MetricHttpServer()) { sofa::pbrpc::RpcServerOptions rpc_options; rpc_options.max_throughput_in = FLAGS_tera_tabletnode_rpc_server_max_inflow; rpc_options.max_throughput_out = FLAGS_tera_tabletnode_rpc_server_max_outflow; @@ -78,14 +81,23 @@ bool TabletNodeEntry::StartServer() { return false; } LOG(INFO) << "finish starting RPC server"; + + // start metric http server + if (FLAGS_tera_metric_http_server_enable) { + if(!metric_http_server_->Start(FLAGS_tera_metric_http_server_listen_port)) { + LOG(WARNING) << "Start 
metric http server failed. Ignore"; + } + } else { + LOG(INFO) << "Metric http server is disabled."; + } return true; } void TabletNodeEntry::ShutdownServer() { + metric_http_server_->Stop(); tabletnode_impl_->Exit(); - LOG(INFO) << "shut down server"; - rpc_server_->Stop(); LOG(INFO) << "TabletNodeEntry stop done!"; + _exit(0); } bool TabletNodeEntry::Run() { @@ -99,20 +111,17 @@ bool TabletNodeEntry::Run() { tabletnode_impl_->GarbageCollect(); } + CollectorReportPublisher::GetInstance().Refresh(); tabletnode_impl_->RefreshSysInfo(); tabletnode_impl_->GetSysInfo().DumpLog(); LOG(INFO) << "[ThreadPool schd/task/cnt] " << remote_tabletnode_->ProfilingLog(); - LOG(INFO) << "[Cache HitRate/Cnt/Size] table_cache " - << tabletnode_impl_->TableCacheProfileInfo() - << ", block_cache " << tabletnode_impl_->BlockCacheProfileInfo(); - int64_t now_time = get_micros(); int64_t earliest_rpc_time = now_time; RpcTimerList::Instance()->TopTime(&earliest_rpc_time); double max_delay = (now_time - earliest_rpc_time) / 1000.0; - VLOG(5) << "pending rpc max delay: " + LOG(INFO) << "pending rpc max delay: " << std::fixed<< std::setprecision(2) << max_delay; if (FLAGS_tera_tabletnode_hang_detect_enabled && max_delay > FLAGS_tera_tabletnode_hang_detect_threshold) { diff --git a/src/tabletnode/tabletnode_entry.h b/src/tabletnode/tabletnode_entry.h index a27a89747..ec87acc2b 100644 --- a/src/tabletnode/tabletnode_entry.h +++ b/src/tabletnode/tabletnode_entry.h @@ -10,6 +10,7 @@ #include #include "common/base/scoped_ptr.h" +#include "common/metric/metric_http_server.h" #include "tera_entry.h" namespace tera { @@ -37,6 +38,7 @@ class TabletNodeEntry : public TeraEntry { scoped_ptr tabletnode_impl_; RemoteTabletNode* remote_tabletnode_; scoped_ptr rpc_server_; + scoped_ptr metric_http_server_; }; } // namespace tabletnode diff --git a/src/tabletnode/tabletnode_impl.cc b/src/tabletnode/tabletnode_impl.cc index aed9d27f8..42f720723 100644 --- a/src/tabletnode/tabletnode_impl.cc +++ 
b/src/tabletnode/tabletnode_impl.cc @@ -14,6 +14,10 @@ #include "db/filename.h" #include "db/table_cache.h" +#include "common/metric/cache_collector.h" +#include "common/metric/prometheus_subscriber.h" +#include "common/metric/ratio_collector.h" +#include "common/metric/metric_counter.h" #include "common/thread.h" #include "io/io_utils.h" #include "io/utils_leveldb.h" @@ -28,12 +32,13 @@ #include "proto/proto_helper.h" #include "proto/tabletnode_client.h" #include "tabletnode/tablet_manager.h" +#include "tabletnode/tabletnode_metric_name.h" #include "tabletnode/tabletnode_zk_adapter.h" #include "types.h" #include "utils/config_utils.h" -#include "utils/counter.h" +#include "common/counter.h" #include "utils/string_util.h" -#include "utils/timer.h" +#include "common/timer.h" #include "utils/utils_cmd.h" DECLARE_string(tera_tabletnode_port); @@ -84,6 +89,7 @@ DECLARE_string(tera_leveldb_env_type); DECLARE_string(tera_local_addr); DECLARE_bool(tera_ins_enabled); DECLARE_bool(tera_mock_ins_enabled); +DECLARE_string(tera_coord_type); DECLARE_bool(tera_io_cache_path_vanish_allowed); DECLARE_int64(tera_tabletnode_tcm_cache_size); @@ -92,20 +98,49 @@ DECLARE_string(flagfile); using namespace std::placeholders; -extern tera::Counter range_error_counter; -extern tera::Counter rand_read_delay; - static const int GC_LOG_LEVEL = FLAGS_tera_tabletnode_gc_log_level; +namespace leveldb { +extern tera::Counter snappy_before_size_counter; +extern tera::Counter snappy_after_size_counter; +} + namespace tera { namespace tabletnode { +using tera::SubscriberType; + +tera::MetricCounter read_error_counter(kErrorCountMetric, kApiLabelRead, + {SubscriberType::QPS, SubscriberType::SUM}); +tera::MetricCounter write_error_counter(kErrorCountMetric, kApiLabelWrite, + {SubscriberType::QPS, SubscriberType::SUM}); +tera::MetricCounter scan_error_counter(kErrorCountMetric, kApiLabelScan, + {SubscriberType::QPS, SubscriberType::SUM}); + +tera::MetricCounter 
read_range_error_counter(kRangeErrorMetric, kApiLabelRead, {SubscriberType::QPS}); +tera::MetricCounter write_range_error_counter(kRangeErrorMetric, kApiLabelWrite, {SubscriberType::QPS}); +tera::MetricCounter scan_range_error_counter(kRangeErrorMetric, kApiLabelScan, {SubscriberType::QPS}); + +TabletNodeImpl::CacheMetrics::CacheMetrics(leveldb::Cache* block_cache, leveldb::TableCache* table_cache) + : block_cache_hitrate_(kBlockCacheHitRateMetric, + std::unique_ptr(new LRUCacheCollector(block_cache, CacheCollectType::kHitRate))), + block_cache_entries_(kBlockCacheEntriesMetric, + std::unique_ptr(new LRUCacheCollector(block_cache, CacheCollectType::kEntries))), + block_cache_charge_(kBlockCacheChargeMetric, + std::unique_ptr(new LRUCacheCollector(block_cache, CacheCollectType::kCharge))), + table_cache_hitrate_(kTableCacheHitRateMetric, + std::unique_ptr(new TableCacheCollector(table_cache, CacheCollectType::kHitRate))), + table_cache_entries_(kTableCacheEntriesMetric, + std::unique_ptr(new TableCacheCollector(table_cache, CacheCollectType::kEntries))), + table_cache_charge_(kTableCacheChargeMetric, + std::unique_ptr(new TableCacheCollector(table_cache, CacheCollectType::kCharge))) {} TabletNodeImpl::TabletNodeImpl() : status_(kNotInited), tablet_manager_(new TabletManager()), zk_adapter_(NULL), release_cache_timer_id_(kInvalidTimerId), - thread_pool_(new ThreadPool(FLAGS_tera_tabletnode_impl_thread_max_num)) { + thread_pool_(new ThreadPool(FLAGS_tera_tabletnode_impl_thread_max_num)), + cache_metrics_(NULL) { if (FLAGS_tera_local_addr == "") { local_addr_ = utils::GetLocalHostName()+ ":" + FLAGS_tera_tabletnode_port; } else { @@ -157,24 +192,46 @@ TabletNodeImpl::~TabletNodeImpl() { } bool TabletNodeImpl::Init() { - if (FLAGS_tera_zk_enabled) { + if (FLAGS_tera_coord_type.empty()) { + LOG(ERROR) << "Note: We don't recommend that use '" + << "--tera_[zk|ins|mock_zk|mock_ins]_enabled' flag for your cluster coord" + << " replace by
'--tera_coord_type=[zk|ins|mock_zk|mock_ins|fake_zk]'" + << " flag is usually recommended."; + } + if (FLAGS_tera_coord_type == "zk" || + (FLAGS_tera_coord_type.empty() && FLAGS_tera_zk_enabled)) { zk_adapter_.reset(new TabletNodeZkAdapter(this, local_addr_)); - } else if(FLAGS_tera_ins_enabled) { + } else if (FLAGS_tera_coord_type == "ins" || + (FLAGS_tera_coord_type.empty() && FLAGS_tera_ins_enabled)) { LOG(INFO) << "ins mode!"; zk_adapter_.reset(new InsTabletNodeZkAdapter(this, local_addr_)); - } else if (FLAGS_tera_mock_zk_enabled) { + } else if (FLAGS_tera_coord_type == "mock_zk" || + (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_zk_enabled)) { LOG(INFO) << "mock zk mode!"; zk_adapter_.reset(new MockTabletNodeZkAdapter(this, local_addr_)); - } else if (FLAGS_tera_mock_ins_enabled) { + } else if (FLAGS_tera_coord_type == "mock_ins" || + (FLAGS_tera_coord_type.empty() && FLAGS_tera_mock_ins_enabled)) { LOG(INFO) << "mock ins mode!"; zk_adapter_.reset(new MockInsTabletNodeZkAdapter(this, local_addr_)); - } else { + } else if (FLAGS_tera_coord_type == "fake_zk" || + FLAGS_tera_coord_type.empty()) { LOG(INFO) << "fake zk mode!"; zk_adapter_.reset(new FakeTabletNodeZkAdapter(this, local_addr_)); - } + } else { + // No branch matched, so zk_adapter_ is still NULL; fail here instead of queuing Init on a null adapter below. + LOG(ERROR) << "unknown tera_coord_type: " << FLAGS_tera_coord_type; + return false; + } SetTabletNodeStatus(kIsIniting); thread_pool_->AddTask(std::bind(&TabletNodeZkAdapterBase::Init, zk_adapter_.get())); + + // register cache metrics + cache_metrics_.reset(new CacheMetrics(ldb_block_cache_, ldb_table_cache_)); + // register snappy metrics + snappy_ratio_metric_.reset(new AutoCollectorRegister(kSnappyCompressionRatioMetric, std::unique_ptr( + new RatioCollector(&leveldb::snappy_before_size_counter, &leveldb::snappy_after_size_counter, true)))); + return true; } @@ -208,6 +261,8 @@ void TabletNodeImpl::InitCacheSystem() { } bool TabletNodeImpl::Exit() { + cache_metrics_.reset(NULL); + std::vector tablet_ios; tablet_manager_->GetAllTablets(&tablet_ios); @@ -309,6 +364,11 @@ void TabletNodeImpl::LoadTablet(const LoadTabletRequest* request, CHECK(i < 2) <<
"parent_tablets should less than 2: " << i; parent_tablets.push_back(request->parent_tablets(i)); } + std::set ignore_err_lgs; + for (int i = 0; i < request->ignore_err_lgs_size(); ++i) { + VLOG(10) << "oops lg:" << request->ignore_err_lgs(i); + ignore_err_lgs.insert(request->ignore_err_lgs(i)); + } io::TabletIO* tablet_io = NULL; StatusCode status = kTabletNodeOk; @@ -324,7 +384,7 @@ void TabletNodeImpl::LoadTablet(const LoadTabletRequest* request, ///TODO: User per user memery_cache according to user quota. tablet_io->SetMemoryCache(m_memory_cache); if (!tablet_io->Load(schema, request->path(), parent_tablets, - snapshots, rollbacks, ldb_logger_, + ignore_err_lgs, snapshots, rollbacks, ldb_logger_, ldb_block_cache_, ldb_table_cache_, &status)) { tablet_io->DecRef(); LOG(ERROR) << "fail to load tablet: " << request->path() @@ -466,28 +526,50 @@ void TabletNodeImpl::ReadTablet(int64_t start_micros, const ReadTabletRequest* request, ReadTabletResponse* response, google::protobuf::Closure* done) { + bool is_timeout = false; int32_t row_num = request->row_info_list_size(); uint64_t snapshot_id = request->snapshot_id() == 0 ? 
0 : request->snapshot_id(); uint32_t read_success_num = 0; + int64_t client_timeout_ms = std::numeric_limits::max() / 2; + if (request->has_client_timeout_ms()) { + client_timeout_ms = request->client_timeout_ms(); + } + int64_t end_time_ms = start_micros / 1000 + client_timeout_ms; + VLOG(20) << "start_ms: " << start_micros / 1000 << ", client_timeout_ms: " << client_timeout_ms + << " end_ms: " << end_time_ms; + for (int32_t i = 0; i < row_num; i++) { + int64_t time_remain_ms = end_time_ms - GetTimeStampInMs(); StatusCode row_status = kTabletNodeOk; io::TabletIO* tablet_io = tablet_manager_->GetTablet( request->tablet_name(), request->row_info_list(i).key(), &row_status); if (tablet_io == NULL) { - range_error_counter.Inc(); + read_error_counter.Inc(); + read_range_error_counter.Inc(); response->mutable_detail()->add_status(kKeyNotInRange); } else { + VLOG(20) << "time_remain_ms: " << time_remain_ms; if (tablet_io->ReadCells(request->row_info_list(i), response->mutable_detail()->add_row_result(), - snapshot_id, &row_status)) { + snapshot_id, &row_status, time_remain_ms)) { read_success_num++; } else { + if (row_status != kKeyNotExist && row_status != kRPCTimeout) { + read_error_counter.Inc(); + } response->mutable_detail()->mutable_row_result()->RemoveLast(); } tablet_io->DecRef(); response->mutable_detail()->add_status(row_status); } + + if (row_status == kRPCTimeout) { + is_timeout = true; + LOG(WARNING) << "seq_id: " << request->sequence_id() << " timeout," + << " clinet_timeout_ms: " << request->client_timeout_ms(); + break; + } } VLOG(10) << "seq_id: " << request->sequence_id() @@ -495,15 +577,14 @@ void TabletNodeImpl::ReadTablet(int64_t start_micros, << ", read_suc: " << read_success_num; response->set_sequence_id(request->sequence_id()); response->set_success_num(read_success_num); - response->set_status(kTabletNodeOk); - done->Run(); - int64_t now_ms = get_micros(); - int64_t used_ms = now_ms - start_micros; - if (used_ms <= 0) { - LOG(ERROR) << "now ms: 
"<< now_ms << " start_ms: "<< start_micros; + if (is_timeout) { + response->set_status(kRPCTimeout); + } else { + response->set_status(kTabletNodeOk); } - rand_read_delay.Add(used_ms); + + done->Run(); } void TabletNodeImpl::WriteTablet(const WriteTabletRequest* request, @@ -527,12 +608,12 @@ void TabletNodeImpl::WriteTablet(const WriteTabletRequest* request, return; } - Counter* row_done_counter = new Counter; + std::shared_ptr row_done_counter(new Counter); for (int32_t i = 0; i < row_num; i++) { io::TabletIO* tablet_io = tablet_manager_->GetTablet( request->tablet_name(), request->row_list(i).row_key(), &status); if (tablet_io == NULL) { - range_error_counter.Inc(); + write_range_error_counter.Inc(); } it = tablet_task_map.find(tablet_io); WriteTabletTask* tablet_task = NULL; @@ -579,6 +660,7 @@ void TabletNodeImpl::WriteTablet(const WriteTabletRequest* request, void TabletNodeImpl::WriteTabletFail(WriteTabletTask* tablet_task, StatusCode status) { int32_t row_num = tablet_task->row_status_vec.size(); + write_error_counter.Add(row_num); for (int32_t i = 0; i < row_num; i++) { tablet_task->row_status_vec[i] = status; } @@ -600,7 +682,6 @@ void TabletNodeImpl::WriteTabletCallback(WriteTabletTask* tablet_task, RpcTimerList::Instance()->Erase(tablet_task->timer); delete tablet_task->timer; } - delete tablet_task->row_done_counter; } delete tablet_task; @@ -806,12 +887,14 @@ void TabletNodeImpl::ScanTablet(const ScanTabletRequest* request, request->start(), &status); if (tablet_io == NULL) { - range_error_counter.Inc(); + scan_range_error_counter.Inc(); response->set_status(status); done->Run(); } else { response->set_end(tablet_io->GetEndKey()); - tablet_io->ScanRows(request, response, done); + if (!tablet_io->ScanRows(request, response, done)) { + scan_error_counter.Inc(); + } tablet_io->DecRef(); } } @@ -837,6 +920,14 @@ void TabletNodeImpl::SplitTablet(const SplitTabletRequest* request, done->Run(); return; } + // Master is not responsible for update children 
tablets to meta table, refuse to split + if (!request->has_master_update_meta() || !request->master_update_meta()) { + LOG(ERROR) << kSms <<"SplitRequest without master_update_meta, maybe " + "request from old master, refuse split!" << *tablet_io; + response->set_status(kTableNotSupport); + done->Run(); + + } if (!tablet_io->Split(&split_key, &status)) { LOG(ERROR) << "fail to split tablet: " << tablet_io->GetTablePath() @@ -852,10 +943,6 @@ void TabletNodeImpl::SplitTablet(const SplitTabletRequest* request, done->Run(); return; } - uint64_t tablet_size = 0; - tablet_io->GetDataSize(&tablet_size); - int64_t first_half_size = tablet_size / 2; - int64_t second_half_size = tablet_size / 2; LOG(INFO) << "split tablet: " << tablet_io->GetTablePath() << " [" << DebugString(tablet_io->GetStartKey()) << ", " << DebugString(tablet_io->GetEndKey()) @@ -888,11 +975,58 @@ void TabletNodeImpl::SplitTablet(const SplitTabletRequest* request, << ", " << DebugString(request->key_range().key_end()) << "], status: " << StatusCodeToString(status); } + response->set_status(kTabletNodeOk); + response->add_split_keys(split_key); + done->Run(); +} - UpdateMetaTableAsync(request, response, done, path, split_key, schema, - first_half_size, second_half_size, request->tablet_meta()); +void TabletNodeImpl::ComputeSplitKey(const SplitTabletRequest* request, + SplitTabletResponse* response, + google::protobuf::Closure* done) { + response->set_sequence_id(request->sequence_id()); + + std::string split_key = request->split_key(); + std::string path; + StatusCode status = kTabletNodeOk; + io::TabletIO* tablet_io = tablet_manager_->GetTablet(request->tablet_name(), + request->key_range().key_start(), + request->key_range().key_end(), + &status); + if (tablet_io == NULL) { + LOG(WARNING) << "split fail to get tablet: " << request->tablet_name() + << " [" << DebugString(request->key_range().key_start()) + << ", " << DebugString(request->key_range().key_end()) + << "], status: " << 
StatusCodeToString(status); + response->set_status(kKeyNotInRange); + done->Run(); + return; + } + + if (!tablet_io->Split(&split_key, &status)) { + LOG(ERROR) << "fail to split tablet: " << tablet_io->GetTablePath() + << " [" << DebugString(tablet_io->GetStartKey()) + << ", " << DebugString(tablet_io->GetEndKey()) + << "], split_key: " << DebugString(split_key) << ". status: " << StatusCodeToString(status); + if (status == kTableNotSupport) { + response->set_status(kTableNotSupport); + } else { + response->set_status((StatusCode)tablet_io->GetStatus()); + } + tablet_io->DecRef(); + done->Run(); + return; + } + LOG(INFO) << "split tablet: " << tablet_io->GetTablePath() + << " [" << DebugString(tablet_io->GetStartKey()) + << ", " << DebugString(tablet_io->GetEndKey()) + << "], split key: " << DebugString(split_key); + response->set_status(kTabletNodeOk); + response->add_split_keys(split_key); + tablet_io->DecRef(); + done->Run(); } + bool TabletNodeImpl::CheckInKeyRange(const KeyList& key_list, const std::string& key_start, const std::string& key_end) { @@ -954,7 +1088,7 @@ void TabletNodeImpl::LeaveSafeMode() { void TabletNodeImpl::ExitService() { LOG(FATAL) << "master kick me!"; - exit(1); + _exit(1); } void TabletNodeImpl::SetTabletNodeStatus(const TabletNodeStatus& status) { @@ -971,96 +1105,6 @@ void TabletNodeImpl::SetRootTabletAddr(const std::string& root_tablet_addr) { root_tablet_addr_ = root_tablet_addr; } -void TabletNodeImpl::UpdateMetaTableAsync(const SplitTabletRequest* rpc_request, - SplitTabletResponse* rpc_response, google::protobuf::Closure* rpc_done, - const std::string& path, const std::string& key_split, - const TableSchema& schema, int64_t first_size, int64_t second_size, - const TabletMeta& meta) { - WriteTabletRequest* request = new WriteTabletRequest; - WriteTabletResponse* response = new WriteTabletResponse; - request->set_sequence_id(this_sequence_id_++); - request->set_tablet_name(FLAGS_tera_master_meta_table_name); - 
request->set_is_sync(true); - request->set_is_instant(true); - - TabletMeta tablet_meta; - tablet_meta.CopyFrom(meta); - tablet_meta.set_server_addr(local_addr_); - tablet_meta.clear_parent_tablets(); - tablet_meta.add_parent_tablets(leveldb::GetTabletNumFromPath(path)); - - std::string meta_key, meta_value; - VLOG(5) << "update meta for split tablet: " << path - << " [" << DebugString(rpc_request->key_range().key_start()) - << ", " << DebugString(rpc_request->key_range().key_end()) << "]"; - - CHECK(2 == rpc_request->child_tablets_size()); - // first write 2nd half - tablet_meta.set_path(leveldb::GetChildTabletPath(path, rpc_request->child_tablets(0))); - tablet_meta.set_size(second_size); - tablet_meta.mutable_key_range()->set_key_start(key_split); - tablet_meta.mutable_key_range()->set_key_end(rpc_request->key_range().key_end()); - MakeMetaTableKeyValue(tablet_meta, &meta_key, &meta_value); - RowMutationSequence* mu_seq = request->add_row_list(); - mu_seq->set_row_key(meta_key); - Mutation* mutation = mu_seq->add_mutation_sequence(); - mutation->set_type(kPut); - mutation->set_value(meta_value); - VLOG(5) << "write meta: key [" << DebugString(meta_key) - << "], value_size: " << meta_value.size(); - - // then write 1st half - // update root_tablet_addr in fake zk mode - if (!FLAGS_tera_zk_enabled) { - zk_adapter_->GetRootTableAddr(&root_tablet_addr_); - } - TabletNodeClient meta_tablet_client(root_tablet_addr_); - - tablet_meta.set_path(leveldb::GetChildTabletPath(path, rpc_request->child_tablets(1))); - tablet_meta.set_size(first_size); - tablet_meta.mutable_key_range()->set_key_start(rpc_request->key_range().key_start()); - tablet_meta.mutable_key_range()->set_key_end(key_split); - MakeMetaTableKeyValue(tablet_meta, &meta_key, &meta_value); - mu_seq = request->add_row_list(); - mu_seq->set_row_key(meta_key); - mutation = mu_seq->add_mutation_sequence(); - mutation->set_type(kPut); - mutation->set_value(meta_value); - VLOG(5) << "write meta: key [" << 
DebugString(meta_key) - << "], value_size: " << meta_value.size(); - - std::function done = - std::bind(&TabletNodeImpl::UpdateMetaTableCallback, this, rpc_request, - rpc_response, rpc_done, _1, _2, _3, _4); - meta_tablet_client.WriteTablet(request, response, done); -} - - -void TabletNodeImpl::UpdateMetaTableCallback(const SplitTabletRequest* rpc_request, - SplitTabletResponse* rpc_response, google::protobuf::Closure* rpc_done, - WriteTabletRequest* request, WriteTabletResponse* response, bool failed, - int error_code) { - if (failed) { - rpc_response->set_status(kMetaTabletError); - } else if (response->status() != kTabletNodeOk) { - LOG(ERROR) << "fail to update meta for tablet: " - << request->tablet_name() << " [" - << DebugString(rpc_request->key_range().key_start()) - << ", " << DebugString(rpc_request->key_range().key_end()) - << "], status: " << StatusCodeToString(response->status()); - rpc_response->set_status(kMetaTabletError); - } else { - LOG(INFO) << "split tablet success: " << rpc_request->tablet_name() - << " [" << DebugString(rpc_request->key_range().key_start()) - << ", " << DebugString(rpc_request->key_range().key_end()) << "]"; - rpc_response->set_status(kTabletNodeOk); - } - - delete request; - delete response; - rpc_done->Run(); -} - /* * all cached tablets/files: * ------------------------------------------ @@ -1191,22 +1235,6 @@ std::string TabletNodeImpl::GetSessionId() { return session_id_; } -std::string TabletNodeImpl::BlockCacheProfileInfo() { - std::stringstream ss; - ss << ldb_block_cache_->HitRate(true); - ss << " " << ldb_block_cache_->Entries(); - ss << " " << ldb_block_cache_->TotalCharge(); - return ss.str(); -} - -std::string TabletNodeImpl::TableCacheProfileInfo() { - std::stringstream ss; - ss << ldb_table_cache_->HitRate(true); - ss << " " << ldb_table_cache_->TableEntries(); - ss << " " << ldb_table_cache_->ByteSize(); - return ss.str(); -} - TabletNodeSysInfo& TabletNodeImpl::GetSysInfo() { return sysinfo_; } diff --git 
a/src/tabletnode/tabletnode_impl.h b/src/tabletnode/tabletnode_impl.h index ed19d4ad6..b4d327a2d 100644 --- a/src/tabletnode/tabletnode_impl.h +++ b/src/tabletnode/tabletnode_impl.h @@ -6,8 +6,10 @@ #define TERA_TABLETNODE_TABLETNODE_IMPL_H_ #include +#include #include "common/base/scoped_ptr.h" +#include "common/metric/collector_report_publisher.h" #include "common/thread_pool.h" #include "io/tablet_io.h" @@ -38,7 +40,7 @@ class TabletNodeImpl { std::vector row_mutation_vec; std::vector row_status_vec; std::vector row_index_vec; - Counter* row_done_counter; + std::shared_ptr row_done_counter; const WriteTabletRequest* request; WriteTabletResponse* response; @@ -46,7 +48,7 @@ class TabletNodeImpl { WriteRpcTimer* timer; WriteTabletTask(const WriteTabletRequest* req, WriteTabletResponse* resp, - google::protobuf::Closure* d, WriteRpcTimer* t, Counter* c) + google::protobuf::Closure* d, WriteRpcTimer* t, std::shared_ptr c) : row_done_counter(c), request(req), response(resp), done(d), timer(t) {} }; @@ -112,6 +114,9 @@ class TabletNodeImpl { void SplitTablet(const SplitTabletRequest* request, SplitTabletResponse* response, google::protobuf::Closure* done); + void ComputeSplitKey(const SplitTabletRequest* request, + SplitTabletResponse* response, + google::protobuf::Closure* done); void EnterSafeMode(); void LeaveSafeMode(); @@ -125,10 +130,6 @@ class TabletNodeImpl { void SetSessionId(const std::string& session_id); std::string GetSessionId(); - std::string BlockCacheProfileInfo(); - - std::string TableCacheProfileInfo(); - TabletNodeSysInfo& GetSysInfo(); void RefreshSysInfo(); @@ -157,15 +158,6 @@ class TabletNodeImpl { const std::string& key_start, const std::string& key_end); - void UpdateMetaTableAsync(const SplitTabletRequest* request, - SplitTabletResponse* response, google::protobuf::Closure* done, - const std::string& path, const std::string& key_split, - const TableSchema& schema, int64_t first_size, int64_t second_size, - const TabletMeta& meta); - void 
UpdateMetaTableCallback(const SplitTabletRequest* rpc_request, - SplitTabletResponse* rpc_response, google::protobuf::Closure* rpc_done, - WriteTabletRequest* request, WriteTabletResponse* response, - bool failed, int error_code); void InitCacheSystem(); @@ -206,6 +198,22 @@ class TabletNodeImpl { leveldb::Cache* ldb_block_cache_; leveldb::Cache* m_memory_cache; leveldb::TableCache* ldb_table_cache_; + + // metric for caches + struct CacheMetrics { + tera::AutoCollectorRegister block_cache_hitrate_; + tera::AutoCollectorRegister block_cache_entries_; + tera::AutoCollectorRegister block_cache_charge_; + + tera::AutoCollectorRegister table_cache_hitrate_; + tera::AutoCollectorRegister table_cache_entries_; + tera::AutoCollectorRegister table_cache_charge_; + + CacheMetrics(leveldb::Cache* block_cache, leveldb::TableCache* table_cache); + }; + + scoped_ptr cache_metrics_; + scoped_ptr snappy_ratio_metric_; }; } // namespace tabletnode diff --git a/src/tabletnode/tabletnode_metric_name.h b/src/tabletnode/tabletnode_metric_name.h new file mode 100644 index 000000000..bca35a3dd --- /dev/null +++ b/src/tabletnode/tabletnode_metric_name.h @@ -0,0 +1,113 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_TABLETNODE_TABLETNODE_METRIC_NAME_H_ +#define TERA_TABLETNODE_TABLETNODE_METRIC_NAME_H_ + +#include + +#include "common/metric/hardware_collectors.h" + +namespace tera { +namespace tabletnode { + +// api labels +const char* const kApiLabelRead = "api:read"; +const char* const kApiLabelWrite = "api:write"; +const char* const kApiLabelScan = "api:scan"; +const char* const kApiLabelCompact = "api:compact"; + +// env lables +const char* const kEnvLabelDfs = "env:dfs"; +const char* const kEnvLabelSsd = "env:ssd"; +const char* const kEnvLabelPosix = "env:posix"; +const char* const kEnvLabelOther = "env:other"; + +// metric names +const char* const kRequestCountMetric = "tera_ts_request_count"; +const char* const kPendingCountMetric = "tera_ts_pending_count"; +const char* const kRejectCountMetric = "tera_ts_reject_count"; +const char* const kErrorCountMetric = "tera_ts_error_count"; +const char* const kRangeErrorMetric = "tera_ts_range_error_count"; + +const char* const kRowDelayMetric = "tera_ts_row_delay_us_total"; +const char* const kRowCountMetric = "tera_ts_row_count"; +const char* const kRowThroughPutMetric = "tera_ts_row_through_put"; +const char* const kLowLevelReadMetric = "tera_ts_low_level_read"; + +const char* const kRequestDelayMetric = "tera_ts_request_delay_us_total"; +const char* const kFinishedRequestCountMetric = "tera_ts_finished_request_count"; + +// cache metric names +const char* const kBlockCacheHitRateMetric = "tera_ts_block_cache_hit_percentage"; +const char* const kBlockCacheEntriesMetric = "tera_ts_block_cache_entry_count"; +const char* const kBlockCacheChargeMetric = "tera_ts_block_cache_charge_bytes"; + +const char* const kTableCacheHitRateMetric = "tera_ts_table_cache_hit_percentage"; +const char* const kTableCacheEntriesMetric = "tera_ts_table_cache_entry_count"; +const char* const kTableCacheChargeMetric = "tera_ts_table_cache_charge_bytes"; + +// env metric names +const char* const kDfsReadBytesThroughPut = 
"tera_ts_dfs_read_bytes_through_put"; +const char* const kDfsWriteBytesThroughPut = "tera_ts_dfs_write_bytes_through_put"; +const char* const kDfsReadDelayMetric = "tera_ts_dfs_read_delay_us_total"; +const char* const kDfsWriteDelayMetric = "tera_ts_dfs_write_delay_us_total"; +const char* const kDfsSyncDelayMetric = "tera_ts_dfs_sync_delay_us_total"; +const char* const kDfsReadCountMetric = "tera_ts_dfs_read_count"; +const char* const kDfsWriteCountMetric = "tera_ts_dfs_write_count"; +const char* const kDfsSyncCountMetric = "tera_ts_dfs_sync_count"; +const char* const kDfsReadDelayPerRequestMetric = "tera_ts_dfs_read_delay_us_per_request"; +const char* const kDfsWriteDelayPerRequestMetric = "tera_ts_dfs_write_delay_us_per_request"; +const char* const kDfsSyncDelayPerRequestMetric = "tera_ts_dfs_sync_delay_us_per_request"; +const char* const kDfsFlushCountMetric = "tera_ts_dfs_flush_count"; +const char* const kDfsListCountMetric = "tera_ts_dfs_list_count"; +const char* const kDfsOtherCountMetric = "tera_ts_dfs_other_count"; +const char* const kDfsExistsCountMetric = "tera_ts_dfs_exists_count"; +const char* const kDfsOpenCountMetric = "tera_ts_dfs_open_count"; +const char* const kDfsCloseCountMetric = "tera_ts_dfs_close_count"; +const char* const kDfsDeleteCountMetric = "tera_ts_dfs_delete_count"; +const char* const kDfsTellCountMetric = "tera_ts_dfs_tell_count"; +const char* const kDfsInfoCountMetric = "tera_ts_dfs_info_count"; +const char* const kDfsReadHangMetric = "tera_ts_dfs_read_hang_total"; +const char* const kDfsWriteHangMetric = "tera_ts_dfs_write_hang_total"; +const char* const kDfsSyncHangMetric = "tera_ts_dfs_sync_hang_total"; +const char* const kDfsFlushHangMetric = "tera_ts_dfs_flush_hang_total"; +const char* const kDfsListHangMetric = "tera_ts_dfs_list_hang_total"; +const char* const kDfsOtherHangMetric = "tera_ts_dfs_other_hang_total"; +const char* const kDfsExistsHangMetric = "tera_ts_dfs_exists_hang_total"; +const char* const kDfsOpenHangMetric = 
"tera_ts_dfs_open_hang_total"; +const char* const kDfsCloseHangMetric = "tera_ts_dfs_close_hang_total"; +const char* const kDfsDeleteHangMetric = "tera_ts_dfs_delete_hang_total"; +const char* const kDfsTellHangMetric = "tera_ts_dfs_tell_hang_total"; +const char* const kDfsInfoHangMetric = "tera_ts_dfs_info_hang_total"; + +const char* const kSsdReadCountMetric = "tera_ts_ssd_read_count"; +const char* const kSsdReadThroughPutMetric = "tera_ts_ssd_read_through_put"; +const char* const kSsdWriteCountMetric = "tera_ts_ssd_write_count"; +const char* const kSsdWriteThroughPutMetric = "tera_ts_ssd_write_through_put"; + +const char* const kPosixReadThroughPutMetric = "tera_ts_posix_read_through_put"; +const char* const kPosixWriteThroughPutMetric = "tera_ts_posix_write_through_put"; +const char* const kPosixReadCountMetric = "tera_ts_posix_read_count"; +const char* const kPosixWriteCountMetric = "tera_ts_posix_write_count"; +const char* const kPosixSyncCountMetric = "tera_ts_posix_sync_count"; +const char* const kPosixListCountMetric = "tera_ts_posix_list_count"; +const char* const kPosixExistsCountMetric = "tera_ts_posix_exists_count"; +const char* const kPosixOpenCountMetric = "tera_ts_posix_open_count"; +const char* const kPosixCloseCountMetric = "tera_ts_posix_close_count"; +const char* const kPosixDeleteCountMetric = "tera_ts_posix_delete_count"; +const char* const kPosixTellCountMetric = "tera_ts_posix_tell_count"; +const char* const kPosixSeekCountMetric = "tera_ts_posix_seek_count"; +const char* const kPosixInfoCountMetric = "tera_ts_posix_info_count"; +const char* const kPosixOtherCountMetric = "tera_ts_posix_other_count"; + +const char* const kRawkeyCompareCountMetric = "tera_ts_rawkey_compare_count"; +const char* const kSnappyCompressionRatioMetric = "tera_ts_snappy_compression_percentage"; +} // end namespace tabletnode +} // end namespace tera + +#endif // TERA_TABLETNODE_TABLETNODE_METRIC_NAME_H_ + +/* vim: set expandtab ts=4 sw=4 sts=4 tw=100: */ + diff --git 
a/src/tabletnode/tabletnode_sysinfo.cc b/src/tabletnode/tabletnode_sysinfo.cc index b3c09520d..30b325df9 100644 --- a/src/tabletnode/tabletnode_sysinfo.cc +++ b/src/tabletnode/tabletnode_sysinfo.cc @@ -4,8 +4,7 @@ // // Author: Xu Peilin (xupeilin@baidu.com) -#include "tabletnode_sysinfo.h" - +#include #include #include #include @@ -16,17 +15,20 @@ #include #include +#include "tabletnode/tabletnode_sysinfo.h" #include "common/base/string_number.h" #include "proto/proto_helper.h" -#include "utils/timer.h" +#include "tabletnode/tabletnode_metric_name.h" +#include "common/timer.h" #include "utils/tprinter.h" #include "utils/utils_cmd.h" +#include "common/metric/collector_report_publisher.h" +#include "common/metric/ratio_subscriber.h" +#include "common/metric/prometheus_subscriber.h" -DEFINE_int32(tera_tabletnode_sysinfo_mem_collect_interval, 10, "interval of mem checking(s)"); -DEFINE_int32(tera_tabletnode_sysinfo_net_collect_interval, 5, "interval of net checking(s)"); -DEFINE_int32(tera_tabletnode_sysinfo_cpu_collect_interval, 5, "interval of cpu checking(s)"); DECLARE_bool(tera_tabletnode_dump_running_info); DECLARE_string(tera_tabletnode_running_info_dump_file); +DECLARE_int64(tera_tabletnode_sysinfo_check_interval); namespace leveldb { extern tera::Counter rawkey_compare_counter; @@ -49,9 +51,6 @@ extern tera::Counter posix_seek_counter; extern tera::Counter posix_info_counter; extern tera::Counter posix_other_counter; -extern tera::Counter snappy_before_size_counter; -extern tera::Counter snappy_after_size_counter; - extern tera::Counter dfs_read_counter; extern tera::Counter dfs_write_counter; extern tera::Counter dfs_read_delay_counter; @@ -87,17 +86,127 @@ extern tera::Counter ssd_write_counter; extern tera::Counter ssd_write_size_counter; } -tera::Counter rand_read_delay; -extern tera::Counter row_read_delay; -tera::Counter range_error_counter; -tera::Counter read_pending_counter; -tera::Counter write_pending_counter; -tera::Counter scan_pending_counter; 
-tera::Counter compact_pending_counter; namespace tera { namespace tabletnode { +// dfs metrics +tera::AutoCollectorRegister dfs_read_size_metric(kDfsReadBytesThroughPut, + std::unique_ptr(new CounterCollector(&leveldb::dfs_read_size_counter, true)), {SubscriberType::THROUGHPUT}); +tera::AutoCollectorRegister dfs_write_size_metric(kDfsWriteBytesThroughPut, + std::unique_ptr(new CounterCollector(&leveldb::dfs_write_size_counter, true)), {SubscriberType::THROUGHPUT}); +tera::AutoCollectorRegister dfs_read_delay_metric(kDfsReadDelayMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_read_delay_counter, true)), {}); +tera::AutoCollectorRegister dfs_write_delay_metric(kDfsWriteDelayMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_write_delay_counter, true)), {}); +tera::AutoCollectorRegister dfs_sync_delay_metric(kDfsSyncDelayMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_sync_delay_counter, true)), {}); +tera::AutoCollectorRegister dfs_read_metric(kDfsReadCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_read_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_write_metric(kDfsWriteCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_write_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_sync_metric(kDfsSyncCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_sync_counter, true)), {SubscriberType::QPS}); + +tera::AutoSubscriberRegister dfs_read_delay_avg_subscriber (std::unique_ptr(new RatioSubscriber( + MetricId(kDfsReadDelayPerRequestMetric), + std::unique_ptr(new PrometheusSubscriber(MetricId(kDfsReadDelayMetric), SubscriberType::SUM)), + std::unique_ptr(new PrometheusSubscriber(MetricId(kDfsReadCountMetric), SubscriberType::SUM))))); + +tera::AutoSubscriberRegister dfs_write_delay_avg_subscriber (std::unique_ptr(new RatioSubscriber( + MetricId(kDfsWriteDelayPerRequestMetric), + std::unique_ptr(new 
PrometheusSubscriber(MetricId(kDfsWriteDelayMetric), SubscriberType::SUM)), + std::unique_ptr(new PrometheusSubscriber(MetricId(kDfsWriteCountMetric), SubscriberType::SUM))))); + +tera::AutoSubscriberRegister dfs_sync_delay_avg_subscriber (std::unique_ptr(new RatioSubscriber( + MetricId(kDfsSyncDelayPerRequestMetric), + std::unique_ptr(new PrometheusSubscriber(MetricId(kDfsSyncDelayMetric), SubscriberType::SUM)), + std::unique_ptr(new PrometheusSubscriber(MetricId(kDfsSyncCountMetric), SubscriberType::SUM))))); + +tera::AutoCollectorRegister dfs_flush_metric(kDfsFlushCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_flush_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_list_metric(kDfsListCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_list_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_exists_metric(kDfsExistsCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_exists_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_open_metric(kDfsOpenCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_open_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_close_metric(kDfsCloseCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_close_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_delete_metric(kDfsDeleteCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_delete_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_tell_metric(kDfsTellCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_tell_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_info_metric(kDfsInfoCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_info_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_other_metric(kDfsOtherCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_other_counter, 
true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister dfs_read_hang_metric(kDfsReadHangMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_read_hang_counter, false))); +tera::AutoCollectorRegister dfs_write_hang_metric(kDfsWriteHangMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_write_hang_counter, false))); +tera::AutoCollectorRegister dfs_sync_hang_metric(kDfsSyncHangMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_sync_hang_counter, false))); +tera::AutoCollectorRegister dfs_flush_hang_metric(kDfsFlushHangMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_flush_hang_counter, false))); +tera::AutoCollectorRegister dfs_list_hang_metric(kDfsListHangMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_list_hang_counter, false))); +tera::AutoCollectorRegister dfs_exists_hang_metric(kDfsExistsHangMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_exists_hang_counter, false))); +tera::AutoCollectorRegister dfs_open_hang_metric(kDfsOpenHangMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_open_hang_counter, false))); +tera::AutoCollectorRegister dfs_close_hang_metric(kDfsCloseHangMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_close_hang_counter, false))); +tera::AutoCollectorRegister dfs_delete_hang_metric(kDfsDeleteHangMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_delete_hang_counter, false))); +tera::AutoCollectorRegister dfs_tell_hang_metric(kDfsTellHangMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_tell_hang_counter, false))); +tera::AutoCollectorRegister dfs_info_hang_metric(kDfsInfoHangMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_info_hang_counter, false))); +tera::AutoCollectorRegister dfs_other_hang_metric(kDfsOtherHangMetric, + std::unique_ptr(new CounterCollector(&leveldb::dfs_other_hang_counter, false))); +// ssd metrics +tera::AutoCollectorRegister ssd_read_through_put_metric(kSsdReadThroughPutMetric, + 
std::unique_ptr(new CounterCollector(&leveldb::ssd_read_size_counter, true)), {SubscriberType::THROUGHPUT}); +tera::AutoCollectorRegister ssd_write_through_put_metric(kSsdWriteThroughPutMetric, + std::unique_ptr(new CounterCollector(&leveldb::ssd_write_size_counter, true)), {SubscriberType::THROUGHPUT}); +tera::AutoCollectorRegister ssd_read_metric(kSsdReadCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::ssd_read_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister ssd_write_metric(kSsdWriteCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::ssd_write_counter, true)), {SubscriberType::QPS}); +// local metrics +tera::AutoCollectorRegister posix_read_size_metric(kPosixReadThroughPutMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_read_size_counter, true)), {SubscriberType::THROUGHPUT}); +tera::AutoCollectorRegister posix_write_size_metric(kPosixWriteThroughPutMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_write_size_counter, true)), {SubscriberType::THROUGHPUT}); +tera::AutoCollectorRegister posix_read_metric(kPosixReadCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_read_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_write_metric(kPosixWriteCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_write_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_sync_metric(kPosixSyncCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_sync_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_list_metric(kPosixListCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_list_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_exists_metric(kPosixExistsCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_exists_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_open_metric(kPosixOpenCountMetric, + 
std::unique_ptr(new CounterCollector(&leveldb::posix_open_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_close_metric(kPosixCloseCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_close_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_delete_metric(kPosixDeleteCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_delete_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_tell_metric(kPosixTellCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_tell_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_seek_metric(kPosixSeekCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_seek_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_info_metric(kPosixInfoCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_info_counter, true)), {SubscriberType::QPS}); +tera::AutoCollectorRegister posix_other_metric(kPosixOtherCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::posix_other_counter, true)), {SubscriberType::QPS}); + +tera::AutoCollectorRegister rawkey_compare_metric(kRawkeyCompareCountMetric, + std::unique_ptr(new CounterCollector(&leveldb::rawkey_compare_counter, true)), {SubscriberType::QPS}); + class TabletNodeSysInfoDumper { public: TabletNodeSysInfoDumper(const std::string& filename) : @@ -135,29 +244,16 @@ class TabletNodeSysInfoDumper { FILE* fp_; }; -TabletNodeSysInfo::TabletNodeSysInfo() - : mem_check_ts_(0), - net_check_ts_(0), - io_check_ts_(0), - net_tx_total_(0), - net_rx_total_(0), - cpu_check_ts_(0), - tablet_check_ts_(0) { +TabletNodeSysInfo::TabletNodeSysInfo() { + last_check_ts_ = get_micros(); } TabletNodeSysInfo::TabletNodeSysInfo(const TabletNodeInfo& info) - : info_(info), - mem_check_ts_(0), - net_check_ts_(0), - io_check_ts_(0), - net_tx_total_(0), - net_rx_total_(0), - cpu_check_ts_(0), - tablet_check_ts_(0) { + : info_(info) { + 
last_check_ts_ = get_micros(); } -TabletNodeSysInfo::~TabletNodeSysInfo() { -} +TabletNodeSysInfo::~TabletNodeSysInfo() {} void TabletNodeSysInfo::AddExtraInfo(const std::string& name, int64_t value) { MutexLock lock(&mutex_); @@ -176,32 +272,79 @@ void TabletNodeSysInfo::SetTimeStamp(int64_t ts) { info_.set_timestamp(ts); } +struct DBSize { + uint64_t size; + std::vector lg_size; +}; + void TabletNodeSysInfo::CollectTabletNodeInfo(TabletManager* tablet_manager, const string& server_addr) { + std::vector tablet_ios; + std::vector db_status_vec; + std::vector db_size_vec; + + int64_t ts = get_micros(); + bool need_check = false; + if (ts - last_check_ts_ > FLAGS_tera_tabletnode_sysinfo_check_interval) { + last_check_ts_ = ts; + need_check = true; + } + tablet_manager->GetAllTablets(&tablet_ios); + std::vector::iterator it = tablet_ios.begin(); + while (it != tablet_ios.end()) { + io::TabletIO* tablet_io = *it; + if (tablet_io->ShouldForceUnloadOnError()) { + LOG(WARNING) << *tablet_io << ", has internal error triggered unload"; + StatusCode status; + if (!tablet_io->Unload(&status)) { + LOG(ERROR) << *tablet_io << ", Unload tablet failed, status: " + << StatusCodeToString(status); + } + if (!tablet_manager->RemoveTablet(tablet_io->GetTableName(), + tablet_io->GetStartKey(), tablet_io->GetEndKey(), &status)) { + LOG(ERROR) << *tablet_io << ", remove from TabletManager failed, status: " + << StatusCodeToString(status); + } + tablet_io->DecRef(); + it = tablet_ios.erase(it); + continue; + } + + // check db status whether is corruption + TabletStatus tablet_status = static_cast(kTabletReady); + tablet_io->GetDBStatus(&tablet_status, need_check); + db_status_vec.push_back(tablet_status); + + DBSize db_size; + tablet_io->GetDataSize(&db_size.size, &db_size.lg_size); + db_size_vec.push_back(db_size); + + ++it; + } + MutexLock lock(&mutex_); - int64_t cur_ts = get_micros(); - int64_t interval = cur_ts - tablet_check_ts_; - tablet_check_ts_ = cur_ts; + std::shared_ptr 
latest_report = CollectorReportPublisher::GetInstance().GetCollectorReport(); + int64_t interval = latest_report->interval_ms; + if (interval <= 0) { + // maybe happen at first report, the metric values must be 0 + // set to any non-zero value to avoid div 0 + VLOG(16) << "Metric Report interval is 0"; + interval = 1000; + } tablet_list_.Clear(); int64_t total_size = 0; - int64_t low_read_cell = 0; - int64_t scan_rows = 0; int64_t scan_kvs = 0; - int64_t scan_size = 0; - int64_t read_rows = 0; int64_t read_kvs = 0; - int64_t read_size = 0; - int64_t write_rows = 0; int64_t write_kvs = 0; - int64_t write_size = 0; int64_t busy_cnt = 0; + int64_t db_corruption_cnt = 0; + + for (uint32_t i = 0; i < tablet_ios.size(); i++) { + io::TabletIO* tablet_io = tablet_ios[i]; + TabletStatus tablet_status = db_status_vec[i]; + DBSize db_size = db_size_vec[i]; - std::vector tablet_ios; - tablet_manager->GetAllTablets(&tablet_ios); - std::vector::iterator it = tablet_ios.begin(); - for (; it != tablet_ios.end(); ++it) { - io::TabletIO* tablet_io = *it; TabletMeta* tablet_meta = tablet_list_.add_meta(); tablet_meta->set_status(TabletStatus(tablet_io->GetStatus())); tablet_meta->set_server_addr(server_addr); @@ -210,274 +353,185 @@ void TabletNodeSysInfo::CollectTabletNodeInfo(TabletManager* tablet_manager, tablet_meta->mutable_key_range()->set_key_start(tablet_io->GetStartKey()); tablet_meta->mutable_key_range()->set_key_end(tablet_io->GetEndKey()); - std::vector lgsize; - uint64_t size; - tablet_io->GetDataSize(&size, &lgsize); - tablet_meta->set_size(size); - for (size_t i = 0; i < lgsize.size(); ++i) { - tablet_meta->add_lg_size(lgsize[i]); + tablet_meta->set_size(db_size.size); + for (size_t i = 0; i < db_size.lg_size.size(); ++i) { + tablet_meta->add_lg_size(db_size.lg_size[i]); } tablet_meta->set_compact_status(tablet_io->GetCompactStatus()); total_size += tablet_meta->size(); TabletCounter* counter = tablet_list_.add_counter(); - tablet_io->GetAndClearCounter(counter); - 
low_read_cell += counter->low_read_cell(); - scan_rows += counter->scan_rows(); + const std::string& label_str = tablet_io->GetMetricLabel(); + counter->set_low_read_cell(latest_report->FindMetricValue(kLowReadCellMetricName, label_str)); + counter->set_scan_rows(latest_report->FindMetricValue(kScanRowsMetricName, label_str)); + counter->set_scan_kvs(latest_report->FindMetricValue(kScanKvsMetricName, label_str)); + counter->set_scan_size(latest_report->FindMetricValue(kScanThroughPutMetricName, label_str)); + counter->set_read_rows(latest_report->FindMetricValue(kReadRowsMetricName, label_str)); + counter->set_read_kvs(latest_report->FindMetricValue(kReadKvsMetricName, label_str)); + counter->set_read_size(latest_report->FindMetricValue(kReadThroughPutMetricName, label_str)); + counter->set_write_rows(latest_report->FindMetricValue(kWriteRowsMetricName, label_str)); + counter->set_write_kvs(latest_report->FindMetricValue(kWriteKvsMetricName, label_str)); + counter->set_write_size(latest_report->FindMetricValue(kWriteThroughPutMetricName, label_str)); + counter->set_is_on_busy(tablet_io->IsBusy()); + double write_workload = 0; + tablet_io->Workload(&write_workload); + counter->set_write_workload(write_workload); + counter->set_db_status(tablet_status); // set runtime counter + scan_kvs += counter->scan_kvs(); - scan_size += counter->scan_size(); - read_rows += counter->read_rows(); read_kvs += counter->read_kvs(); - read_size += counter->read_size(); - write_rows += counter->write_rows(); write_kvs += counter->write_kvs(); - write_size += counter->write_size(); if (counter->is_on_busy()) { busy_cnt++; } + if (counter->db_status() == kTabletCorruption) { + db_corruption_cnt++; + } tablet_io->DecRef(); } - info_.set_low_read_cell(low_read_cell * 1000000 / interval); - info_.set_scan_rows(scan_rows * 1000000 / interval); - info_.set_scan_kvs(scan_kvs * 1000000 / interval); - info_.set_scan_size(scan_size * 1000000 / interval); - info_.set_read_rows(read_rows * 1000000 
/ interval); - info_.set_read_kvs(read_kvs * 1000000 / interval); - info_.set_read_size(read_size * 1000000 / interval); - info_.set_write_rows(write_rows * 1000000 / interval); - info_.set_write_kvs(write_kvs * 1000000 / interval); - info_.set_write_size(write_size * 1000000 / interval); + + int64_t low_read_cell = + latest_report->FindMetricValue(kLowLevelReadMetric); + int64_t read_rows = + latest_report->FindMetricValue(kRowCountMetric, kApiLabelRead); + int64_t read_size = + latest_report->FindMetricValue(kRowThroughPutMetric, kApiLabelRead); + int64_t write_rows = + latest_report->FindMetricValue(kRowCountMetric, kApiLabelWrite); + int64_t write_size = + latest_report->FindMetricValue(kRowThroughPutMetric, kApiLabelWrite); + int64_t scan_rows = + latest_report->FindMetricValue(kRowCountMetric, kApiLabelScan); + int64_t scan_size = + latest_report->FindMetricValue(kRowThroughPutMetric, kApiLabelScan); + + info_.set_low_read_cell(low_read_cell * 1000 / interval); + info_.set_scan_rows(scan_rows * 1000 / interval); + info_.set_scan_kvs(scan_kvs * 1000 / interval); + info_.set_scan_size(scan_size * 1000 / interval); + info_.set_read_rows(read_rows * 1000 / interval); + info_.set_read_kvs(read_kvs * 1000 / interval); + info_.set_read_size(read_size * 1000 / interval); + info_.set_write_rows(write_rows * 1000 / interval); + info_.set_write_kvs(write_kvs * 1000 / interval); + info_.set_write_size(write_size * 1000 / interval); info_.set_tablet_onbusy(busy_cnt); + info_.set_tablet_corruption(db_corruption_cnt); // refresh tabletnodeinfo info_.set_load(total_size); info_.set_tablet_total(tablet_ios.size()); int64_t tmp; - tmp = leveldb::dfs_read_size_counter.Clear() * 1000000 / interval; + tmp = latest_report->FindMetricValue(kDfsReadBytesThroughPut) * 1000 / interval; info_.set_dfs_io_r(tmp); - tmp = leveldb::dfs_write_size_counter.Clear() * 1000000 / interval; + tmp = latest_report->FindMetricValue(kDfsWriteBytesThroughPut) * 1000 / interval; 
info_.set_dfs_io_w(tmp); - tmp = leveldb::posix_read_size_counter.Clear() * 1000000 / interval; + tmp = latest_report->FindMetricValue(kPosixReadThroughPutMetric) * 1000 / interval; info_.set_local_io_r(tmp); - tmp = leveldb::posix_write_size_counter.Clear() * 1000000 / interval; + tmp = latest_report->FindMetricValue(kPosixWriteThroughPutMetric) * 1000 / interval; info_.set_local_io_w(tmp); - info_.set_read_pending(read_pending_counter.Get()); - info_.set_write_pending(write_pending_counter.Get()); - info_.set_scan_pending(scan_pending_counter.Get()); + int64_t read_pending = latest_report->FindMetricValue(kPendingCountMetric, kApiLabelRead); + int64_t write_pending = latest_report->FindMetricValue(kPendingCountMetric, kApiLabelWrite); + int64_t scan_pending = latest_report->FindMetricValue(kPendingCountMetric, kApiLabelScan); + int64_t compact_pending = latest_report->FindMetricValue(kPendingCountMetric, kApiLabelCompact); + + info_.set_read_pending(read_pending); + info_.set_write_pending(write_pending); + info_.set_scan_pending(scan_pending); // collect extra infos info_.clear_extra_info(); ExtraTsInfo* einfo = info_.add_extra_info(); - if (read_rows == 0) { - tmp = 0; - } else { - tmp = rand_read_delay.Clear() / read_rows; - } - einfo->set_name("rand_read_delay"); - einfo->set_value(tmp / 1000); - einfo = info_.add_extra_info(); - if (read_rows == 0) { - tmp = 0; - } else { - tmp = row_read_delay.Clear() / read_rows; - } - einfo->set_name("row_read_delay"); - einfo->set_value(tmp / 1000); + int64_t range_error_sum = + latest_report->FindMetricValue(kRangeErrorMetric, kApiLabelRead) + + latest_report->FindMetricValue(kRangeErrorMetric, kApiLabelWrite) + + latest_report->FindMetricValue(kRangeErrorMetric, kApiLabelScan); - einfo = info_.add_extra_info(); - tmp = range_error_counter.Clear() * 1000000 / interval; + tmp = range_error_sum * 1000 / interval; einfo->set_name("range_error"); einfo->set_value(tmp); einfo = info_.add_extra_info(); - tmp = 
read_pending_counter.Get(); einfo->set_name("read_pending"); - einfo->set_value(tmp); + einfo->set_value(read_pending); einfo = info_.add_extra_info(); - tmp = write_pending_counter.Get(); einfo->set_name("write_pending"); - einfo->set_value(tmp); + einfo->set_value(write_pending); einfo = info_.add_extra_info(); - tmp = scan_pending_counter.Get(); einfo->set_name("scan_pending"); - einfo->set_value(tmp); + einfo->set_value(scan_pending); einfo = info_.add_extra_info(); - tmp = compact_pending_counter.Get(); einfo->set_name("compact_pending"); + einfo->set_value(compact_pending); + + einfo = info_.add_extra_info(); + tmp = latest_report->FindMetricValue(kRejectCountMetric, kApiLabelRead) * 1000 / interval; + einfo->set_name("read_reject"); einfo->set_value(tmp); -} -// return the number of ticks(jiffies) that this process -// has been scheduled in user and kernel mode. -static long long ProcessCpuTick() { - const int PATH_MAX_LEN = 64; - char path[PATH_MAX_LEN]; - sprintf(path, "/proc/%d/stat", getpid()); - FILE *fp = fopen(path, "r"); - if (fp == NULL) { - return 0; - } - long long utime = 0, stime = 0; - if (fscanf(fp, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %lld %lld", - &utime, &stime) < 2) { - LOG(ERROR) << "get cpu tick from /proc/" << getpid() << "/stat failed."; - } - fclose(fp); - return utime + stime; -} + einfo = info_.add_extra_info(); + tmp = latest_report->FindMetricValue(kRejectCountMetric, kApiLabelWrite) * 1000 / interval; + einfo->set_name("write_reject"); + einfo->set_value(tmp); -// return number of cpu(cores) -static int GetCpuCount() { -#if defined(_SC_NPROCESSORS_ONLN) - return sysconf(_SC_NPROCESSORS_ONLN); -#else - FILE *fp = fopen("/proc/stat", "r"); - if (fp == NULL) { - return 1; - } - const int LINE_MAX_LEN = 256; // enough in here - char *aline = (char*)malloc(LINE_MAX_LEN); - if (aline == NULL) { - LOG(ERROR) << "[HardWare System Info] malloc failed."; - return 1; - } - const int HEADER_MAX_LEN = 10; - char 
header[HEADER_MAX_LEN]; - int i=0; - size_t len=0; - getline(&aline, &len, fp); // drop the first line - while (getline(&aline, &len, fp)) { - i++; - sscanf(aline, "%s", header); - if (!strncmp(header, "intr", HEADER_MAX_LEN)) { - break; - } - } - fclose(fp); - free(aline); - return i-1 > 0 ? i-1 : 1; -#endif -} + einfo = info_.add_extra_info(); + tmp = latest_report->FindMetricValue(kRejectCountMetric, kApiLabelScan) * 1000 / interval; + einfo->set_name("scan_reject"); + einfo->set_value(tmp); -// irix_on == 1 --> irix mode on -// irix_on == 0 --> irix mode off -// -// return this process's the percentage of CPU usage ( %CPU ). -// -// NOTE: the first time call this function would get 0 as result. -static float GetCpuUsage(int is_irix_on) { - static int cpu_count = 1; // assume cpu count is not variable when process is running - static unsigned long hertz = 0; - if (hertz == 0) { - hertz = sysconf(_SC_CLK_TCK); - cpu_count = GetCpuCount(); - } + einfo = info_.add_extra_info(); + tmp = latest_report->FindMetricValue(kRequestCountMetric, kApiLabelRead) * 1000 / interval; + einfo->set_name("read_request"); + einfo->set_value(tmp); - static struct timeval oldtimev; - struct timeval timev; - gettimeofday(&timev, NULL); - float et = (timev.tv_sec - oldtimev.tv_sec) - + (float)(timev.tv_usec - oldtimev.tv_usec) / 1000000.0; - oldtimev.tv_sec = timev.tv_sec; - oldtimev.tv_usec = timev.tv_usec; - - float frame_etscale; - if (is_irix_on) { - frame_etscale = 100.0f / ((float)hertz * et); - } else { - frame_etscale = 100.0f / ((float)hertz * et * cpu_count); - } + einfo = info_.add_extra_info(); + tmp = latest_report->FindMetricValue(kRequestCountMetric, kApiLabelWrite) * 1000 / interval; + einfo->set_name("write_request"); + einfo->set_value(tmp); - static unsigned long oldtick; - unsigned long newtick; - newtick = ProcessCpuTick(); - float u = (newtick - (float)oldtick) * frame_etscale; - oldtick = newtick; + einfo = info_.add_extra_info(); + tmp = 
latest_report->FindMetricValue(kRequestCountMetric, kApiLabelScan) * 1000 / interval; + einfo->set_name("scan_request"); + einfo->set_value(tmp); - const float MAX_CPU_USAGE = 99.9f; - if (u > MAX_CPU_USAGE ) { - u = MAX_CPU_USAGE; - } + einfo = info_.add_extra_info(); + tmp = latest_report->FindMetricValue(kErrorCountMetric, kApiLabelRead) * 1000 / interval; + einfo->set_name("read_error"); + einfo->set_value(tmp); - // rounding cpu usage to 1 decimal places - const int USAGE_STR_MAX_LEN = 5; - char usage_str[USAGE_STR_MAX_LEN]; - sprintf(usage_str, "%.1f\n", u); - sscanf(usage_str, "%f", &u); - return u; + einfo = info_.add_extra_info(); + tmp = latest_report->FindMetricValue(kErrorCountMetric, kApiLabelWrite) * 1000 / interval; + einfo->set_name("write_error"); + einfo->set_value(tmp); + + einfo = info_.add_extra_info(); + tmp = latest_report->FindMetricValue(kErrorCountMetric, kApiLabelScan) * 1000 / interval; + einfo->set_name("scan_error"); + einfo->set_value(tmp); } void TabletNodeSysInfo::CollectHardwareInfo() { MutexLock lock(&mutex_); - int pid = getpid(); - FILE* f; - std::ostringstream ss; - ss << "/proc/" << pid << "/"; - int64_t cur_ts = get_micros(); - - int64_t interval = cur_ts - mem_check_ts_; - if (interval / 1000000 > FLAGS_tera_tabletnode_sysinfo_mem_collect_interval) { - mem_check_ts_ = cur_ts; - int64_t mem; - f = fopen((ss.str() + "statm").data(), "r"); - if (f == NULL) { - return; - } - fscanf(f, "%*d %ld", &mem); - mem = mem * 4 * 1024; - fclose(f); - info_.set_mem_used(mem); + std::shared_ptr latest_report = CollectorReportPublisher::GetInstance().GetCollectorReport(); - VLOG(15) << "[HardWare System Info] Memory: " << mem * 4; - return; - } + int64_t cpu_usage = latest_report->FindMetricValue(kInstCpuMetricName); + info_.set_cpu_usage(static_cast(cpu_usage)); - interval = cur_ts - net_check_ts_; - if (interval / 1000000 > FLAGS_tera_tabletnode_sysinfo_net_collect_interval) { - net_check_ts_ = cur_ts; - int64_t net_rx = 0, net_tx = 0; - f 
= fopen((ss.str() + "net/dev").data(), "r"); - if (f == NULL) { - return; - } - int ret = fseek(f, 327, SEEK_SET); - CHECK_EQ(ret, 0); - for (int i = 0; i < 10; i++) { - while (':' != fgetc(f)); - ret = fscanf(f, "%ld%*d%*d%*d%*d%*d%*d%*d%ld", &net_rx, &net_tx); - if (ret >= 2 && net_rx > 0 && net_tx > 0) { - break; - } - } - fclose(f); - - int64_t tmp; - tmp = (net_rx - net_rx_total_) * 1000000 / interval; - info_.set_net_rx(tmp); - tmp = (net_tx - net_tx_total_) * 1000000 / interval; - info_.set_net_tx(tmp); - net_rx_total_ = net_rx; - net_tx_total_ = net_tx; - - VLOG(15) << "[HardWare System Info] Network RX/TX: " << net_rx << " / " << net_tx; - return; - } + int64_t mem_usage = latest_report->FindMetricValue(kInstMemMetricName); + info_.set_mem_used(mem_usage); - interval = cur_ts - cpu_check_ts_; - if (interval / 1000000 > FLAGS_tera_tabletnode_sysinfo_cpu_collect_interval) { - cpu_check_ts_ = cur_ts; - float cpu_usage = GetCpuUsage(0); - info_.set_cpu_usage(cpu_usage); - VLOG(15) << "[HardWare System Info] %CPU: "<< cpu_usage; - return; - } + int64_t net_rx_usage = latest_report->FindMetricValue(kInstNetRXMetricName); + info_.set_net_rx(net_rx_usage); + + int64_t net_tx_usage = latest_report->FindMetricValue(kInstNetTXMetricName); + info_.set_net_tx(net_tx_usage); } void TabletNodeSysInfo::GetTabletNodeInfo(TabletNodeInfo* info) { @@ -502,11 +556,17 @@ void TabletNodeSysInfo::SetStatus(StatusCode status) { void TabletNodeSysInfo::DumpLog() { MutexLock lock(&mutex_); - + std::shared_ptr latest_report = CollectorReportPublisher::GetInstance().GetCollectorReport(); + int64_t interval = latest_report->interval_ms; + TabletNodeSysInfoDumper dumper(FLAGS_tera_tabletnode_running_info_dump_file); - double snappy_ratio = (double)leveldb::snappy_before_size_counter.Clear() - / leveldb::snappy_after_size_counter.Clear(); + double snappy_ratio = latest_report->FindMetricValue(kSnappyCompressionRatioMetric); + if (snappy_ratio > 0) { + snappy_ratio /= 100.0; + } + + 
int64_t rawkey_compare_count = latest_report->FindMetricValue(kRawkeyCompareCountMetric); if (FLAGS_tera_tabletnode_dump_running_info) { dumper.DumpData("low_level", info_.low_read_cell()); @@ -517,7 +577,7 @@ void TabletNodeSysInfo::DumpLog() { dumper.DumpData("scan", info_.scan_rows()); dumper.DumpData("sspeed", info_.scan_size()); dumper.DumpData("snappy", snappy_ratio); - dumper.DumpData("rowcomp", leveldb::rawkey_compare_counter.Get()); + dumper.DumpData("rowcomp", rawkey_compare_count); } LOG(INFO) << "[SysInfo]" @@ -529,7 +589,7 @@ void TabletNodeSysInfo::DumpLog() { << " scan " << info_.scan_rows() << " sspeed " << utils::ConvertByteToString(info_.scan_size()) << " snappy " << snappy_ratio - << " rawcomp " << leveldb::rawkey_compare_counter.Clear(); + << " rawcomp " << rawkey_compare_count; // hardware info if (FLAGS_tera_tabletnode_dump_running_info) { @@ -549,15 +609,19 @@ void TabletNodeSysInfo::DumpLog() { << " cpu_usage " << info_.cpu_usage() << "%"; // net and io info + int64_t ssd_read_count = latest_report->FindMetricValue(kSsdReadCountMetric); + int64_t ssd_read_size = latest_report->FindMetricValue(kSsdReadThroughPutMetric); + int64_t ssd_write_count = latest_report->FindMetricValue(kSsdWriteCountMetric); + int64_t ssd_write_size = latest_report->FindMetricValue(kSsdWriteThroughPutMetric); if (FLAGS_tera_tabletnode_dump_running_info) { dumper.DumpData("dfs_r", info_.dfs_io_r()); dumper.DumpData("dfs_w", info_.dfs_io_w()); dumper.DumpData("local_r", info_.local_io_r()); dumper.DumpData("local_w", info_.local_io_w()); - dumper.DumpData("ssd_r_counter", leveldb::ssd_read_counter.Get()); - dumper.DumpData("ssd_r_size", leveldb::ssd_read_size_counter.Get()); - dumper.DumpData("ssd_w_counter", leveldb::ssd_write_counter.Get()); - dumper.DumpData("ssd_w_size", leveldb::ssd_write_size_counter.Get()); + dumper.DumpData("ssd_r_counter", ssd_read_count); + dumper.DumpData("ssd_r_size", ssd_read_size); + dumper.DumpData("ssd_w_counter", ssd_write_count); + 
dumper.DumpData("ssd_w_size", ssd_write_size); } LOG(INFO) << "[IO]" @@ -569,10 +633,79 @@ void TabletNodeSysInfo::DumpLog() { << utils::ConvertByteToString(info_.local_io_r()) << " local_w " << info_.local_io_w() << " " << utils::ConvertByteToString(info_.local_io_w()) - << " ssd_r " << leveldb::ssd_read_counter.Clear() << " " - << utils::ConvertByteToString(leveldb::ssd_read_size_counter.Clear()) - << " ssd_w " << leveldb::ssd_write_counter.Clear() << " " - << utils::ConvertByteToString(leveldb::ssd_write_size_counter.Clear()); + << " ssd_r " << ssd_read_count << " " + << utils::ConvertByteToString(ssd_read_size) + << " ssd_w " << ssd_write_count << " " + << utils::ConvertByteToString(ssd_write_size); + + // cache info + double block_cache_hitrate = static_cast(latest_report->FindMetricValue(kBlockCacheHitRateMetric)) / 100.0; + if (block_cache_hitrate < 0.0) { + block_cache_hitrate = NAN; + } + int64_t block_cache_entries = latest_report->FindMetricValue(kBlockCacheEntriesMetric); + int64_t block_cache_charge = latest_report->FindMetricValue(kBlockCacheChargeMetric); + double table_cache_hitrate = static_cast(latest_report->FindMetricValue(kTableCacheHitRateMetric)) / 100.0; + if (table_cache_hitrate < 0.0) { + table_cache_hitrate = NAN; + } + int64_t table_cache_entries = latest_report->FindMetricValue(kTableCacheEntriesMetric); + int64_t table_cache_charge = latest_report->FindMetricValue(kTableCacheChargeMetric); + if (FLAGS_tera_tabletnode_dump_running_info) { + dumper.DumpData("block_cache_hitrate", block_cache_hitrate); + dumper.DumpData("block_cache_entry", block_cache_entries); + dumper.DumpData("block_cache_bytes", block_cache_charge); + dumper.DumpData("table_cache_hitrate", table_cache_hitrate); + dumper.DumpData("table_cache_entry", table_cache_entries); + dumper.DumpData("table_cache_bytes", table_cache_charge); + } + LOG(INFO) << "[Cache HitRate/Cnt/Size] table_cache " + << table_cache_hitrate << " " + << table_cache_entries << " " + << 
table_cache_charge + << ", block_cache " + << block_cache_hitrate << " " + << block_cache_entries << " " + << block_cache_charge; + + int64_t finished_read_request = + latest_report->FindMetricValue(kFinishedRequestCountMetric, kApiLabelRead); + int64_t finished_write_request = + latest_report->FindMetricValue(kFinishedRequestCountMetric, kApiLabelWrite); + int64_t finished_scan_request = + latest_report->FindMetricValue(kFinishedRequestCountMetric, kApiLabelScan); + LOG(INFO) << "[Finished Requests] " + << "read: " << finished_read_request * 1000 / interval + << ", write: " << finished_write_request * 1000 / interval + << ", scan: " << finished_scan_request * 1000 / interval; + + int64_t read_request_delay = + (finished_read_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayMetric, kApiLabelRead) / finished_read_request); + int64_t write_request_delay = + (finished_write_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayMetric, kApiLabelWrite) / finished_write_request); + int64_t scan_request_delay = + (finished_scan_request == 0 ? 0 : latest_report->FindMetricValue(kRequestDelayMetric, kApiLabelScan) / finished_scan_request); + LOG(INFO) << "[Requests Delay In Ms] " + << "read: " << read_request_delay / 1000.0 + << ", write: " << write_request_delay / 1000.0 + << ", scan: " << scan_request_delay / 1000.0; + + int64_t read_rows = + latest_report->FindMetricValue(kRowCountMetric, kApiLabelRead); + int64_t write_rows = + latest_report->FindMetricValue(kRowCountMetric, kApiLabelWrite); + int64_t scan_rows = + latest_report->FindMetricValue(kRowCountMetric, kApiLabelScan); + int64_t row_read_delay = + (read_rows == 0 ? 0 : latest_report->FindMetricValue(kRowDelayMetric, kApiLabelRead) / read_rows); + int64_t row_write_delay = + (write_rows == 0 ? 0 : latest_report->FindMetricValue(kRowDelayMetric, kApiLabelWrite) / write_rows); + int64_t row_scan_delay = + (scan_rows == 0 ? 
0 : latest_report->FindMetricValue(kRowDelayMetric, kApiLabelScan) / scan_rows); + LOG(INFO) << "[Row Delay In Ms] " + << "row_read_delay: " << row_read_delay / 1000.0 + << ", row_write_delay: " << row_write_delay / 1000.0 + << ", row_scan_delay: " << row_scan_delay / 1000.0; // extra info std::ostringstream ss; @@ -587,102 +720,138 @@ void TabletNodeSysInfo::DumpLog() { LOG(INFO) << ss.str(); // DFS info - double rdelay = leveldb::dfs_read_counter.Get() ? - leveldb::dfs_read_delay_counter.Clear()/1000/leveldb::dfs_read_counter.Get() - : 0; - double wdelay = leveldb::dfs_write_counter.Get() ? - leveldb::dfs_write_delay_counter.Clear()/1000/leveldb::dfs_write_counter.Get() - : 0; - double sdelay = leveldb::dfs_sync_counter.Get() ? - leveldb::dfs_sync_delay_counter.Clear()/1000/leveldb::dfs_sync_counter.Get() - : 0; + int64_t dfs_read_delay = latest_report->FindMetricValue(kDfsReadDelayMetric); + int64_t dfs_write_delay = latest_report->FindMetricValue(kDfsWriteDelayMetric); + int64_t dfs_sync_delay = latest_report->FindMetricValue(kDfsSyncDelayMetric); + int64_t dfs_read_count = latest_report->FindMetricValue(kDfsReadCountMetric); + int64_t dfs_write_count = latest_report->FindMetricValue(kDfsWriteCountMetric); + int64_t dfs_sync_count = latest_report->FindMetricValue(kDfsSyncCountMetric); + int64_t dfs_flush_count = latest_report->FindMetricValue(kDfsFlushCountMetric); + int64_t dfs_list_count = latest_report->FindMetricValue(kDfsListCountMetric); + int64_t dfs_other_count = latest_report->FindMetricValue(kDfsOtherCountMetric); + int64_t dfs_exists_count = latest_report->FindMetricValue(kDfsExistsCountMetric); + int64_t dfs_open_count = latest_report->FindMetricValue(kDfsOpenCountMetric); + int64_t dfs_close_count = latest_report->FindMetricValue(kDfsCloseCountMetric); + int64_t dfs_delete_count = latest_report->FindMetricValue(kDfsDeleteCountMetric); + int64_t dfs_tell_count = latest_report->FindMetricValue(kDfsTellCountMetric); + int64_t dfs_info_count = 
latest_report->FindMetricValue(kDfsInfoCountMetric); + int64_t dfs_read_hang = latest_report->FindMetricValue(kDfsReadHangMetric); + int64_t dfs_write_hang = latest_report->FindMetricValue(kDfsWriteHangMetric); + int64_t dfs_sync_hang = latest_report->FindMetricValue(kDfsSyncHangMetric); + int64_t dfs_flush_hang = latest_report->FindMetricValue(kDfsFlushHangMetric); + int64_t dfs_list_hang = latest_report->FindMetricValue(kDfsListHangMetric); + int64_t dfs_other_hang = latest_report->FindMetricValue(kDfsOtherHangMetric); + int64_t dfs_exists_hang = latest_report->FindMetricValue(kDfsExistsHangMetric); + int64_t dfs_open_hang = latest_report->FindMetricValue(kDfsOpenHangMetric); + int64_t dfs_close_hang = latest_report->FindMetricValue(kDfsCloseHangMetric); + int64_t dfs_delete_hang = latest_report->FindMetricValue(kDfsDeleteHangMetric); + int64_t dfs_tell_hang = latest_report->FindMetricValue(kDfsTellHangMetric); + int64_t dfs_info_hang = latest_report->FindMetricValue(kDfsInfoHangMetric); + double rdelay = dfs_read_count ? static_cast(dfs_read_delay) / 1000.0 / dfs_read_count : 0; + double wdelay = dfs_write_count ? static_cast(dfs_write_delay) / 1000.0 / dfs_write_count : 0; + double sdelay = dfs_sync_count ? 
static_cast(dfs_sync_delay) / 1000.0 / dfs_sync_count : 0; if (FLAGS_tera_tabletnode_dump_running_info) { - dumper.DumpData("dfs_read", leveldb::dfs_read_counter.Get()); - dumper.DumpData("dfs_read_hang", leveldb::dfs_read_hang_counter.Get()); + dumper.DumpData("dfs_read", dfs_read_count); + dumper.DumpData("dfs_read_hang", dfs_read_hang); dumper.DumpData("dfs_rdealy", rdelay); - dumper.DumpData("dfs_write", leveldb::dfs_write_counter.Get()); - dumper.DumpData("dfs_write_hang", leveldb::dfs_write_hang_counter.Get()); + dumper.DumpData("dfs_write", dfs_write_count); + dumper.DumpData("dfs_write_hang", dfs_write_hang); dumper.DumpData("dfs_wdelay", wdelay); - dumper.DumpData("dfs_sync", leveldb::dfs_sync_counter.Get()); - dumper.DumpData("dfs_sync_hang", leveldb::dfs_sync_hang_counter.Get()); + dumper.DumpData("dfs_sync", dfs_sync_count); + dumper.DumpData("dfs_sync_hang", dfs_sync_hang); dumper.DumpData("dfs_sdelay", sdelay); - dumper.DumpData("dfs_flush", leveldb::dfs_flush_counter.Get()); - dumper.DumpData("dfs_flush_hang", leveldb::dfs_flush_hang_counter.Get()); - dumper.DumpData("dfs_list", leveldb::dfs_list_counter.Get()); - dumper.DumpData("dfs_list_hang", leveldb::dfs_list_hang_counter.Get()); - dumper.DumpData("dfs_info", leveldb::dfs_info_counter.Get()); - dumper.DumpData("dfs_info_hang", leveldb::dfs_info_hang_counter.Get()); - dumper.DumpData("dfs_exists", leveldb::dfs_exists_counter.Get()); - dumper.DumpData("dfs_exists_hang", leveldb::dfs_exists_hang_counter.Get()); - dumper.DumpData("dfs_open", leveldb::dfs_open_counter.Get()); - dumper.DumpData("dfs_open_hang", leveldb::dfs_open_hang_counter.Get()); - dumper.DumpData("dfs_close", leveldb::dfs_close_counter.Get()); - dumper.DumpData("dfs_close_hang", leveldb::dfs_close_hang_counter.Get()); - dumper.DumpData("dfs_delete", leveldb::dfs_delete_counter.Get()); - dumper.DumpData("dfs_delete_hang", leveldb::dfs_delete_hang_counter.Get()); - dumper.DumpData("dfs_tell", leveldb::dfs_tell_counter.Get()); - 
dumper.DumpData("dfs_tell_hang", leveldb::dfs_tell_hang_counter.Get()); - dumper.DumpData("dfs_other", leveldb::dfs_other_counter.Get()); - dumper.DumpData("dfs_other_hang", leveldb::dfs_other_hang_counter.Get()); + dumper.DumpData("dfs_flush", dfs_flush_count); + dumper.DumpData("dfs_flush_hang", dfs_flush_hang); + dumper.DumpData("dfs_list", dfs_list_count); + dumper.DumpData("dfs_list_hang", dfs_list_hang); + dumper.DumpData("dfs_info", dfs_info_count); + dumper.DumpData("dfs_info_hang", dfs_info_hang); + dumper.DumpData("dfs_exists", dfs_exists_count); + dumper.DumpData("dfs_exists_hang", dfs_exists_hang); + dumper.DumpData("dfs_open", dfs_open_count); + dumper.DumpData("dfs_open_hang", dfs_open_hang); + dumper.DumpData("dfs_close", dfs_close_count); + dumper.DumpData("dfs_close_hang", dfs_close_hang); + dumper.DumpData("dfs_delete", dfs_delete_count); + dumper.DumpData("dfs_delete_hang", dfs_delete_hang); + dumper.DumpData("dfs_tell", dfs_tell_count); + dumper.DumpData("dfs_tell_hang", dfs_tell_hang); + dumper.DumpData("dfs_other", dfs_other_count); + dumper.DumpData("dfs_other_hang", dfs_other_hang); } - LOG(INFO) << "[Dfs] read " << leveldb::dfs_read_counter.Clear() << " " - << leveldb::dfs_read_hang_counter.Get() << " " + LOG(INFO) << "[Dfs] read " << dfs_read_count << " " + << dfs_read_hang << " " << "rdelay " << rdelay << " " - << "write " << leveldb::dfs_write_counter.Clear() << " " - << leveldb::dfs_write_hang_counter.Get() << " " + << "rdelay_total " << dfs_read_delay << " " + << "write " << dfs_write_count << " " + << dfs_write_hang << " " << "wdelay " << wdelay << " " - << "sync " << leveldb::dfs_sync_counter.Clear() << " " - << leveldb::dfs_sync_hang_counter.Get() << " " + << "wdelay_total " << dfs_write_delay << " " + << "sync " << dfs_sync_count << " " + << dfs_sync_hang << " " << "sdelay " << sdelay << " " - << "flush " << leveldb::dfs_flush_counter.Clear() << " " - << leveldb::dfs_flush_hang_counter.Get() << " " - << "list " << 
leveldb::dfs_list_counter.Clear() << " " - << leveldb::dfs_list_hang_counter.Get() << " " - << "info " << leveldb::dfs_info_counter.Clear() << " " - << leveldb::dfs_info_hang_counter.Get() << " " - << "exists " << leveldb::dfs_exists_counter.Clear() << " " - << leveldb::dfs_exists_hang_counter.Get() << " " - << "open " << leveldb::dfs_open_counter.Clear() << " " - << leveldb::dfs_open_hang_counter.Get() << " " - << "close " << leveldb::dfs_close_counter.Clear() << " " - << leveldb::dfs_close_hang_counter.Get() << " " - << "delete " << leveldb::dfs_delete_counter.Clear() << " " - << leveldb::dfs_delete_hang_counter.Get() << " " - << "tell " << leveldb::dfs_tell_counter.Clear() << " " - << leveldb::dfs_tell_hang_counter.Get() << " " - << "other " << leveldb::dfs_other_counter.Clear() << " " - << leveldb::dfs_other_hang_counter.Get(); + << "sdelay_total " << dfs_sync_delay << " " + << "flush " << dfs_flush_count << " " + << dfs_flush_hang << " " + << "list " << dfs_list_count << " " + << dfs_list_hang << " " + << "info " << dfs_info_count << " " + << dfs_info_hang << " " + << "exists " << dfs_exists_count << " " + << dfs_exists_hang << " " + << "open " << dfs_open_count << " " + << dfs_open_hang << " " + << "close " << dfs_close_count << " " + << dfs_close_hang << " " + << "delete " << dfs_delete_count << " " + << dfs_delete_hang << " " + << "tell " << dfs_tell_count << " " + << dfs_tell_hang << " " + << "other " << dfs_other_count << " " + << dfs_other_hang; // local info + int64_t posix_read_count = latest_report->FindMetricValue(kPosixReadCountMetric); + int64_t posix_write_count = latest_report->FindMetricValue(kPosixWriteCountMetric); + int64_t posix_sync_count = latest_report->FindMetricValue(kPosixSyncCountMetric); + int64_t posix_list_count = latest_report->FindMetricValue(kPosixListCountMetric); + int64_t posix_info_count = latest_report->FindMetricValue(kPosixInfoCountMetric); + int64_t posix_exists_count = 
latest_report->FindMetricValue(kPosixExistsCountMetric); + int64_t posix_open_count = latest_report->FindMetricValue(kPosixOpenCountMetric); + int64_t posix_close_count = latest_report->FindMetricValue(kPosixCloseCountMetric); + int64_t posix_delete_count = latest_report->FindMetricValue(kPosixDeleteCountMetric); + int64_t posix_tell_count = latest_report->FindMetricValue(kPosixTellCountMetric); + int64_t posix_seek_count = latest_report->FindMetricValue(kPosixSeekCountMetric); + int64_t posix_other_count = latest_report->FindMetricValue(kPosixOtherCountMetric); if (FLAGS_tera_tabletnode_dump_running_info) { - dumper.DumpData("local_read", leveldb::posix_read_counter.Get()); - dumper.DumpData("local_write", leveldb::posix_write_counter.Get()); - dumper.DumpData("local_sync", leveldb::posix_sync_counter.Get()); - dumper.DumpData("local_list", leveldb::posix_list_counter.Get()); - dumper.DumpData("local_info", leveldb::posix_info_counter.Get()); - dumper.DumpData("local_exists", leveldb::posix_exists_counter.Get()); - dumper.DumpData("local_open", leveldb::posix_open_counter.Get()); - dumper.DumpData("local_close", leveldb::posix_close_counter.Get()); - dumper.DumpData("local_delete", leveldb::posix_delete_counter.Get()); - dumper.DumpData("local_tell", leveldb::posix_tell_counter.Get()); - dumper.DumpData("local_seek", leveldb::posix_seek_counter.Get()); - dumper.DumpData("local_other", leveldb::posix_other_counter.Get()); + dumper.DumpData("local_read", posix_read_count); + dumper.DumpData("local_write", posix_write_count); + dumper.DumpData("local_sync", posix_sync_count); + dumper.DumpData("local_list", posix_list_count); + dumper.DumpData("local_info", posix_info_count); + dumper.DumpData("local_exists", posix_exists_count); + dumper.DumpData("local_open", posix_open_count); + dumper.DumpData("local_close", posix_close_count); + dumper.DumpData("local_delete", posix_delete_count); + dumper.DumpData("local_tell", posix_tell_count); + dumper.DumpData("local_seek", 
posix_seek_count); + dumper.DumpData("local_other", posix_other_count); } - LOG(INFO) << "[Local] read " << leveldb::posix_read_counter.Clear() << " " - << "write " << leveldb::posix_write_counter.Clear() << " " - << "sync " << leveldb::posix_sync_counter.Clear() << " " - << "list " << leveldb::posix_list_counter.Clear() << " " - << "info " << leveldb::posix_info_counter.Clear() << " " - << "exists " << leveldb::posix_exists_counter.Clear() << " " - << "open " << leveldb::posix_open_counter.Clear() << " " - << "close " << leveldb::posix_close_counter.Clear() << " " - << "delete " << leveldb::posix_delete_counter.Clear() << " " - << "tell " << leveldb::posix_tell_counter.Clear() << " " - << "seek " << leveldb::posix_seek_counter.Clear() << " " - << "other " << leveldb::posix_other_counter.Clear(); + LOG(INFO) << "[Local] read " << posix_read_count << " " + << "write " << posix_write_count << " " + << "sync " << posix_sync_count << " " + << "list " << posix_list_count << " " + << "info " << posix_info_count << " " + << "exists " << posix_exists_count << " " + << "open " << posix_open_count << " " + << "close " << posix_close_count << " " + << "delete " << posix_delete_count << " " + << "tell " << posix_tell_count << " " + << "seek " << posix_seek_count << " " + << "other " << posix_other_count; } } // namespace tabletnode diff --git a/src/tabletnode/tabletnode_sysinfo.h b/src/tabletnode/tabletnode_sysinfo.h index 453f2df95..c20a2b519 100644 --- a/src/tabletnode/tabletnode_sysinfo.h +++ b/src/tabletnode/tabletnode_sysinfo.h @@ -50,15 +50,9 @@ class TabletNodeSysInfo { private: TabletNodeInfo info_; TabletMetaList tablet_list_; - int64_t mem_check_ts_; - int64_t net_check_ts_; - int64_t io_check_ts_; - int64_t net_tx_total_; - int64_t net_rx_total_; - int64_t cpu_check_ts_; - - int64_t tablet_check_ts_; + mutable Mutex mutex_; + int64_t last_check_ts_; }; } // namespace tabletnode } // namespace tera diff --git a/src/tabletnode/tabletnode_zk_adapter.cc 
b/src/tabletnode/tabletnode_zk_adapter.cc old mode 100644 new mode 100755 index 6c9ab06e0..d3e3d7322 --- a/src/tabletnode/tabletnode_zk_adapter.cc +++ b/src/tabletnode/tabletnode_zk_adapter.cc @@ -422,6 +422,9 @@ void InsTabletNodeZkAdapter::OnKickMarkCreated() { } void InsTabletNodeZkAdapter::OnLockChange(std::string session_id, bool deleted) { + LOG(INFO) << "[OnLockChange] session_id = " << session_id + << " deleted = " << deleted + << " now_session_id = " << ins_sdk_->GetSessionID(); if (deleted || session_id != ins_sdk_->GetSessionID()) { LOG(ERROR) << "I lost my lock , so quit"; _Exit(EXIT_FAILURE); diff --git a/src/tabletnode/test/tabletnode_impl_test.cc b/src/tabletnode/test/tabletnode_impl_test.cc index 808250b02..efc1d61b7 100644 --- a/src/tabletnode/test/tabletnode_impl_test.cc +++ b/src/tabletnode/test/tabletnode_impl_test.cc @@ -16,7 +16,7 @@ #include "proto/proto_helper.h" #include "io/mock_tablet_io.h" -DECLARE_bool(tera_zk_enabled); +DECLARE_string(tera_coord_type); DECLARE_int32(tera_tabletnode_retry_period); DECLARE_string(tera_leveldb_env_type); @@ -40,7 +40,7 @@ class TabletNodeImplTest : public ::testing::Test { m_ret_io_split(false), m_start_key("start_key"), m_end_key("end_key"), m_schema(DefaultTableSchema()) { - FLAGS_tera_zk_enabled = false; + FLAGS_tera_coord_type = "fake_zk"; m_tablet_meta.set_table_name("name"); m_tablet_meta.set_path("path"); diff --git a/src/tabletnode/test/tabletnode_sysinfo_test.cc b/src/tabletnode/test/tabletnode_sysinfo_test.cc index 4f4c06724..e15c83a7c 100644 --- a/src/tabletnode/test/tabletnode_sysinfo_test.cc +++ b/src/tabletnode/test/tabletnode_sysinfo_test.cc @@ -5,7 +5,7 @@ #define private public #include "tabletnode_sysinfo.h" -#include "utils/timer.h" +#include "common/timer.h" #include "gtest/gtest.h" namespace tera { diff --git a/src/tera_c.cc b/src/tera_c.cc index fd3fb2994..cd10eb1ba 100644 --- a/src/tera_c.cc +++ b/src/tera_c.cc @@ -39,7 +39,7 @@ static bool SaveError(char** errptr, const ErrorCode& 
s) { } if (errptr == NULL) { fprintf(stderr, "%s tera error: %s.\n", - common::timer::get_curtime_str().c_str(), s.GetReason().c_str()); + tera::get_curtime_str().c_str(), s.GetReason().c_str()); return true; } @@ -164,7 +164,7 @@ bool tera_table_put_kv(tera_table_t* table, const char* key, uint64_t keylen, delete mutation; if (SaveError(errptr, err)) { fprintf(stderr, "%s tera error: %s.\n", - common::timer::get_curtime_str().c_str(), err.GetReason().c_str()); + tera::get_curtime_str().c_str(), err.GetReason().c_str()); return false; } return true; @@ -197,7 +197,7 @@ bool tera_table_delete(tera_table_t* table, const char* row_key, uint64_t keylen delete mutation; if (SaveError(NULL, err)) { fprintf(stderr, "%s tera delete error: %s.\n", - common::timer::get_curtime_str().c_str(), err.GetReason().c_str()); + tera::get_curtime_str().c_str(), err.GetReason().c_str()); return false; } return true; diff --git a/src/tera_flags.cc b/src/tera_flags.cc old mode 100644 new mode 100755 index 70dba8404..b1364506d --- a/src/tera_flags.cc +++ b/src/tera_flags.cc @@ -19,8 +19,10 @@ DEFINE_int32(tera_heartbeat_retry_times, 5, "the max retry times when fail to se DEFINE_string(tera_working_dir, "./", "the base dir for system data"); -DEFINE_bool(tera_zk_enabled, true, "enable zk adapter to collaborate with other master instances"); -DEFINE_bool(tera_mock_zk_enabled, false, "enable mock zk adapter to collaborate with other master instances"); +DEFINE_string(tera_coord_type, "", "the coordinator service type for tera cluster [zk,ins,mock_zk,mock_ins,fake_zk]"); + +DEFINE_bool(tera_zk_enabled, true, "[obsoleted replace by --tera_coord_type=zk] enable zk adapter to coord"); +DEFINE_bool(tera_mock_zk_enabled, false, "[obsoleted replace by --tera_coord_type=mock_zk] enable mock zk adapter to coord"); DEFINE_string(tera_zk_addr_list, "localhost:2180", "zookeeper server list"); DEFINE_string(tera_zk_root_path, "/tera", "zookeeper root path"); DEFINE_string(tera_fake_zk_path_prefix, 
"../fakezk", "fake zk path prefix in onebox tera"); @@ -31,6 +33,12 @@ DEFINE_string(tera_zk_lib_log_path, "../log/zk.log", "zookeeper library log outp DEFINE_string(tera_log_prefix, "", "prefix of log file (INFO, WARNING)"); DEFINE_string(tera_local_addr, "", "local host's ip address"); DEFINE_bool(tera_online_schema_update_enabled, false, "enable online-schema-update"); +DEFINE_bool(tera_info_log_clean_enable, true, "enable log cleaner task, enable as default"); +DEFINE_int64(tera_info_log_clean_period_second, 2592000, "time period (in second) for log cleaner task, 30 days as default"); +DEFINE_int64(tera_info_log_expire_second, 2592000, "expire time (in second) of log file, 30 days as default"); +DEFINE_bool(tera_metric_http_server_enable, true, "enable metric http server, enable as default"); +DEFINE_int32(tera_metric_http_server_listen_port, 20221, "listen port for metric http server"); +DEFINE_int64(tera_hardware_collect_period_second, 5, "hardware metrics checking period (in second)"); ///////// io ///////// @@ -100,10 +108,14 @@ DEFINE_int32(tera_master_impl_retry_times, 5, "the max retry times when master i DEFINE_string(tera_master_meta_table_name, "meta_table", "the meta table name"); DEFINE_string(tera_master_meta_table_path, "meta", "the path of meta table"); -DEFINE_double(tera_master_workload_split_threshold, 3.5, "if workload(wwl) > 3.5, halve the splitsize"); +DEFINE_double(tera_master_workload_merge_threshold, 1.0, "if workload(wwl) < 1.0, enable merge on this tablet"); +DEFINE_double(tera_master_workload_split_threshold, 9.9, "if workload(wwl) > 9.9, trigger split by workload"); DEFINE_int64(tera_master_split_tablet_size, 512, "the size (in MB) of tablet to trigger split"); +DEFINE_int64(tera_master_min_split_size, 64, "the size (in MB) of tablet to trigger split"); +DEFINE_double(tera_master_min_split_ratio, 0.25, "min ratio of split size of tablet schema to trigger split"); +DEFINE_int64(tera_master_split_history_time_interval, 600000, "minimal 
split time interval(ms)"); DEFINE_int64(tera_master_merge_tablet_size, 0, "the size (in MB) of tablet to trigger merge"); -DEFINE_string(tera_master_gc_strategy, "incremental", "gc strategy, [default, incremental, trackable]"); +DEFINE_string(tera_master_gc_strategy, "trackable", "gc strategy, [default, trackable]"); DEFINE_int32(tera_master_max_split_concurrency, 1, "the max concurrency of tabletnode for split tablet"); DEFINE_int32(tera_master_max_load_concurrency, 5, "the max concurrency of tabletnode for load tablet"); @@ -118,10 +130,11 @@ DEFINE_bool(tera_master_move_tablet_enabled, true, "enable master to auto move t DEFINE_bool(tera_master_meta_isolate_enabled, false, "enable master to reserve a tabletnode for meta"); DEFINE_bool(tera_master_load_balance_table_grained, true, "whether the load balance policy only consider the specified table"); DEFINE_double(tera_master_load_balance_size_ratio_trigger, 1.2, "ratio of heaviest node size to lightest to trigger load balance"); -DEFINE_int32(tera_master_load_balance_ts_load_threshold, 5000, "threshold of one tabletnode in QPS load-balance decision"); +DEFINE_int32(tera_master_load_balance_ts_load_threshold, 1000000000, "threshold of one tabletnode in QPS load-balance decision"); +DEFINE_int64(tera_master_load_balance_ts_size_threshold, 0, "threshold of one tabletnode in Size load-balance decision"); DEFINE_int32(tera_master_load_balance_scan_weight, 300, "scan weight in load-balance decision"); -DEFINE_double(tera_safemode_tablet_locality_ratio, 0.3, "the tablet locality ratio threshold of safemode"); +DEFINE_double(tera_safemode_tablet_locality_ratio, 0.9, "the tablet locality ratio threshold of safemode"); DEFINE_bool(tera_master_kick_tabletnode_enabled, true, "enable master to kick tabletnode"); DEFINE_int32(tera_master_kick_tabletnode_query_fail_times, 10, "the number of query fail to kick tabletnode"); DEFINE_int32(tera_master_control_tabletnode_retry_period, 60000, "the retry period (in ms) for master 
control tabletnode"); @@ -147,27 +160,31 @@ DEFINE_int64(tera_master_stat_table_interval, 60, "interval of system status dum DEFINE_int64(tera_master_stat_table_splitsize, 100, "default split size of stat table"); DEFINE_int32(tera_master_gc_period, 60000, "the period (in ms) for master gc"); +DEFINE_bool(tera_master_gc_trash_enabled, true, "enable master gc trash"); +DEFINE_int64(tera_master_gc_trash_expire_time_s, 86400, "time (in second) for gc file keeped in trash"); +DEFINE_int64(tera_master_gc_trash_clean_period_s, 3600, "period (in second) for clean gc trash"); DEFINE_int64(tera_master_ins_session_timeout, 10000000, "ins session timeout(us), default 10sec"); DEFINE_bool(tera_master_availability_check_enabled, true, "whether execute availability check"); // reload config safety DEFINE_bool(tera_master_availability_show_details_enabled, false, "whether show details of not-ready tablets"); // reload config safety DEFINE_int64(tera_master_not_available_threshold, 0, "the threshold (in s) of not available"); // reload config safety DEFINE_int64(tera_master_availability_check_period, 60, "the period (in s) of availability check"); // reload config safety -DEFINE_int64(tera_master_availability_warning_threshold, 30, "30s, the threshold (in s) of warning availability"); // reload config safety -DEFINE_int64(tera_master_availability_error_threshold, 300, "5 minutes, the threshold (in s) of error availability"); // reload config safety -DEFINE_int64(tera_master_availability_fatal_threshold, 1800, "30 minutes, the threshold (in s) of fatal availability"); // reload config safety +DEFINE_int64(tera_master_availability_warning_threshold, 60, "1 minute, the threshold (in s) of warning availability"); // reload config safety +DEFINE_int64(tera_master_availability_error_threshold, 600, "10 minutes, the threshold (in s) of error availability"); // reload config safety +DEFINE_int64(tera_master_availability_fatal_threshold, 3600, "1 hour, the threshold (in s) of fatal 
availability"); // reload config safety +DEFINE_bool(tera_master_update_split_meta, true, "[split] update child tablets meta from master"); ///////// tablet node ///////// DEFINE_string(tera_tabletnode_port, "20000", "the tablet node port of tera system"); -DEFINE_int32(tera_tabletnode_ctrl_thread_num, 10, "control thread number of tablet node (query/load/unload/split)"); +DEFINE_int32(tera_tabletnode_ctrl_thread_num, 20, "control thread number of tablet node (query/load/unload/split)"); DEFINE_int32(tera_tabletnode_write_thread_num, 10, "write thread number of tablet node"); DEFINE_int32(tera_tabletnode_read_thread_num, 40, "read thread number of tablet node"); -DEFINE_int32(tera_tabletnode_scan_thread_num, 5, "scan thread number of tablet node"); +DEFINE_int32(tera_tabletnode_scan_thread_num, 30, "scan thread number of tablet node"); DEFINE_int32(tera_tabletnode_manual_compact_thread_num, 2, "the manual compact thread number of tablet node server"); DEFINE_int32(tera_tabletnode_impl_thread_min_num, 1, "the min thread number for tablet node impl operations"); DEFINE_int32(tera_tabletnode_impl_thread_max_num, 10, "the max thread number for tablet node impl operations"); -DEFINE_int32(tera_tabletnode_compact_thread_num, 10, "the max thread number for leveldb compaction"); +DEFINE_int32(tera_tabletnode_compact_thread_num, 30, "the max thread number for leveldb compaction"); DEFINE_int32(tera_tabletnode_scanner_cache_size, 5, "default tablet scanner manager cache no more than 100 stream"); DEFINE_int32(tera_tabletnode_connect_retry_times, 5, "the max retry times when connect to tablet node"); @@ -180,16 +197,20 @@ DEFINE_int32(tera_tabletnode_scan_pack_max_size, 10240, "the max size(KB) of the DEFINE_int32(tera_asyncwriter_pending_limit, 10000, "the max pending data size (KB) in async writer"); DEFINE_bool(tera_enable_level0_limit, true, "enable level0 limit"); -DEFINE_int32(tera_tablet_level0_file_limit, 20000, "the max level0 file num before write busy"); 
+DEFINE_int32(tera_tablet_level0_file_limit, 500, "the max level0 file num before write busy"); DEFINE_int32(tera_tablet_ttl_percentage, 99, "percentage of ttl tag in sst file begin to trigger compaction"); DEFINE_int32(tera_tablet_del_percentage, 20, "percentage of del tag in sst file begin to trigger compaction"); -DEFINE_int32(tera_asyncwriter_sync_interval, 100, "the interval (in ms) to sync write buffer to disk"); +DEFINE_int32(tera_asyncwriter_sync_interval, 10, "the interval (in ms) to sync write buffer to disk"); DEFINE_int32(tera_asyncwriter_sync_size_threshold, 1024, "force sync per X KB"); DEFINE_int32(tera_asyncwriter_batch_size, 1024, "write batch to leveldb per X KB"); DEFINE_int32(tera_request_pending_limit, 100000, "the max read/write request pending"); DEFINE_int32(tera_scan_request_pending_limit, 1000, "the max scan request pending"); DEFINE_int32(tera_garbage_collect_period, 1800, "garbage collect period in s"); DEFINE_int32(tera_garbage_collect_debug_log, 0, "garbage collect debug log"); +DEFINE_bool(tera_leveldb_ignore_corruption_in_open, false, "ignore fs error when open db"); +DEFINE_int32(tera_leveldb_slow_down_level0_score_limit, 100, "control level 0 score compute, score / 2 or sqrt(score / 2)"); +DEFINE_int32(tera_leveldb_max_background_compactions, 8, "multi-thread compaction number"); +DEFINE_int32(tera_tablet_max_sub_parallel_compaction, 10, "max sub compaction in parallel"); DEFINE_int32(tera_tabletnode_write_meta_rpc_timeout, 60000, "the timeout period (in ms) for tabletnode write meta"); DEFINE_int32(tera_tabletnode_retry_period, 100, "the retry interval period (in ms) when operate tablet"); @@ -219,6 +240,7 @@ DEFINE_int32(tera_tabletnode_tcm_cache_release_period, 180, "the period (in sec) DEFINE_int64(tera_tabletnode_tcm_cache_size, 838860800, "TCMALLOC_MAX_TOTAL_THREAD_CACHE_BYTES"); DEFINE_bool(tera_tabletnode_dump_running_info, true, "dump tabletnode running info"); DEFINE_string(tera_tabletnode_running_info_dump_file, 
"../monitor/ts.info.data", "file path for dump running info"); +DEFINE_int64(tera_tabletnode_sysinfo_check_interval, 9223372036854775806, "sysinfo check db health interval in us, default int64_max - 1"); ///////// SDK ///////// DEFINE_string(tera_sdk_impl_type, "tera", "the activated type of SDK impl"); @@ -248,9 +270,9 @@ DEFINE_int32(tera_sdk_timeout_precision, 100, "precision of sdk read/write timeo DEFINE_int32(tera_sdk_delay_send_internal, 2, "the sdk resend the request internal time(s)"); DEFINE_int32(tera_sdk_scan_buffer_limit, 2048000, "the pack size limit for scan operation"); DEFINE_bool(tera_sdk_write_sync, false, "sync flag for write"); -DEFINE_int32(tera_sdk_batch_size, 100, "batch_size"); -DEFINE_int32(tera_sdk_write_send_interval, 100, "write batch send interval time"); -DEFINE_int32(tera_sdk_read_send_interval, 10, "read batch send interval time"); +DEFINE_int32(tera_sdk_batch_size, 250, "batch_size"); +DEFINE_int32(tera_sdk_write_send_interval, 10, "write batch send interval time"); +DEFINE_int32(tera_sdk_read_send_interval, 5, "read batch send interval time"); DEFINE_int64(tera_sdk_max_mutation_pending_num, INT64_MAX, "default number of pending mutations in async put op"); DEFINE_int64(tera_sdk_max_reader_pending_num, INT64_MAX, "default number of pending readers in async get op"); DEFINE_bool(tera_sdk_async_blocking_enabled, true, "enable blocking when async writing and reading"); @@ -264,24 +286,120 @@ DEFINE_int32(tera_sdk_cookie_update_interval, 600, "the interval of cookie updat DEFINE_bool(tera_sdk_perf_counter_enabled, true, "enable performance counter log"); DEFINE_int64(tera_sdk_perf_counter_log_interval, 60, "the interval period (in sec) of performance counter log dumping"); +DEFINE_bool(tera_sdk_perf_collect_enabled, false, "enable collect perf counter for metrics"); +DEFINE_int32(tera_sdk_perf_collect_interval, 10000, "the interval of collect perf counter(ms)"); DEFINE_bool(tera_sdk_batch_scan_enabled, true, "enable batch scan"); 
DEFINE_int64(tera_sdk_scan_buffer_size, 65536, "default buffer limit for scan"); DEFINE_int64(tera_sdk_scan_number_limit, 1000000000, "default number limit for scan"); DEFINE_int32(tera_sdk_max_batch_scan_req, 30, "the max number of concurrent scan req"); -DEFINE_int32(tera_sdk_batch_scan_max_retry, 60, "the max retry times for session scan"); DEFINE_int64(tera_sdk_scan_timeout, 30000, "scan timeout"); +DEFINE_int32(tera_sdk_batch_scan_max_retry, 60, "the max retry times for session scan"); DEFINE_int64(batch_scan_delay_retry_in_us, 1000000, "timewait in us before retry batch scan"); +DEFINE_int32(tera_sdk_sync_scan_max_retry, 10, "the max retry times for sync scan"); +DEFINE_int64(sync_scan_delay_retry_in_ms, 1000, "timewait in ms before retry sync scan"); DEFINE_string(tera_ins_addr_list, "", "the ins cluster addr. e.g. abc.com:1234,abb.com:1234"); DEFINE_string(tera_ins_root_path, "", "root path on ins. e.g /ps/sandbox"); -DEFINE_bool(tera_ins_enabled, false, "option to open ins naming"); -DEFINE_bool(tera_mock_ins_enabled, false, "option to open mock ins naming"); +DEFINE_bool(tera_ins_enabled, false, "[obsoleted replace by --tera_coord_type=ins] option to open ins naming"); +DEFINE_bool(tera_mock_ins_enabled, false, "[obsoleted replace by --tera_coord_type=mock_ins] option to open mock ins naming"); DEFINE_int64(tera_ins_session_timeout, 600000000, "ins session timeout(us), default 10min"); +DEFINE_int64(tera_sdk_ins_session_timeout, 10000000, "ins session timeout(us), default 10s"); DEFINE_int64(tera_sdk_status_timeout, 600, "(s) check tablet/tabletnode status timeout"); +DEFINE_uint64(tera_sdk_read_max_qualifiers, 18446744073709551615U, "read qu limit of each cf, default value is the max of uint64"); ///////// http ///////// DEFINE_string(tera_http_port, "8657", "the http proxy port of tera"); DEFINE_int32(tera_http_request_thread_num, 30, "the http proxy thread num for handle client request"); DEFINE_int32(tera_http_ctrl_thread_num, 10, "the http proxy 
thread num for it self"); + +///////// timeoracle ///////// +DEFINE_string(tera_timeoracle_port, "30000", "the timeoracle port of tera"); +DEFINE_int32(tera_timeoracle_max_lease_second, 30, "timeoracle work this seconds for a lease"); +DEFINE_int32(tera_timeoracle_refresh_lease_second, 10, "timeoracle refresh lease before this seconds"); + +// only used by timeoracle +DEFINE_bool(tera_timeoracle_mock_enabled, false, "used local filesystem replace zk and ins."); +DEFINE_string(tera_timeoracle_mock_root_path, "/tmp/", "the root path of local filesystem."); +DEFINE_int32(tera_timeoracle_work_thread_num, 16, "timeoracle sofarpc server work_thread_number"); +DEFINE_int32(tera_timeoracle_io_service_pool_size, 4, "timeoracle sofarpc server io_service_pool_size"); + +///////// global transaction //////// +DEFINE_bool(tera_sdk_client_for_gtxn, false, "build thread_pool for global transaction"); +DEFINE_bool(tera_sdk_tso_client_enabled, false, "get timestamp from timeoracle, default from local timestamp"); +DEFINE_int32(tera_gtxn_thread_max_num, 20, "the max thread number for global transaction operations"); +DEFINE_int32(tera_gtxn_timeout_ms, 600000, "global transaction timeout limit (ms) default 10 minutes"); +DEFINE_int32(tera_gtxn_get_waited_times_limit, 10, "global txn wait other locked times limit"); +DEFINE_int32(tera_gtxn_all_puts_size_limit, 10000, "global txn all puts data size limit"); + +//////// observer /////// +DEFINE_int32(observer_proc_thread_num, 3, ""); +DEFINE_int64(observer_max_pending_task, 10000, ""); +DEFINE_int32(observer_scanner_thread_num, 20, ""); +DEFINE_int32(observer_read_thread_num, 20, "observer read thread num"); +DEFINE_int32(observer_ack_conflict_timeout, 3600, "timeout for ack column conflict check"); +DEFINE_int32(observer_rowlock_client_thread_num, 20, ""); + +//////// rowlock server //////// +DEFINE_bool(rowlock_rpc_limit_enabled, false, "enable the rpc traffic limit in sdk"); +DEFINE_int32(rowlock_rpc_limit_max_inflow, 10, "the max 
bandwidth (in MB/s) for sdk rpc traffic limitation on input flow"); +DEFINE_int32(rowlock_rpc_limit_max_outflow, 10, "the max bandwidth (in MB/s) for sdk rpc traffic limitation on output flow"); +DEFINE_int32(rowlock_rpc_max_pending_buffer_size, 200, "max pending buffer size (in MB) for sdk rpc"); +DEFINE_int32(rowlock_rpc_work_thread_num, 2, "thread num of sdk rpc client"); + +DEFINE_string(rowlock_server_ip, "0.0.0.0", "rowlock server ip"); +DEFINE_string(rowlock_server_port, "22222", "rowlock server port"); +DEFINE_string(rowlock_zk_root_path, "/rowlock", ""); +DEFINE_int32(rowlock_zk_timeout, 10000, "zk timeout"); +DEFINE_string(rowlock_ins_root_path, "/rowlock", "ins rowlock root path"); +DEFINE_int32(rowlock_server_node_num, 1, "number of rowlock servers in cluster"); + +DEFINE_int32(rowlock_db_ttl, 600000, "timeout for an unlocked lock, 10min"); +DEFINE_int32(rowlock_timing_wheel_patch_num, 600, "the number of timing wheel, every patch_num step the oldest data will be cleared"); +DEFINE_int32(rowlock_db_sharding_number, 1024, "sharding number, enhance concurrency"); +DEFINE_string(rowlock_fake_root_path, "../fakezk/rowlock", "one box fake zk root path"); +DEFINE_int32(rowlock_thread_max_num, 20, "the max thread number of rowlock server"); +DEFINE_int32(rowlock_client_max_fail_times, 5, "client max failure time"); + +DEFINE_bool(rowlock_proxy_async_enable, false, "sync | async"); +DEFINE_string(rowlock_proxy_port, "22223", "rowlock proxy port"); +///////// load balancer //////// +DEFINE_string(tera_lb_server_addr, "0.0.0.0", "default load balancer rpc server addr"); +DEFINE_string(tera_lb_server_port, "31000", "default load balancer rpc server port"); +DEFINE_int32(tera_lb_server_thread_num, 2, "default load balancer rpc server thread pool num"); +DEFINE_int32(tera_lb_impl_thread_num, 1, "default load balancer impl thread pool num"); +DEFINE_int32(tera_lb_load_balance_period_s, 300, "default load balance period(s)"); +DEFINE_int32(tera_lb_max_compute_steps, 
1000000, "default max compute steps for one balance procedure"); +DEFINE_int32(tera_lb_max_compute_steps_per_tablet, 1000, "default max compute steps per tablet for one balance procedure"); +DEFINE_int32(tera_lb_max_compute_time_ms, 30000, "default max compute time(ms) for one balance procedure"); +DEFINE_double(tera_lb_min_cost_need_balance, 0.1, "min cost needed for balance"); +DEFINE_double(tera_lb_move_count_cost_weight, 10, "move cost weight"); +DEFINE_int32(tera_lb_tablet_max_move_num, 10, "default tablet max move num for one balance procedure"); +DEFINE_double(tera_lb_tablet_max_move_percent, 0.001, "default tablet max move percent for one balance procedure"); +DEFINE_double(tera_lb_move_frequency_cost_weight, 10, "move frequency cost weight"); +DEFINE_int32(tera_lb_tablet_move_too_frequently_threshold_s, 600, "if move a tablet in this threshold time(s) again, it's been moved too frequently"); +DEFINE_double(tera_lb_abnormal_node_cost_weight, 10, "abnormal node cost weight"); +DEFINE_double(tera_lb_abnormal_node_ratio, 0.5, "abnormal node ratio"); +DEFINE_double(tera_lb_read_pending_node_cost_weight, 10, "read pending node cost weight"); +DEFINE_double(tera_lb_write_pending_node_cost_weight, 10, "write pending node cost weight"); +DEFINE_double(tera_lb_scan_pending_node_cost_weight, 10, "scan pending node cost weight"); +DEFINE_double(tera_lb_tablet_count_cost_weight, 0, "tablet count cost weight"); +DEFINE_double(tera_lb_size_cost_weight, 100, "size cost weight"); +DEFINE_double(tera_lb_read_load_cost_weight, 0, "read load cost weight"); +DEFINE_double(tera_lb_write_load_cost_weight, 0, "write load cost weight"); +DEFINE_double(tera_lb_scan_load_cost_weight, 0, "scan load cost weight"); +DEFINE_bool(tera_lb_debug_mode_enabled, false, "debug mode"); + +DEFINE_int32(rowlock_io_service_pool_size, 4, "rowlock server sofarpc server io_service_pool_size"); + +DEFINE_bool(mock_rowlock_enable, false, "test case switch"); +DEFINE_int64(tera_metric_hold_max_time, 
300000, "interval of prometheus collectors push a value to hold_queue in ms"); + +////////// PROFILER /////////// +DEFINE_bool(cpu_profiler_enabled, false, "enable cpu profiler"); +DEFINE_bool(heap_profiler_enabled, false, "enable heap profiler"); +DEFINE_int32(cpu_profiler_dump_interval, 120, "cpu profiler dump interval"); +DEFINE_int32(heap_profiler_dump_interval, 120, "heap profiler dump interval"); +DEFINE_int64(heap_profile_allocation_interval, 1073741824, "Env variable for heap profiler's allocation interval"); +DEFINE_int64(heap_profile_inuse_interval, 1073741824, "Env variable for heap profiler's inuse interval"); diff --git a/src/tera_main.cc b/src/tera_main.cc index 2331436b9..aa86c952f 100644 --- a/src/tera_main.cc +++ b/src/tera_main.cc @@ -8,12 +8,20 @@ #include #include "common/base/scoped_ptr.h" +#include "common/log/log_cleaner.h" +#include "common/heap_profiler.h" +#include "common/cpu_profiler.h" #include "tera_entry.h" #include "utils/utils_cmd.h" #include "version.h" +DECLARE_bool(cpu_profiler_enabled); +DECLARE_bool(heap_profiler_enabled); +DECLARE_int32(cpu_profiler_dump_interval); +DECLARE_int32(heap_profiler_dump_interval); DECLARE_string(tera_log_prefix); DECLARE_string(tera_local_addr); +DECLARE_bool(tera_info_log_clean_enable); extern std::string GetTeraEntryName(); extern tera::TeraEntry* GetTeraEntry(); @@ -27,11 +35,25 @@ static void SignalIntHandler(int sig) { int main(int argc, char** argv) { ::google::ParseCommandLineFlags(&argc, &argv, true); ::google::InitGoogleLogging(argv[0]); - if (!FLAGS_tera_log_prefix.empty()) { - tera::utils::SetupLog(FLAGS_tera_log_prefix); - } else { - tera::utils::SetupLog(GetTeraEntryName()); + + + if (FLAGS_tera_log_prefix.empty()) { + FLAGS_tera_log_prefix = GetTeraEntryName(); + if (FLAGS_tera_log_prefix.empty()) { + FLAGS_tera_log_prefix = "tera"; + } } + tera::utils::SetupLog(FLAGS_tera_log_prefix); + + tera::CpuProfiler cpu_profiler; + cpu_profiler.SetEnable(FLAGS_cpu_profiler_enabled) + 
.SetInterval(FLAGS_cpu_profiler_dump_interval) + .SetProfilerFile("Cpu"); + + tera::HeapProfiler heap_profiler; + heap_profiler.SetEnable(FLAGS_heap_profiler_enabled) + .SetInterval(FLAGS_heap_profiler_dump_interval) + .SetProfilerFile("Heap"); if (argc > 1) { std::string ext_cmd = argv[1]; @@ -52,6 +74,14 @@ int main(int argc, char** argv) { if (!entry->Start()) { return -1; } + + // start log cleaner + if (FLAGS_tera_info_log_clean_enable) { + common::LogCleaner::StartCleaner(); + LOG(INFO) << "start log cleaner"; + } else { + LOG(INFO) << "log cleaner is disable"; + } while (!g_quit) { if (!entry->Run()) { @@ -63,6 +93,8 @@ int main(int argc, char** argv) { LOG(INFO) << "received interrupt signal from user, will stop"; } + common::LogCleaner::StopCleaner(); + if (!entry->Shutdown()) { return -1; } diff --git a/src/tera_test_main.cc b/src/tera_test_main.cc index f7fb788c7..915c172d9 100644 --- a/src/tera_test_main.cc +++ b/src/tera_test_main.cc @@ -34,7 +34,6 @@ DEFINE_int64(pending_num, 100000, ""); DECLARE_string(flagfile); using namespace tera; -using namespace common::timer; void Usage(const std::string& prg_name) { std::cout << "DESCRIPTION \n\ @@ -43,13 +42,13 @@ void Usage(const std::string& prg_name) { version \n"; } -static common::Counter w_pending; -static common::Counter w_succ; -static common::Counter w_total; -static common::Counter r_pending; -static common::Counter r_succ; -static common::Counter r_total; -static common::Counter launch_time; +static Counter w_pending; +static Counter w_succ; +static Counter w_total; +static Counter r_pending; +static Counter r_succ; +static Counter r_total; +static Counter launch_time; void PrintStat() { LOG(INFO) << "Write total " << w_total.Get() @@ -298,7 +297,7 @@ int32_t SharedTableImplTest(int32_t argc, char** argv, ErrorCode* err) { thread_pool.AddTask(task); } while (thread_pool.PendingNum() > 0) { - std::cerr << common::timer::get_time_str(time(NULL)) << " " + std::cerr << get_time_str(time(NULL)) << " " 
<< "waiting for test finish, pending " << thread_pool.PendingNum() << " tasks ..." << std::endl; sleep(1); diff --git a/src/teracli_main.cc b/src/teracli_main.cc index 49c29dd6d..31c9dd55a 100644 --- a/src/teracli_main.cc +++ b/src/teracli_main.cc @@ -2,7 +2,6 @@ // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // - #include #include #include @@ -10,13 +9,14 @@ #include #include +#include #include #include #include #include #include #include - +#include #include #include @@ -26,6 +26,10 @@ #include "common/console/progress_bar.h" #include "common/file/file_path.h" #include "io/coding.h" +#include "io/utils_leveldb.h" +#include "leveldb/dfs.h" +#include "util/nfs.h" +#include "util/hdfs.h" #include "proto/kv_helper.h" #include "proto/proto_helper.h" #include "proto/tabletnode.pb.h" @@ -36,6 +40,7 @@ #include "sdk/sdk_zk.h" #include "sdk/table_impl.h" #include "tera.h" +#include "types.h" #include "utils/crypt.h" #include "utils/string_util.h" #include "utils/tprinter.h" @@ -50,6 +55,15 @@ DECLARE_string(tera_zk_root_path); DECLARE_bool(tera_sdk_batch_scan_enabled); DECLARE_int64(tera_sdk_status_timeout); +DECLARE_string(tera_leveldb_env_type); +DECLARE_string(tera_leveldb_env_dfs_type); +DECLARE_string(tera_leveldb_env_nfs_mountpoint); +DECLARE_string(tera_leveldb_env_nfs_conf_path); +DECLARE_string(tera_leveldb_env_hdfs2_nameservice_list); +DECLARE_string(tera_dfs_so_path); +DECLARE_string(tera_dfs_conf); +DECLARE_uint64(tera_sdk_read_max_qualifiers); + DEFINE_int32(tera_client_batch_put_num, 1000, "num of each batch in batch put mode"); DEFINE_int32(tera_client_scan_package_size, 1024, "the package size (in KB) of each scan request"); @@ -59,6 +73,7 @@ DEFINE_string(rollback_name, "", "rollback operation's name"); DEFINE_int32(lg, -1, "locality group number."); DEFINE_int32(concurrency, 1, "concurrency for compact table."); +DEFINE_int32(compact_timeout, 120000, "tablet compact timeout(ms), default 20min"); 
DEFINE_int64(timestamp, -1, "timestamp."); DEFINE_string(tablets_file, "", "tablet set file"); @@ -71,6 +86,15 @@ DEFINE_bool(rowkey_count, false, "is print rowkey count when scan"); DEFINE_bool(stdout_is_tty, true, "is stdout connected to a tty"); DEFINE_bool(reorder_tablets, false, "reorder tablets by ts list"); +// dfs related FLAGS +DEFINE_bool(asowner, false, "become owner and execute the command"); +DEFINE_bool(e, false, "test dfs file exist or not"); +DEFINE_bool(z, false, "test dfs file is zero or not"); +DEFINE_bool(d, false, "test dfs file is directory or not"); +DEFINE_bool(override, false, "dfs put file override the existing one"); +DEFINE_bool(attribute, false, "dfs list file detail attribute"); +DEFINE_bool(recursive, false, "dfs remove file recursively"); + volatile int32_t g_start_time = 0; volatile int32_t g_end_time = 0; volatile int32_t g_used_time = 0; @@ -88,16 +112,25 @@ using namespace tera; typedef std::shared_ptr
TablePtr; typedef std::shared_ptr TableImplPtr; typedef std::map CommandTable; - +// FileSystem command table +typedef std::map FSCommandTable; +//typedef std::map > FSCommandTable; /// global variables of single-row-txn used in interactive mode tera::Transaction* g_row_txn = NULL; Table* g_row_txn_table = NULL; +leveldb::Dfs* g_dfs = NULL; + static CommandTable& GetCommandTable(){ static CommandTable command_table; return command_table; } +static FSCommandTable& GetFSCommandTable() { + static FSCommandTable fs_command_table; + return fs_command_table; +} + const char* builtin_cmd_list[] = { "create", "create [] \n\ @@ -224,6 +257,13 @@ const char* builtin_cmd_list[] = { commit \n\ (only support single row transaction)", + "cas", + "cas \n\ + Compare and set a value atomically. (The txn value of table schema must be 'on') \n\ + This command will compare the value at rowkey:columnfamily:qualifier with : \n\ + -> equal : put to this location. \n\ + -> not equal: do nothing.", + "user", "user \n\ create \n\ @@ -236,8 +276,14 @@ const char* builtin_cmd_list[] = { "tablet", "tablet \n\ move \n\ + movex \n\ + * only for force move tablet ignore error \n\ reload \n\ force to unload and load on the same ts \n\ + reloadx \n\ + force to unload and load on the same ts \n\ + * only for force reload tablet ignore error \n\ + lg_list : lg1:lg2:lg3 \n\ compact \n\ split \n\ merge \n\ @@ -290,9 +336,27 @@ const char* builtin_cmd_list[] = { "help [cmd] \n\ show manual for a or all cmd(s)", + "dfs", + "dfs [cmd] args \n\ + mkdir $NFS_PATH \n\ + touchz $NFS_PATH \n\ + test [-e|-z|-d] $NFS_PATH \n\ + get $NFS_PATH $LOCAL_PATH \n\ + put [--override] $LOCAL_PATH $NFS_PATH \n\ + ls [--attribute] $NFS_PATH \n\ + lsr [--attribute] $NFS_PATH \n\ + dus $NFS_PATH \n\ + rm [--recursive] $NFS_PATH \n\ + stat $NFS_PATH \n\ + rename $NFS_PATH_SRC $NFS_PATH_DEST \n\ + unlockdir $NFS_PATH \n\ + checksum $NFS_PATH $OFFSET $LENGTH \n\ + forcerelease $NFS_PATH", + "version", "version \n\ show version 
info", + }; static void PrintCmdHelpInfo(const char* msg) { @@ -662,21 +726,21 @@ int32_t PutOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { value = argv[5]; } - RowMutation* mutation = table->NewRowMutation(rowkey); + std::unique_ptr mutation(table->NewRowMutation(rowkey)); if (FLAGS_timestamp == -1) { mutation->Put(columnfamily, qualifier, value); } else { mutation->Put(columnfamily, qualifier, FLAGS_timestamp, value); } if (g_row_txn != NULL) { - g_row_txn->ApplyMutation(mutation); + g_row_txn->ApplyMutation(mutation.get()); } else { - table->ApplyMutation(mutation); + table->ApplyMutation(mutation.get()); } if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { std::cout << mutation->GetError().ToString() << std::endl; + return -1; } - delete mutation; return 0; } @@ -912,7 +976,7 @@ int32_t GetOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { std::string columnfamily = ""; std::string qualifier = ""; std::string value; - RowReader* reader = table->NewRowReader(rowkey); + std::unique_ptr reader(table->NewRowReader(rowkey)); if (argc == 4) { // use table as kv or get row } else if (argc == 5) { @@ -924,10 +988,11 @@ int32_t GetOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { reader->AddColumnFamily(columnfamily); } } + reader->SetMaxQualifiers(FLAGS_tera_sdk_read_max_qualifiers); if (g_row_txn != NULL) { - g_row_txn->Get(reader); + g_row_txn->Get(reader.get()); } else { - table->Get(reader); + table->Get(reader.get()); } while (!reader->Done()) { std::cout << PrintableFormatter(reader->RowName()) << ":" @@ -939,8 +1004,8 @@ int32_t GetOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { if (reader->GetError().GetType() != tera::ErrorCode::kOK && reader->GetError().GetType() != tera::ErrorCode::kNotFound) { std::cout << reader->GetError().ToString() << std::endl; + return -1; } - delete reader; return 0; } @@ -1052,6 +1117,7 @@ int32_t ScanRange(TablePtr& table, ScanDescriptor& 
desc, ErrorCode* err) { desc.SetBufferSize(FLAGS_tera_client_scan_package_size << 10); desc.SetAsync(FLAGS_tera_sdk_batch_scan_enabled); desc.SetSnapshot(FLAGS_snapshot); + desc.SetMaxQualifiers(FLAGS_tera_sdk_read_max_qualifiers); ResultStream* result_stream; if ((result_stream = table->Scan(desc, err)) == NULL) { @@ -1161,7 +1227,7 @@ std::string BytesNumberToString(const uint64_t size) { std::string DateNumberToString(int64_t ts) { if (FLAGS_stdout_is_tty) { - return common::timer::get_time_str(ts); + return get_time_str(ts); } return NumberToString(ts); } @@ -1172,6 +1238,10 @@ static std::string GetTabletStatusString(const TabletMetaList& tablet_list, int6 // new tera master int64_t delta = now - tablet_list.timestamp(i); TabletStatus status = tablet_list.meta(i).status(); + TabletStatus db_status = tablet_list.counter(i).db_status(); + if (db_status == kTabletCorruption) { + return StatusCodeToString(db_status); + } if ((status == kTableReady) && (delta > FLAGS_tera_sdk_status_timeout * 1000000)) { return "kUnknown"; } else { @@ -1187,7 +1257,7 @@ int32_t ShowTabletList(const TabletMetaList& tablet_list, bool is_server_addr, b TPrinter printer; int cols; std::vector row; - int64_t now = common::timer::get_micros(); + int64_t now = get_micros(); if (is_x) { if (is_server_addr) { cols = 14; @@ -1492,7 +1562,7 @@ int32_t ShowSingleTable(Client* client, const string& table_name, if (FLAGS_stdout_is_tty) { std::cout << std::endl; std::cout << "create time: " - << common::timer::get_time_str(table_meta.create_time()) << std::endl; + << get_time_str(table_meta.create_time()) << std::endl; std::cout << std::endl; } ShowTabletList(tablet_list, true, is_x); @@ -1514,7 +1584,7 @@ int32_t ShowSingleTabletNodeInfo(Client* client, const string& addr, std::cout << " address: " << info.addr() << std::endl; std::cout << " status: " << info.status_m() << std::endl; std::cout << " update time: " - << common::timer::get_time_str(info.timestamp() / 1000000) << "\n\n"; + << 
get_time_str(info.timestamp() / 1000000) << "\n\n"; int cols = 4; TPrinter printer(cols, "workload", "tablets", "load", "split"); @@ -1582,7 +1652,7 @@ int32_t ShowTabletNodesInfo(Client* client, bool is_x, ErrorCode* err) { return -1; } - int64_t now = common::timer::get_micros(); + int64_t now = get_micros(); int cols; TPrinter printer; if (is_x) { @@ -2256,7 +2326,7 @@ int32_t CompactTablet(TabletInfo& tablet, int lg) { request.set_tablet_name(tablet.table_name); request.mutable_key_range()->set_key_start(tablet.start_key); request.mutable_key_range()->set_key_end(tablet.end_key); - tabletnode::TabletNodeClient tabletnode_client(tablet.server_addr, 60000); + tabletnode::TabletNodeClient tabletnode_client(tablet.server_addr, FLAGS_compact_timeout); std::string path; if (lg >= 0) { @@ -2292,6 +2362,77 @@ int32_t CompactTablet(TabletInfo& tablet, int lg) { return 0; } +static bool ComputeCompactInsertKeys(RawKey rawkey, std::string* start_key, std::string* end_key) { + static std::string x0("\x0", 1); + static std::string x1("\x1", 1); + *start_key = (rawkey == Readable ? *start_key + x1 : *start_key + x0); + + // pop all '\x0' charcters at the tailing of end_key. Note that Readable should not contain any + // '\x0' characters but here we do not + while (end_key->size() > 0) { + unsigned char last = end_key->at(end_key->size() - 1); + if (last == '\x0') { + end_key->pop_back(); + } + // for Readable key, if the last nonzero character of end_key is '\x1', the wanted key that + // is barely smaller than end_key is computed as: end_key.substr(0, end_key.rfind('\x1')); + // eg: end_key: abcde'\x1' -> wanted key: abcde + else if (rawkey == Readable && last == '\x1'){ + end_key->pop_back(); + return true; + } + else { + break; + } + } + // for other case, the wanted key that is barely smaller than end_key is computed as: minus the + // last char of end_key with 1 and append '\x255' to end key until it reaches the max keysize + // allowed. 
Notice that the last char of end_key will not be '\x0' for Binary key and not be + // '\x0' nor '\x1' for Readable key here + if (end_key->size() > 0) { + (*end_key)[end_key->size() - 1] = char((*end_key)[end_key->size() - 1] - 1); + } + end_key->resize(kRowkeySize - 1, char(255)); + return true; +} + +void CompactPreprocess(TableImplPtr table, const std::vector& tablet_infos) { + std::vector readers; + for (std::size_t i = 0; i < tablet_infos.size(); ++i) { + const TabletInfo& tablet_info = tablet_infos[i]; + std::string start_key(tablet_info.start_key); + std::string end_key(tablet_info.end_key); + ComputeCompactInsertKeys(table->GetTableSchema().raw_key(), &start_key, &end_key); + std::vector readers; + RowReader* start_reader = table->NewRowReader(start_key); + RowReader* end_reader = table->NewRowReader(end_key); + readers.push_back(start_reader); + readers.push_back(end_reader); + } + if (readers.size() > 0) { + table->Get(readers); + } + std::vector mutations; + for (std::size_t i = 0; i < readers.size(); ++i) { + if (readers[i]->GetError().GetType() == tera::ErrorCode::kNotFound) { + RowMutation* mutation = table->NewRowMutation(readers[i]->RowKey()); + mutation->DeleteRow(); + mutations.push_back(mutation); + } + delete readers[i]; + } + if (mutations.size() > 0) { + table->ApplyMutation(mutations); + for (std::size_t i = 0; i < mutations.size(); ++i) { + if (mutations[i]->GetError().GetType() != tera::ErrorCode::kOK) { + LOG(WARNING) <<"write key " << DebugString(mutations[i]->RowKey()) + << " failed, error: " << mutations[i]->GetError().ToString(); + } + delete mutations[i]; + } + } +} + int32_t CompactTabletOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { if (argc != 4) { PrintCmdHelpInfo(argv[1]); @@ -2337,6 +2478,18 @@ int32_t CompactTabletOp(Client* client, int32_t argc, std::string* argv, ErrorCo << ", total tablets: " << tablet_list.size(); return -4; } + std::string command = argv[1]; + if (command == "compactx") + { + 
tera::ClientImpl* client_impl = static_cast(client); + TableImplPtr table_impl(client_impl->OpenTableInternal(table, err)); + if (table_impl == NULL) { + LOG(ERROR) << "fail to open table: " << table; + return -5; + } + std::vector tablet_infos(1, *tablet_it); + CompactPreprocess(table_impl, tablet_infos); + } return CompactTablet(*tablet_it, lg); } @@ -2409,32 +2562,34 @@ int32_t ScanTabletOp(Client* client, int32_t argc, std::string* argv, ErrorCode* } int32_t TabletOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { - if ((argc != 4) && (argc != 5)) { + if ((argc != 4) && (argc != 5) && (argc != 6)) { PrintCmdHelpInfo(argv[1]); return -1; } std::string op = argv[2]; + std::string tablet_id = argv[3]; + std::string server_addr; - if (op == "compact") { + std::vector arg_list; + arg_list.push_back(op); + arg_list.push_back(tablet_id); + if (op == "compact" || op == "compactx") { return CompactTabletOp(client, argc, argv, err); } else if (op == "scan" || op == "scanallv") { return ScanTabletOp(client, argc, argv, err); - } else if (op != "move" && op != "split" && op != "merge" && op != "reload") { + } else if (argc == 4 && (op == "reload" || op == "merge" || op == "split")) { + // nothing to do + } else if (argc == 5 && (op == "reloadx" || op == "move" || op == "split")) { + // reloadx->lg_list move->server_addr split->split_key + arg_list.push_back(argv[4]); + } else if (argc == 6 && op == "movex") { + arg_list.push_back(argv[4]); // server_addr + arg_list.push_back(argv[5]); // lg_list + } else { PrintCmdHelpInfo(argv[1]); return -1; } - - std::string tablet_id = argv[3]; - std::string server_addr; - if (argc == 5) { - server_addr = argv[4]; - } - - std::vector arg_list; - arg_list.push_back(op); - arg_list.push_back(tablet_id); - arg_list.push_back(server_addr); if (!client->CmdCtrl("tablet", arg_list, NULL, NULL, err)) { LOG(ERROR) << "fail to " << op << " tablet " << tablet_id; return -1; @@ -2543,6 +2698,19 @@ int32_t CompactOp(Client* 
client, int32_t argc, std::string* argv, ErrorCode* er } ReorderTabletList(&tablet_list); + std::string command = argv[1]; + if (command == "compactx") + { + tera::ClientImpl* client_impl = static_cast(client); + TableImplPtr table_impl(client_impl->OpenTableInternal(tablename, err)); + if (table_impl == NULL) { + LOG(ERROR) << "fail to open table: " << tablename; + return -5; + } + std::cout << "begin compact preprocess tablet: " << tablename << std::endl; + CompactPreprocess(table_impl, tablet_list); + } + int conc = FLAGS_concurrency; if (conc <= 0 || conc > 1000) { LOG(ERROR) << "compact concurrency illegal: " << conc; @@ -2556,7 +2724,7 @@ int32_t CompactOp(Client* client, int32_t argc, std::string* argv, ErrorCode* er thread_pool.AddTask(task); } while (thread_pool.PendingNum() > 0) { - std::cerr << common::timer::get_time_str(time(NULL)) << " " + std::cerr << get_time_str(time(NULL)) << " " << thread_pool.PendingNum() << " tablets waiting for compact ..." << std::endl; sleep(5); @@ -3189,6 +3357,65 @@ int TxnOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { } } +int32_t CasOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + if (argc != 7) { + LOG(ERROR) << "args number error: " << argc << ", need 7"; + PrintCmdHelpInfo(argv[1]); + return -1; + } + + const std::string& tablename = argv[2]; + TablePtr table(client->OpenTable(tablename, err)); + if (!table) { + LOG(ERROR) << "fail to open table"; + return -1; + } + + const std::string& rowkey = argv[3]; + const std::string& old_val = argv[5]; + const std::string& new_val = argv[6]; + std::string columnfamily = ""; + std::string qualifier = ""; + ParseCfQualifier(argv[4], &columnfamily, &qualifier); + + std::unique_ptr txn(table->StartRowTransaction(rowkey)); + if (!txn) { + LOG(ERROR) << "fail to start row txn"; + return -1; + } + + std::unique_ptr reader(table->NewRowReader(rowkey)); + reader->AddColumn(columnfamily, qualifier); + txn->Get(reader.get()); + if 
(reader->Done()) { + std::cout << "cas failed: NotFound" << std::endl; + return -1; + } + std::string cur_val = reader->Value(); + if (cur_val != old_val) { + std::cout << "cas failed: NotEqual" << std::endl; + return -1; + } + + std::unique_ptr mutation(table->NewRowMutation(rowkey)); + mutation->Put(columnfamily, qualifier, new_val); + txn->ApplyMutation(mutation.get()); + if (mutation->GetError().GetType() != tera::ErrorCode::kOK) { + std::cout << "cas failed: " << tera::strerr(mutation->GetError()) << std::endl; + return -1; + } + + auto error_code = txn->Commit(); + if (error_code.GetType() != tera::ErrorCode::kOK) { + std::cout << "cas failed: " << tera::strerr(error_code) << std::endl; + return -1; + } else { + std::cout << "cas success" << std::endl; + } + + return 0; +} + int32_t HelpOp(Client*, int32_t argc, std::string* argv, ErrorCode*) { if (argc == 2) { PrintAllCmd(); @@ -3217,6 +3444,469 @@ bool ParseCommand(int argc, char** arg_list, std::vector* parsed_ar return true; } + +int32_t InitDfsClient() { + if (g_dfs != NULL) { + return 0; + } + if (FLAGS_tera_leveldb_env_dfs_type == "nfs") { + if (access(FLAGS_tera_leveldb_env_nfs_conf_path.c_str(), R_OK) == 0) { + LOG(INFO) << "init nfs system: use configure file" << FLAGS_tera_leveldb_env_nfs_conf_path; + leveldb::Nfs::Init(FLAGS_tera_leveldb_env_nfs_mountpoint, FLAGS_tera_leveldb_env_nfs_conf_path); + g_dfs = leveldb::Nfs::GetInstance(); + } + else { + LOG(FATAL) << "init nfs system: no configure file found"; + return -1; + } + } else if (FLAGS_tera_leveldb_env_dfs_type == "hdfs2") { + LOG(INFO) << "hdfs2 system support currently, please use hadoop-client"; + g_dfs = new leveldb::Hdfs2(FLAGS_tera_leveldb_env_hdfs2_nameservice_list); + } else if (FLAGS_tera_leveldb_env_dfs_type == "hdfs") { + g_dfs = new leveldb::Hdfs(); + } + else { + LOG(INFO) << "init dfs system: " << FLAGS_tera_dfs_so_path << "(" << FLAGS_tera_dfs_conf << ")"; + g_dfs = leveldb::Dfs::NewDfs(FLAGS_tera_dfs_so_path, 
FLAGS_tera_dfs_conf); + } + return 0; +} + +int32_t FileSystemOp(Client* client, int32_t argc, std::string* argv, ErrorCode* err) { + if (argc < 4) { + PrintCmdHelpInfo(argv[1]); + return -1; + } + if (0 != InitDfsClient()) { + LOG(FATAL) << "InitDfsClient failed"; + return -1; + } + std::string operation = argv[2]; + if (GetFSCommandTable().find(operation) == GetFSCommandTable().end()) { + std::cerr << "unsupported dfs command: " << operation << std::endl; + return -1; + } + int ret = (GetFSCommandTable().find(operation)->second)(argc, argv, err); + return ret; +} + +int DfsPrintAttr(const char* pathname, struct stat* st, void* arg = NULL) { + char mode_str[10]; + memset(mode_str, '-', sizeof(mode_str)); + char time_str[64]; + strftime(time_str, sizeof(time_str), "%b %d %H:%M %Y", localtime(&st->st_mtime)); + printf("%c%c%c%c%c%c%c%c%c%c %16lx %16ld %s %s", + (S_IFDIR & st->st_mode) ? 'd' : '-', + (S_IRUSR & st->st_mode) ? 'r' : '-', + (S_IWUSR & st->st_mode) ? 'w' : '-', + (S_IXUSR & st->st_mode) ? 'x' : '-', + (S_IRGRP & st->st_mode) ? 'r' : '-', + (S_IWGRP & st->st_mode) ? 'w' : '-', + (S_IXGRP & st->st_mode) ? 'x' : '-', + (S_IROTH & st->st_mode) ? 'r' : '-', + (S_IWOTH & st->st_mode) ? 'w' : '-', + (S_IXOTH & st->st_mode) ? 
'x' : '-', + st->st_ino, + st->st_size, time_str, pathname); + if (S_IFDIR & st->st_mode) { + printf("/"); + } + printf("\n"); + return 0; +} + +static std::string FormatPath(const std::string pathname) { + std::string result; + bool need_strip = false; + for (std::string::size_type i = 0; i < pathname.length(); ++i) { + if (pathname.at(i) == '/') { + if (need_strip) { + continue; + } + else { + result.push_back(pathname.at(i)); + need_strip = true; + } + } else { + need_strip = false; + result.push_back(pathname.at(i)); + } + } + if (result.at(result.length() - 1) == '/') { + result.pop_back(); + } + return result; +} + +int32_t DfsPrintPath(const char* pathname, struct stat* st, void* arg = NULL) { + printf("%s", FormatPath(pathname).c_str()); + if (S_IFDIR & st->st_mode) { + printf("/"); + } + printf("\n"); + return 0; +} + +int32_t DfsSizeSum(const char* pathname, struct stat* st, void* arg) { + uint64_t* sum = reinterpret_cast(arg); + if (!(S_IFDIR & st->st_mode)) { + *sum += st->st_size; + } + return 0; +} + +int32_t DfsTryLockParentPath(const std::string path) { + std::string parent_path = path; + if (parent_path.at(parent_path.length() - 1) == '/') { + parent_path.pop_back(); + } + std::string::size_type pos = parent_path.rfind("/"); + if (pos == std::string::npos) { + fprintf(stderr, "invalid path: %s\n", path.c_str()); + return -1; + } + if (pos == 0) { + parent_path = "/"; + } + parent_path = parent_path.substr(0, pos); + return g_dfs->LockDirectory(parent_path); +} + +int32_t DfsRmPath(const char* pathname, struct stat* st, void*) { + int ret = 0; + if (S_IFDIR & st->st_mode) { + ret = g_dfs->DeleteDirectory(pathname); + if (0 != ret) { + perror("RmDir fail"); + return ret; + } + } else { + ret = g_dfs->Delete(pathname); + if (0 != ret) { + perror("unlink fail"); + } + } + return ret; +} + +typedef int(*WalkFunc)(const char*, struct stat*, void* arg); +int32_t DfsDirWalk(const char* dir_name, WalkFunc func, bool is_recursive, void* arg = NULL) { + 
struct stat st; + memset(&st, 0, sizeof(struct stat)); + char fullpath[4096] = {0}; + // not a directory, end of recursive call + if (0 == g_dfs->Stat(dir_name, &st) && !(S_IFDIR & st.st_mode)) { + return 0; + } + std::vector sub_paths; + if (0 != g_dfs->ListDirectory(dir_name, &sub_paths)) { + return -1; + } + if (func == DfsRmPath && FLAGS_asowner) { + if (0 != g_dfs->LockDirectory(dir_name)) { + fprintf(stderr, "Lock Directory %s failed", dir_name); + return -1; + } + } + for (std::size_t i = 0; i < sub_paths.size(); ++i) { + snprintf(fullpath, sizeof(fullpath), "%s/%s", dir_name, sub_paths[i].c_str()); + memset(&st, 0, sizeof(struct stat)); + if (g_dfs->Stat(fullpath, &st) < 0) { + perror("Stat failed"); + continue; + } + if (is_recursive && (S_IFDIR & st.st_mode)) { + DfsDirWalk(fullpath, func, true, arg); + } + func(fullpath, &st, arg); + } + return 0; +} + + +int32_t DfsGetOp(int32_t argc, std::string* argv, ErrorCode* err) { + if (argc != 5) { + fprintf(stderr, "Invalid arguments"); + return -1; + } + int ret = 0; + const std::string& src_path = argv[3]; + const std::string& local_path = argv[4]; + std::string local_file_path = local_path; + int local_fd = 0; + if (local_path != "-") { + struct stat st; + if (stat(local_path.c_str(), &st) == 0 && (S_IFDIR & st.st_mode)) { + char* tmp_src_path = strdup(src_path.c_str()); + char* filename = basename(tmp_src_path); + local_file_path.append("/").append(filename); + free(tmp_src_path); + } + local_fd = open(local_file_path.c_str(), O_CREAT | O_WRONLY | O_TRUNC, 0644); + if (local_fd < 0) { + fprintf(stderr, "local file open fail, path=%s, errno=%d", local_file_path.c_str(), errno); + return errno; + } + } + leveldb::DfsFile* file = g_dfs->OpenFile(src_path, leveldb::RDONLY); + if (NULL == file) { + fprintf(stderr, "open dfs file fail, path=%s, errno=%d", src_path.c_str(), errno); + return errno; + } + char buf[128 * 1024]; + ssize_t ret_size = 0; + while ((ret_size = file->Read(buf, sizeof(buf))) > 0) { + 
ssize_t writelen = write(local_fd, buf, ret_size); + if (writelen < 0) { + fprintf(stderr, "write local file fail, path=%s, errno=%d", local_file_path.c_str(), errno); + break; + ret = errno; + } + } + if (local_fd > 0) { + close(local_fd); + } + file->CloseFile(); + + return ret; +} + +int32_t DfsPutOp(int32_t argc, std::string* argv, ErrorCode* err) { + fprintf(stderr, "not implemented"); + return -1; +} + + +int32_t DfsLsOp(int32_t argc, std::string* argv, ErrorCode* err) { + const std::string& filename = argv[3]; + struct stat fstat; + int ret = 0; + if (0 == g_dfs->Stat(filename.c_str(), &fstat)) { + if (S_IFDIR & fstat.st_mode) { + if (FLAGS_attribute) { + DfsPrintAttr(filename.c_str(), &fstat); + ret = DfsDirWalk(filename.c_str(), DfsPrintAttr, FLAGS_recursive); + } else { + DfsPrintPath(filename.c_str(), &fstat); + ret = DfsDirWalk(filename.c_str(), DfsPrintPath, FLAGS_recursive); + } + } + else { + if (FLAGS_attribute) { + DfsPrintAttr(filename.c_str(), &fstat); + } + else { + DfsPrintPath(filename.c_str(), &fstat); + } + } + } + return ret; +} +int32_t DfsLsrOp(int32_t argc, std::string* argv, ErrorCode* err) { + + bool old_recursive_flag = FLAGS_recursive; + FLAGS_recursive = true; + DfsLsOp(argc, argv, err); + FLAGS_recursive = old_recursive_flag; + return errno; +} + +int32_t DfsDusOp(int32_t argc, std::string* argv, ErrorCode* err) { + struct stat st; + const std::string& path = argv[3]; + uint64_t size = 0; + if (g_dfs->Stat(path, &st) != 0) { + perror("Stat failed"); + return errno; + } + if (S_IFDIR & st.st_mode) { + DfsDirWalk(path.c_str(), DfsSizeSum, true, &size); + } else { + DfsSizeSum(path.c_str(), &st, &size); + } + fprintf(stdout, "%s:\t%lu\n", path.c_str(), size); + return 0; +} + +int32_t DfsTouchzOp(int32_t argc, std::string* argv, ErrorCode* err) { + const std::string& path = argv[3]; + struct stat st; + std::string::size_type pos = path.rfind("/"); + if (pos == std::string::npos || pos == path.length() - 1) { + fprintf(stderr, "invalid 
filepath: %s", path.c_str()); + return -1; + } + + int ret = g_dfs->Stat(path, &st); + if (0 != ret) { + if (errno != ENOENT) { + perror("Stat failed"); + return errno; + } + std::string parent_path = path.substr(0, pos); + ret = g_dfs->CreateDirectory(parent_path); + if (0 != ret) { + perror("create parent path failed"); + return errno; + } + if (FLAGS_asowner) { + DfsTryLockParentPath(path); + } + leveldb::DfsFile* file = g_dfs->OpenFile(path, leveldb::WRONLY); + if (NULL == file) { + perror("create or open file fail"); + return errno; + } + } else { + if (S_IFDIR & st.st_mode) { + fprintf(stderr, "Touchz fail: %s not Regular file", path.c_str()); + ret = EISDIR; + } else { + fprintf(stdout, "%s already exists", path.c_str()); + ret = EEXIST; + } + } + return ret; +} + +int32_t DfsMkdirOp(int32_t argc, std::string* argv, ErrorCode* err) { + const std::string& path = argv[3]; + if (FLAGS_asowner) { + if (0 != DfsTryLockParentPath(path)) { + fprintf(stderr, "Try lock parent path failed"); + return -1; + } + } + int ret = g_dfs->CreateDirectory(path); + if (0 != ret) { + fprintf(stderr, "Create Path: %s failed, errno=%d\n", path.c_str(), errno); + ret = errno; + } + return ret; +} + +int32_t DfsRmOp(int32_t argc, std::string* argv, ErrorCode* err) { + const std::string& path = argv[3]; + struct stat st; + if (0 != g_dfs->Stat(path.c_str(), &st)) { + perror("Stat fail: "); + return -1; + } + int ret = 0; + if (FLAGS_asowner) { + DfsTryLockParentPath(path); + } + if (st.st_mode & S_IFDIR) { + if (FLAGS_recursive) { + DfsDirWalk(path.c_str(), DfsRmPath, true, NULL); + ret = g_dfs->DeleteDirectory(path); + } else { + ret = g_dfs->DeleteDirectory(path); + } + } else { + ret = g_dfs->Delete(path); + } + if (0 != ret) { + perror("delete failed: "); + } + + return errno; +} + +int32_t DfsTestOp(int32_t argc, std::string* argv, ErrorCode* err) { + fprintf(stderr, "not implemented\n"); + return -1; +} + +int32_t DfsStatOp(int32_t argc, std::string* argv, ErrorCode* err) { + 
struct stat st; + const std::string& filename = argv[3]; + if (0 != g_dfs->Stat(filename, &st)) { + return errno; + } + const char* file_type; + if (S_IFREG & st.st_mode) { + file_type = "Regular"; + } else if (S_IFDIR & st.st_mode) { + file_type = "Directory"; + } else { + file_type = "Symlink"; + } + fprintf(stdout, "File:\t%s\n", filename.c_str()); + fprintf(stdout, "Inode:\t0x%lx\n", st.st_ino); + fprintf(stdout, "Type:\t%s\n", file_type); + fprintf(stdout, "Size:\t%lu\n", st.st_size); + fprintf(stdout, "Mode:\t%o\n", st.st_mode & 0777); + fprintf(stdout, "Link:\t%lu\n", st.st_nlink); + fprintf(stdout, "Atime:\t%lu\t%s", st.st_atime, ctime(&st.st_atime)); + fprintf(stdout, "Mtime:\t%lu\t%s", st.st_mtime, ctime(&st.st_mtime)); + fprintf(stdout, "Ctime:\t%lu\t%s", st.st_ctime, ctime(&st.st_ctime)); + + return 0; +} + +int32_t DfsRenameOp(int32_t argc, std::string* argv, ErrorCode* err) { + if (argc != 5) { + fprintf(stderr, "invalid arguments\n"); + return -1; + } + std::string& src_path = argv[3]; + std::string& dest_path = argv[4]; + if (FLAGS_asowner) { + if (0 != DfsTryLockParentPath(dest_path)) { + fprintf(stderr, "Lock ParentPath failed"); + return -1; + } + } + + int ret = g_dfs->Rename(src_path, dest_path); + if (0 != ret) { + perror("Rename fail"); + ret = errno; + } + return ret; +} + +int32_t DfsUnlockDirOp(int32_t argc, std::string* argv, ErrorCode* err) { + const std::string& path = argv[3]; + return g_dfs->ClearDirOwner(path); +} + +int32_t DfsChecksumOp(int32_t argc, std::string* argv, ErrorCode* err) { + fprintf(stderr, "Not Implemented"); + return -1; +} + +int32_t DfsLChecksumOp(int32_t argc, std::string* argv, ErrorCode* err) { + fprintf(stderr, "Not Implemented"); + return -1; +} + +int32_t DfsForceReleaseOp(int32_t argc, std::string* argv, ErrorCode* err) { + fprintf(stderr, "Not Implemented"); + return -1; +} + +static void InitializeFileSystemCommandTable() { + FSCommandTable& fs_command_table = GetFSCommandTable(); + 
fs_command_table["get"] = DfsGetOp; + fs_command_table["put"] = DfsPutOp; + fs_command_table["lsr"] = DfsLsrOp; + fs_command_table["ls"] = DfsLsOp; + fs_command_table["dus"] = DfsDusOp; + fs_command_table["touchz"] = DfsTouchzOp; + fs_command_table["mkdir"] = DfsMkdirOp; + fs_command_table["rm"] = DfsRmOp; + fs_command_table["test"] = DfsTestOp; + fs_command_table["stat"] = DfsStatOp; + fs_command_table["rename"] = DfsRenameOp; + fs_command_table["unlockdir"] = DfsUnlockDirOp; + fs_command_table["checksum"] = DfsChecksumOp; + fs_command_table["lchecksum"] = DfsLChecksumOp; + fs_command_table["forcerelease"] = DfsForceReleaseOp; + return; +} + static void InitializeCommandTable(){ CommandTable& command_table = GetCommandTable(); command_table["create"] = CreateOp; @@ -3257,6 +3947,7 @@ static void InitializeCommandTable(){ command_table["rename"] = RenameOp; command_table["meta"] = MetaOp; command_table["compact"] = CompactOp; + command_table["compactx"] = CompactOp; command_table["findmaster"] = FindMasterOp; command_table["findts"] = FindTsOp; command_table["findtablet"] = FindTabletOp; @@ -3270,6 +3961,9 @@ static void InitializeCommandTable(){ command_table["rangex"] = RangeOp; command_table["txn"] = TxnOp; command_table["help"] = HelpOp; + command_table["cas"] = CasOp; + command_table["dfs"] = FileSystemOp; + InitializeFileSystemCommandTable(); } int ExecuteCommand(Client* client, int argc, char** arg_list) { diff --git a/src/terautil.cc b/src/terautil.cc new file mode 100644 index 000000000..e4f5727d0 --- /dev/null +++ b/src/terautil.cc @@ -0,0 +1,732 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+// + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "ins_sdk.h" + +#include "common/base/string_ext.h" +#include "common/base/string_number.h" +#include "common/console/progress_bar.h" +#include "common/file/file_path.h" +#include "io/coding.h" +#include "proto/kv_helper.h" +#include "proto/proto_helper.h" +#include "proto/tabletnode.pb.h" +#include "proto/tabletnode_client.h" +#include "sdk/client_impl.h" +#include "sdk/cookie.h" +#include "sdk/sdk_utils.h" +#include "sdk/sdk_zk.h" +#include "sdk/table_impl.h" +#include "tera.h" +#include "types.h" +#include "utils/config_utils.h" +#include "utils/crypt.h" +#include "utils/schema_utils.h" +#include "utils/string_util.h" +#include "utils/tprinter.h" +#include "utils/utils_cmd.h" +#include "version.h" + +DECLARE_string(flagfile); +DECLARE_string(log_dir); +DECLARE_string(tera_master_meta_table_name); + +DEFINE_string(dump_tera_src_conf, "../conf/src_tera.flag", "src cluster for tera"); +DEFINE_string(dump_tera_dest_conf, "../conf/dest_tera.flag", "dest cluster for tera"); +DEFINE_string(dump_tera_src_root_path, "/xxx_", "src tera root path"); +DEFINE_string(dump_tera_dest_root_path, "/xxx_", "dest tera root path"); +DEFINE_string(ins_cluster_addr, "terautil_ins", "terautil dump ins cluster conf"); +DEFINE_string(ins_cluster_root_path, "/terautil/dump/xxxx", "dump meta ins"); +DEFINE_string(dump_tera_src_meta_addr, "", "src addr for meta_table"); +DEFINE_string(dump_tera_dest_meta_addr, "", "dest addr for meta_table"); +DEFINE_int64(dump_manual_split_interval, 1000, "manual split interval in ms"); +DEFINE_bool(dump_enable_manual_split, false, "manual split may take a long time, so disable it"); + +using namespace tera; + +const char* terautil_builtin_cmds[] = { + "dump", + "dump \n\ + prepare_safe \n\ + prepare \n\ + run \n\ + show \n\ + check", + + "help", + "help [cmd] \n\ + show manual for a or all cmd(s)", + + "version", + 
"version \n\ + show version info", +}; + +static void ShowCmdHelpInfo(const char* msg) { + if (msg == NULL) { + return; + } + int count = sizeof(terautil_builtin_cmds)/sizeof(char*); + for (int i = 0; i < count; i+=2) { + if(strncmp(msg, terautil_builtin_cmds[i], 32) == 0) { + std::cout << terautil_builtin_cmds[i + 1] << std::endl; + return; + } + } +} + +static void ShowAllCmd() { + std::cout << "there is cmd list:" << std::endl; + int count = sizeof(terautil_builtin_cmds)/sizeof(char*); + bool newline = false; + for (int i = 0; i < count; i+=2) { + std::cout << std::setiosflags(std::ios::left) << std::setw(20) << terautil_builtin_cmds[i]; + if (newline) { + std::cout << std::endl; + newline = false; + } else { + newline = true; + } + } + std::cout << std::endl << "help [cmd] for details." << std::endl; +} + +int32_t HelpOp(int32_t argc, char** argv) { + if (argc == 2) { + ShowAllCmd(); + } else if (argc == 3) { + ShowCmdHelpInfo(argv[2]); + } else { + ShowCmdHelpInfo("help"); + } + return 0; +} + +int DumpRange(const std::string& ins_cluster_addr, + const std::string& ins_cluster_root_path, + const tera::TableMetaList& table_list, + const tera::TabletMetaList& tablet_list) { + int res = 0; + galaxy::ins::sdk::SDKError ins_err; + galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); + std::string table_path = ins_cluster_root_path + "/table"; + std::string tablet_path = ins_cluster_root_path + "/tablet"; + //std::string lock_path = ins_cluster_root_path + "/lock"; + + for (int32_t i = 0; i < table_list.meta_size(); i++) { + const tera::TableMeta& meta = table_list.meta(i); + if (meta.table_name() == FLAGS_tera_master_meta_table_name) { + continue; + } + std::string key = table_path + "/" + meta.table_name(); + if(!ins_sdk.Put(key, meta.table_name(), &ins_err)) { + LOG(WARNING) << "ins put: " << key << ", error " << ins_err; + return -1; + } + } + + for (int32_t i = 0; i < tablet_list.meta_size(); i++) { + const tera::TabletMeta& meta = tablet_list.meta(i); + if 
(meta.table_name() == FLAGS_tera_master_meta_table_name) { + continue; + } + std::string table_name = meta.table_name(); + std::string key = tablet_path + "/" + meta.table_name() + "/" + meta.key_range().key_start(); + std::string val = "0"; + val.append(meta.key_range().key_end()); + if(!ins_sdk.Put(key, val, &ins_err)) { + LOG(WARNING) << "ins put: " << key << ", error " << ins_err; + return -1; + } + //std::string lock_key = lock_path + "/" + meta.table_name() + "/" + meta.key_range().key_start(); + } + return res; +} + +int ScanAndDumpMeta(const std::string& src_meta_tablet_addr, + const std::string& dest_meta_tablet_addr, + tera::TableMetaList* table_list, + tera::TabletMetaList* tablet_list) { + uint64_t seq_id = 0; + tera::ScanTabletRequest request; + tera::ScanTabletResponse response; + tera::WriteTabletRequest write_request; + tera::WriteTabletResponse write_response; + uint64_t request_size = 0; + write_request.set_sequence_id(seq_id++); + write_request.set_tablet_name(FLAGS_tera_master_meta_table_name); + write_request.set_is_sync(true); + write_request.set_is_instant(true); + + request.set_sequence_id(seq_id++); + request.set_table_name(FLAGS_tera_master_meta_table_name); + request.set_start(""); + request.set_end(""); + tera::tabletnode::TabletNodeClient src_meta_node_client(src_meta_tablet_addr); + bool success = true; + while ((success = src_meta_node_client.ScanTablet(&request, &response))) { + if (response.status() != tera::kTabletNodeOk) { + LOG(WARNING) << "dump: fail to load meta table: " + << StatusCodeToString(response.status()); + return -1; + } + int32_t record_size = response.results().key_values_size(); + LOG(INFO) << "scan meta table: " << record_size << " records"; + + bool need_dump = false; + std::string last_record_key; + for (int32_t i = 0; i < record_size; i++) { + const tera::KeyValuePair& record = response.results().key_values(i); + last_record_key = record.key(); + char first_key_char = record.key()[0]; + + TableMeta table_meta; 
+ TabletMeta tablet_meta; + if (first_key_char == '~') { + LOG(INFO) << "(user: " << record.key().substr(1) << ")"; + } else if (first_key_char == '@') { + //ParseMetaTableKeyValue(record.key(), record.value(), table_list->add_meta()); + table_meta.Clear(); + ParseMetaTableKeyValue(record.key(), record.value(), &table_meta); + + std::string key, val; + //table_meta.set_status(kTableDisable); + table_meta.mutable_schema()->set_merge_size(0); // never merge during dump + table_meta.mutable_schema()->set_split_size(10000000); // never split during dump + MakeMetaTableKeyValue(table_meta, &key, &val); + + RowMutationSequence* mu_seq = write_request.add_row_list(); + mu_seq->set_row_key(record.key()); + Mutation* mutation = mu_seq->add_mutation_sequence(); + mutation->set_type(tera::kPut); + mutation->set_value(val); + request_size += mu_seq->ByteSize(); + if (request_size >= kMaxRpcSize) { // write req too large, dump into new tera cluster + need_dump = true; + } + + TableMeta* table_meta2 = table_list->add_meta(); + table_meta2->CopyFrom(table_meta); + } else if (first_key_char > '@') { + //ParseMetaTableKeyValue(record.key(), record.value(), tablet_list->add_meta()); + tablet_meta.Clear(); + ParseMetaTableKeyValue(record.key(), record.value(), &tablet_meta); + + std::string key, val; + tablet_meta.clear_parent_tablets(); + //tablet_meta.set_status(kTabletDisable); + MakeMetaTableKeyValue(tablet_meta, &key, &val); + + RowMutationSequence* mu_seq = write_request.add_row_list(); + mu_seq->set_row_key(record.key()); + Mutation* mutation = mu_seq->add_mutation_sequence(); + mutation->set_type(tera::kPut); + mutation->set_value(val); + request_size += mu_seq->ByteSize(); + if (request_size >= kMaxRpcSize) { // write req too large, dump into new tera cluster + need_dump = true; + } + + TabletMeta* tablet_meta2 = tablet_list->add_meta(); + tablet_meta2->CopyFrom(tablet_meta); + } else { + LOG(WARNING) << "dump: invalid meta record: " << record.key(); + } + } + + if 
((need_dump || record_size <= 0) && + write_request.row_list_size() > 0) { + tabletnode::TabletNodeClient dest_meta_node_client(dest_meta_tablet_addr); + if (!dest_meta_node_client.WriteTablet(&write_request, &write_response)) { + LOG(WARNING) << "dump: fail to dump meta tablet: " + << StatusCodeToString(kRPCError); + return -1; + } + tera::StatusCode status = write_response.status(); + if (status == tera::kTabletNodeOk && write_response.row_status_list_size() > 0) { + status = write_response.row_status_list(0); + } + if (status != kTabletNodeOk) { + LOG(WARNING) << "dump: fail to dump meta tablet: " + << StatusCodeToString(status); + return -1; + } + write_request.clear_row_list(); + write_response.Clear(); + request_size = 0; + } + if (record_size <= 0) { + response.Clear(); + LOG(INFO) << "dump: scan meta table success"; + break; + } + + std::string next_record_key = tera::NextKey(last_record_key); + request.set_start(next_record_key); + request.set_end(""); + request.set_sequence_id(seq_id++); + response.Clear(); + } + return success? 
0: -1; +} + +int DumpPrepareOp() { + int res = 0; + std::string tera_src_conf = FLAGS_dump_tera_src_conf; + std::string tera_src_root = FLAGS_dump_tera_src_root_path; + std::string tera_dest_conf = FLAGS_dump_tera_dest_conf; + std::string tera_dest_root = FLAGS_dump_tera_dest_root_path; + + // read src meta ts addr and dest meta ts addr + std::string src_meta_addr, dest_meta_addr; + src_meta_addr = FLAGS_dump_tera_src_meta_addr; + dest_meta_addr = FLAGS_dump_tera_dest_meta_addr; + + // scan and dump meta + tera::TableMetaList table_list; + tera::TabletMetaList tablet_list; + if ((res = ScanAndDumpMeta(src_meta_addr, dest_meta_addr, &table_list, &tablet_list)) >= 0) { + // create key range in nexus + std::string ins_cluster_addr = FLAGS_ins_cluster_addr; + std::string ins_cluster_root_path = FLAGS_ins_cluster_root_path; + res = DumpRange(ins_cluster_addr, ins_cluster_root_path, table_list, tablet_list); + } + return res; +} + +int GetAndLockDumpRange(const std::string& ins_cluster_root_path, + std::string* table_name, + std::string* start_key, + std::string* end_key, + galaxy::ins::sdk::InsSDK* ins_sdk) { + int res = -1; + galaxy::ins::sdk::SDKError ins_err; + //std::string table_path = ins_cluster_root_path + "/table"; + std::string tablet_path = ins_cluster_root_path + "/tablet"; + std::string lock_path = ins_cluster_root_path + "/lock"; + + std::string start = tablet_path + "/"; + std::string end = tablet_path + "/"; + if (table_name->size()) { + start.append(*table_name); + start.append("/"); + start.append(*start_key); + if (*start_key == "") { + start.append(1, '\0'); + } + } + end.append(1, '\255'); + galaxy::ins::sdk::ScanResult* result = ins_sdk->Scan(start, end); + while (!result->Done()) { + if (result->Error() != galaxy::ins::sdk::kOK) { + LOG(INFO) << "scan fail: start " << start << ", end " << end << ", err " << result->Error(); + res = -1; + break; + } + std::string key = result->Key(); + std::string val = result->Value(); + std::string has_done = 
val.substr(0, 1); + if (has_done == "1") { // someone has copy it + result->Next(); + continue; + } + + //std::string key = tablet_path + "/" + meta.table_name() + "/" + meta.key_range().key_start(); + std::string str = key.substr(tablet_path.length() + 1); + std::size_t pos = str.find('/'); + *table_name = str.substr(0, pos); + *start_key = str.substr(pos + 1); + *end_key = val.substr(1); + + std::string lock_key = lock_path + "/" + *table_name + "/" + *start_key + "/"; + if (!ins_sdk->TryLock(lock_key, &ins_err)) { + LOG(INFO) << "ins: TryLock fail: " << lock_key << ", err " << ins_err; + result->Next(); + continue; + } + + std::string val1; + if (ins_sdk->Get(key, &val1, &ins_err)) { + has_done = val1.substr(0, 1); + } else { + LOG(INFO) << "ins: get fail: " << key << ", err " << ins_err; + } + if (has_done == "1") { // someone has copy it + if (!ins_sdk->UnLock(lock_key, &ins_err)) { + LOG(INFO) << "ins: unlock fail: " << lock_key << ", err " << ins_err; + } + result->Next(); + continue; + } + + res = 0; + break; // begin to scan + } + delete result; + return res; +} + +int ReleaseAndUnlockDumpRange(const std::string& ins_cluster_root_path, + const std::string& table_name, + const std::string& start_key, + const std::string& end_key, + galaxy::ins::sdk::InsSDK* ins_sdk) { + int res = 0; + galaxy::ins::sdk::SDKError ins_err; + //std::string table_path = ins_cluster_root_path + "/table"; + std::string tablet_path = ins_cluster_root_path + "/tablet"; + std::string lock_path = ins_cluster_root_path + "/lock"; + + std::string key = tablet_path + "/" + table_name + "/" + start_key; + std::string val = "1"; + val.append(end_key); + + if(!ins_sdk->Put(key, val, &ins_err)) { + LOG(WARNING) << "ins put: " << key << ", error " << ins_err; + } + + std::string lock_key = lock_path + "/" + table_name + "/" + start_key + "/"; + if (!ins_sdk->UnLock(lock_key, &ins_err)) { + LOG(WARNING) << "ins unlock fail: " << lock_key << ", error " << ins_err; + } + return res; +} + +struct 
ScanDumpContext { + Counter counter; + volatile bool fail; + std::string reason; +}; + +void ScanAndDumpCallBack(RowMutation* mu) { + ScanDumpContext* ctx = (ScanDumpContext*)mu->GetContext(); + if (mu->GetError().GetType() != tera::ErrorCode::kOK) { + if (ctx->fail == false) { + ctx->fail = true; + ctx->reason = mu->GetError().ToString(); + } + } + delete mu; + + ctx->counter.Dec(); + return; +} + +int ScanAndDumpData(Table* src, Table* dest, + const std::string& table_name, + const std::string& start_key, + const std::string& end_key) { + int res = 0; + ErrorCode err; + + ScanDescriptor desc(start_key); + desc.SetEnd(end_key); + desc.SetMaxVersions(std::numeric_limits::max()); + ResultStream* result_stream; + if ((result_stream = src->Scan(desc, &err)) == NULL) { + LOG(INFO) << "scan dump fail(new scan): " << table_name << ", start " << start_key + << ", end " << end_key; + return -1; + } + ScanDumpContext* ctx = new ScanDumpContext; + ctx->counter.Set(1); + ctx->fail = false; + while (!result_stream->Done(&err)) { + RowMutation* mu = dest->NewRowMutation(result_stream->RowName()); + mu->Put(result_stream->Family(), result_stream->Qualifier(), + result_stream->Value(), result_stream->Timestamp()); + ctx->counter.Inc(); + mu->SetContext(ctx); + mu->SetCallBack(ScanAndDumpCallBack); + dest->ApplyMutation(mu); + + result_stream->Next(); + } + delete result_stream; + ctx->counter.Dec(); + + while (ctx->counter.Get() > 0) { + sleep(3); + } + if (ctx->fail == true) { + LOG(INFO) << "scan dump fail: " << table_name << ", start " << start_key + << ", end " << end_key << ", reason " << ctx->reason; + res = -1; + } + delete ctx; + + if (err.GetType() != tera::ErrorCode::kOK) { + LOG(INFO) << "scan dump fail: " << table_name << ", start " << start_key + << ", end " << end_key << ", reason " << err.GetReason(); + res = -1; + } + return res; +} + +int DumpRunOp() { + int res = 0; + std::string ins_cluster_addr = FLAGS_ins_cluster_addr; + std::string ins_cluster_root_path = 
FLAGS_ins_cluster_root_path; + std::string tera_src_conf = FLAGS_dump_tera_src_conf; + std::string tera_dest_conf = FLAGS_dump_tera_dest_conf; + + // get and lock range + ErrorCode err; + Client* src_client = Client::NewClient(tera_src_conf, &err); + if (src_client == NULL) { + LOG(INFO) << "open src client fail: " << tera_src_conf << ", err " << err.ToString(); + return -1; + } + Client* dest_client = Client::NewClient(tera_dest_conf, &err); + if (dest_client == NULL) { + delete src_client; + src_client = NULL; + LOG(INFO) << "open dest client fail: " << tera_dest_conf << ", err " << err.ToString(); + return -1; + } + Table* src_table = NULL; + Table* dest_table = NULL; + + galaxy::ins::sdk::InsSDK ins_sdk(ins_cluster_addr); + std::string table_name, start_key, end_key, last_table_name; + while (GetAndLockDumpRange(ins_cluster_root_path, &table_name, &start_key, &end_key, &ins_sdk) == 0) { + if (last_table_name != table_name) { // table change + delete src_table; + delete dest_table; + src_table = NULL; + dest_table = NULL; + src_table = src_client->OpenTable(table_name, &err); + if (src_table == NULL) { + LOG(INFO) << "open src table fail: " << table_name << ", err " << err.ToString(); + continue; + } + dest_table = dest_client->OpenTable(table_name, &err); + if (dest_table == NULL) { + delete src_table; + src_table = NULL; + LOG(INFO) << "open dest table fail: " << table_name << ", err " << err.ToString(); + continue; + } + } + last_table_name = table_name; + if ((res = ScanAndDumpData(src_table, dest_table, table_name, start_key, end_key)) < 0) { + LOG(INFO) << "scan dump data fail: " << table_name << ", start " << start_key + << ", end " << end_key; + } else { + ReleaseAndUnlockDumpRange(ins_cluster_root_path, table_name, start_key, end_key, &ins_sdk); + } + start_key = end_key; + } + delete src_client; + delete dest_client; + return res; +} + +void GetTableKeyRange(const std::string& table_name, + const TabletMetaList& tablet_list, + std::vector* delimiters) 
{ + for (int32_t i = 0; i < tablet_list.meta_size(); i++) { + const tera::TabletMeta& meta = tablet_list.meta(i); + if (table_name == meta.table_name() && + meta.key_range().key_start().size() > 0) { + delimiters->push_back(meta.key_range().key_start()); + } + } +} + +int ManualCreateTable(tera::ClientImpl* client, + const std::string& table_name, + const TableSchema& schema, + const std::vector& delimiters) { + ErrorCode err; + TableDescriptor table_desc; + table_desc.SetTableName(table_name); + TableSchemaToDesc(schema, &table_desc); + table_desc.SetSplitSize(10000000); + table_desc.SetMergeSize(0); + if (!client->CreateTable(table_desc, delimiters, &err)) { + LOG(INFO) << "manual create error: " << table_name << ", err: " << err.ToString(); + return -1; + } + return 0; +} + +int ManualSplitTable(tera::ClientImpl* client, + const std::string& table_name, + const std::vector& delimiters) { + ErrorCode err; + std::vector arg_list; + arg_list.push_back("split"); + arg_list.push_back(table_name); + for (uint32_t i = 0; i < delimiters.size(); i++) { + arg_list.push_back(delimiters[i]); + if (!client->CmdCtrl("table", arg_list, NULL, NULL, &err)) { + LOG(INFO) << "manual split table fail(ignore old master): " << table_name + << ", delimiters_size: " << delimiters.size() + << ", err: " << err.ToString(); + } + usleep(FLAGS_dump_manual_split_interval); + arg_list.pop_back(); + } + return 0; +} + +bool SchemaCompare(const TableSchema& src, const TableSchema& dest) { + return ((src.raw_key() == dest.raw_key()) && + (src.kv_only() == dest.kv_only()) && + (src.name() == dest.name()) && + (!IsSchemaCfDiff(src, dest)) && + (!IsSchemaLgDiff(src, dest))); +} + +int GetOrSetTabletLocationSafe(Client* src_client, + Client* dest_client, + TableMetaList* table_list, + TabletMetaList* tablet_list) { + // get src and dest tablet location + ErrorCode err; + TableMetaList src_table_list; + TabletMetaList src_tablet_list; + tera::ClientImpl* src_client_impl = static_cast(src_client); + 
if (!src_client_impl->ShowTablesInfo(&src_table_list, &src_tablet_list, false, &err)) { + LOG(INFO) << "tera_master show src cluster fail: " << err.ToString(); + return -1; + } + + TableMetaList dest_table_list; + TabletMetaList dest_tablet_list; + tera::ClientImpl* dest_client_impl = static_cast(dest_client); + if (!dest_client_impl->ShowTablesInfo(&dest_table_list, &dest_tablet_list, false, &err)) { + LOG(INFO) << "tera_master show dest cluster fail: " << err.ToString(); + return -1; + } + + // get table meta set + std::map src_table_set; + for (int32_t i = 0; i < src_table_list.meta_size(); i++) { + const tera::TableMeta& meta = src_table_list.meta(i); + TableSchema& schema = src_table_set[meta.table_name()]; + schema.CopyFrom(meta.schema()); + } + std::map dest_table_set; + for (int32_t i = 0; i < dest_table_list.meta_size(); i++) { + const tera::TableMeta& meta = dest_table_list.meta(i); + TableSchema& schema = dest_table_set[meta.table_name()]; + schema.CopyFrom(meta.schema()); + } + + // create or split table, and filter schema not match meta + for (int32_t i = 0; i < src_table_list.meta_size(); i++) { + const tera::TableMeta& meta = src_table_list.meta(i); + if (meta.table_name() == FLAGS_tera_master_meta_table_name) { + continue; + } + std::vector delimiters; + GetTableKeyRange(meta.table_name(), src_tablet_list, &delimiters); + if (dest_table_set.find(meta.table_name()) == dest_table_set.end()) { + if (ManualCreateTable(dest_client_impl, meta.table_name(), meta.schema(), delimiters) < 0) { + return -1; + } + } else if (SchemaCompare(dest_table_set[meta.table_name()], meta.schema())) { + if (FLAGS_dump_enable_manual_split && + ManualSplitTable(dest_client_impl, meta.table_name(), delimiters) < 0) { + return -1; + } + } else { + LOG(INFO) << "table schema not match: " << meta.table_name() << ", src schema: " << meta.schema().ShortDebugString() + << ", dest schema: " << dest_table_set[meta.table_name()].ShortDebugString(); + 
src_table_set.erase(meta.table_name()); + continue; + } + tera::TableMeta* meta2 = table_list->add_meta(); + meta2->CopyFrom(meta); + } + + // filter key range + for (int32_t i = 0; i < src_tablet_list.meta_size(); i++) { + const tera::TabletMeta& meta = src_tablet_list.meta(i); + if (src_table_set.find(meta.table_name()) == src_table_set.end()) { + continue; + } + tera::TabletMeta* meta2 = tablet_list->add_meta(); + meta2->CopyFrom(meta); + } + return 0; +} + +int DumpPrepareSafeOp() { + int res = 0; + std::string ins_cluster_addr = FLAGS_ins_cluster_addr; + std::string ins_cluster_root_path = FLAGS_ins_cluster_root_path; + std::string tera_src_conf = FLAGS_dump_tera_src_conf; + std::string tera_dest_conf = FLAGS_dump_tera_dest_conf; + + ErrorCode err; + std::unique_ptr src_client(Client::NewClient(tera_src_conf, &err)); + if (src_client == nullptr) { + LOG(INFO) << "open src client fail: " << tera_src_conf << ", err " << err.ToString(); + return -1; + } + std::unique_ptr dest_client(Client::NewClient(tera_dest_conf, &err)); + if (dest_client == nullptr) { + src_client = nullptr; + LOG(INFO) << "open dest client fail: " << tera_dest_conf << ", err " << err.ToString(); + return -1; + } + + // dump src cluster range into ins + TableMetaList table_list; + TabletMetaList tablet_list; + if (GetOrSetTabletLocationSafe(src_client.get(), dest_client.get(), &table_list, &tablet_list) < 0) { + return -1; + } + res = DumpRange(ins_cluster_addr, ins_cluster_root_path, table_list, tablet_list); + return res; +} + +int main(int argc, char* argv[]) { + ::google::ParseCommandLineFlags(&argc, &argv, true); + if (FLAGS_flagfile == "") { + FLAGS_flagfile = "../conf/tera.flag"; + if (access(FLAGS_flagfile.c_str(), R_OK) != 0) { + FLAGS_flagfile = "./tera.flag"; + } + utils::LoadFlagFile(FLAGS_flagfile); + } + + if (argc > 1 && std::string(argv[1]) == "version") { + PrintSystemVersion(); + } else if (argc > 2 && std::string(argv[1]) == "dump" && std::string(argv[2]) == "prepare") { + 
return DumpPrepareOp(); + } else if (argc > 2 && std::string(argv[1]) == "dump" && std::string(argv[2]) == "prepare_safe") { + return DumpPrepareSafeOp(); + } else if (argc > 2 && std::string(argv[1]) == "dump" && std::string(argv[2]) == "run") { + return DumpRunOp(); + //} else if (argc > 2 && std::string(argv[1]) == "dump" && std::string(argv[2]) == "show") { + // return DumpShowOp(); + //} else if (argc > 2 && std::string(argv[1]) == "dump" && std::string(argv[2]) == "check") { + // return DumpCheckOp(): + } else { + HelpOp(argc, argv); + return -1; + } + return 0; +} + diff --git a/src/timeoracle/bench/timeoracle_bench.cc b/src/timeoracle/bench/timeoracle_bench.cc new file mode 100644 index 000000000..4140005bc --- /dev/null +++ b/src/timeoracle/bench/timeoracle_bench.cc @@ -0,0 +1,48 @@ +#include +#include +#include +#include "common/mutex.h" +#include "common/timer.h" +#include "common/thread_pool.h" +#include "common/this_thread.h" +#include "sdk/sdk_zk.h" + +#include "sdk/timeoracle_client_impl.h" +#include + +DEFINE_int64(client_thread_num, 10, ""); + +using namespace tera; +using namespace tera::timeoracle; + +std::shared_ptr g_thread_pool; + + +void worker() { + tera::sdk::ClusterFinder* cluster_finder = sdk::NewTimeoracleClusterFinder(); + tera::timeoracle::TimeoracleClientImpl client(g_thread_pool.get(), cluster_finder); + + while (true) { + int64_t st = client.GetTimestamp(1); + if (st <= 0) { + std::cout << "rpc failed" << std::endl; + ThisThread::Sleep(200); + } + } +} + +int main(int argc, char** argv) { + ::google::ParseCommandLineFlags(&argc, &argv, true); + g_thread_pool.reset(new common::ThreadPool(FLAGS_client_thread_num + 1)); + + std::vector thread_list; + for (int64_t i = 0; i < FLAGS_client_thread_num; ++i) { + thread_list.push_back(std::thread(&worker)); + } + + for (auto& th : thread_list) { + th.join(); + } + + return 0; +} diff --git a/src/timeoracle/remote_timeoracle.h b/src/timeoracle/remote_timeoracle.h new file mode 100644 index 
000000000..588bd0547 --- /dev/null +++ b/src/timeoracle/remote_timeoracle.h @@ -0,0 +1,73 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_TIMEORACLE_REMOTE_TIMEORACLE_H +#define TERA_TIMEORACLE_REMOTE_TIMEORACLE_H + +#include +#include "common/thread_pool.h" +#include "proto/timeoracle_rpc.pb.h" +#include "timeoracle/timeoracle.h" + +namespace tera { +namespace timeoracle { + +class ClosureGuard { +public: + ClosureGuard(::google::protobuf::Closure* done) : done_(done) { + } + + ~ClosureGuard() { + if (done_) { + done_->Run(); + } + } + + ::google::protobuf::Closure* release() { + auto done = done_; + done_ = nullptr; + return done; + } + +private: + ClosureGuard(const ClosureGuard&) = delete; +private: + ::google::protobuf::Closure* done_; +}; + +class RemoteTimeoracle : public TimeoracleServer { +public: + RemoteTimeoracle(int64_t start_timestamp) : timeoracle_(start_timestamp) { + } + + virtual void GetTimestamp(::google::protobuf::RpcController* controller, + const ::tera::GetTimestampRequest* request, + ::tera::GetTimestampResponse* response, + ::google::protobuf::Closure* done) { + ClosureGuard closure_guard(done); + + int64_t count = request->count(); + int64_t start_timestamp = timeoracle_.GetTimestamp(count); + + if (start_timestamp) { + response->set_start_timestamp(start_timestamp); + response->set_count(count); + response->set_status(kTimeoracleOk); + } else { + response->set_status(kTimeoracleBusy); + } + } + + Timeoracle* GetTimeoracle() { + return &timeoracle_; + } + +private: + Timeoracle timeoracle_; +}; + +} // namespace timeoracle +} // namespace tera + +#endif // TERA_TIMEORACLE_REMOTE_TIMEORACLE_H diff --git a/src/timeoracle/test/timeoracle_test.cc b/src/timeoracle/test/timeoracle_test.cc new file mode 100644 index 000000000..e7b6f4472 --- /dev/null +++ b/src/timeoracle/test/timeoracle_test.cc @@ -0,0 +1,78 @@ 
+// Copyright (c) 2017, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include + +#include +#include +#include +#include "timeoracle/timeoracle.h" +#include "utils/utils_cmd.h" + +DECLARE_string(log_dir); +DECLARE_string(tera_coord_type); +DECLARE_string(tera_leveldb_env_type); +DECLARE_string(tera_fake_zk_path_prefix); + +namespace tera { +namespace timeoracle { + +class TimeoracleTest: public ::testing::Test { +public: +}; + +TEST_F(TimeoracleTest, UniqueTimestampMsTest) { + int64_t ts0 = Timeoracle::UniqueTimestampMs(); + for (int i = 0; i < 10000; ++i) { + int64_t ts = Timeoracle::UniqueTimestampMs(); + EXPECT_LT(ts0, ts); + ts0 = ts; + } +} + +TEST_F(TimeoracleTest, TimeoracleFunc) { + Timeoracle to(1024LL); + + auto tmp = to.GetTimestamp(10LL); + EXPECT_EQ(tmp, 0); + + tmp = to.UpdateLimitTimestamp(10LL); + EXPECT_EQ(tmp, 10); + + tmp = to.GetTimestamp(10LL); + EXPECT_EQ(tmp, 0); + + tmp = to.UpdateLimitTimestamp(2000LL); + EXPECT_EQ(tmp, 2000); + + tmp = to.GetTimestamp(10LL); + EXPECT_EQ(tmp, 1044); + + tmp = to.GetTimestamp(10LL); + EXPECT_EQ(tmp, 1054); + + EXPECT_EQ(to.GetStartTimestamp(), 1064); + + tmp = to.UpdateStartTimestamp(); + + EXPECT_GT(tmp, 1064); + + auto new_ts = to.GetTimestamp(10LL); + EXPECT_EQ(new_ts, 0); +} + +} // namespace timeoracle +} // namespace tera + +int main(int argc, char** argv) { + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::google::InitGoogleLogging(argv[0]); + FLAGS_tera_coord_type = "fake_zk"; + FLAGS_tera_leveldb_env_type = "local"; + + tera::utils::SetupLog("timeorcale_test"); + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} + diff --git a/src/timeoracle/timeoracle.cc b/src/timeoracle/timeoracle.cc new file mode 100644 index 000000000..9d755445b --- /dev/null +++ b/src/timeoracle/timeoracle.cc @@ -0,0 +1,13 @@ +// Copyright (c) 2015, Baidu.com, Inc. 
All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "timeoracle/timeoracle.h" + +namespace tera { +namespace timeoracle { + +std::atomic Timeoracle::s_last_timestamp_ms; + +} // namespace timeoracle +} // namespace tera diff --git a/src/timeoracle/timeoracle.h b/src/timeoracle/timeoracle.h new file mode 100644 index 000000000..eb690de56 --- /dev/null +++ b/src/timeoracle/timeoracle.h @@ -0,0 +1,124 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_TIMEORACLE_TIMEORACLE_H_ +#define TERA_TIMEORACLE_TIMEORACLE_H_ + +#include +#include +#include +#include +#include + +namespace tera { +namespace timeoracle { + +constexpr int64_t kTimestampPerMilliSecond = 10000ULL; +constexpr int64_t kTimestampPerSecond = kTimestampPerMilliSecond * 1000ULL; +constexpr int64_t kBaseTimestampMilliSecond = 1483200000000ULL; // 20170101 00:00 + +inline int64_t clock_realtime_ms() { + struct timespec tp; + ::clock_gettime(CLOCK_REALTIME, &tp); + return tp.tv_sec * 1000ULL + tp.tv_nsec / 1000000ULL - kBaseTimestampMilliSecond; +} + +class Timeoracle { +public: + Timeoracle(int64_t start_timestamp) : start_timestamp_(start_timestamp), + limit_timestamp_(0) { + } + + // if num == 0, see next timstamp + // if return 0, allocate timestamp failed + int64_t GetTimestamp(int64_t num) { + int64_t start_timestamp = start_timestamp_.fetch_add(num); + + if ((start_timestamp + num) >= limit_timestamp_) { + return 0; + } + + return start_timestamp; + } + + int64_t UpdateLimitTimestamp(int64_t limit_timestamp) { + if (limit_timestamp > limit_timestamp_) { + limit_timestamp_ = limit_timestamp; + } else { + LOG(ERROR) << "update limit timestamp failed, limit_timestamp_=" << limit_timestamp_ + << ",update to " << limit_timestamp; + return 0; + } + return limit_timestamp; + } + + int64_t 
UpdateStartTimestamp() { + const int64_t cur_timestamp = CurrentTimestamp(); + + int64_t start_timestamp = 0; + while (1) { + start_timestamp = start_timestamp_; + if (start_timestamp < cur_timestamp) { + if (start_timestamp_.compare_exchange_strong(start_timestamp, cur_timestamp)) { + return cur_timestamp; + } + continue; + } + + int64_t limit_timestamp = limit_timestamp_; + if (start_timestamp > limit_timestamp) { + if (start_timestamp_.compare_exchange_strong(start_timestamp, limit_timestamp)) { + LOG(WARNING) << "adjust start timestamp to limit timestamp " << limit_timestamp; + return limit_timestamp; + } + continue; + } + + break; + } + + LOG(INFO) << "ignore to adjust start timestamp, current timestamp is " << cur_timestamp; + return start_timestamp; + } + + int64_t GetStartTimestamp() const { + return start_timestamp_; + } + + int64_t GetLimitTimestamp() const { + return limit_timestamp_; + } + +private: + std::atomic start_timestamp_; + std::atomic limit_timestamp_; + +public: + static int64_t UniqueTimestampMs() { + while (true) { + int64_t ts = clock_realtime_ms(); + int64_t last_timestamp_ms = s_last_timestamp_ms; + + if (ts <= last_timestamp_ms) { + return s_last_timestamp_ms.fetch_add(1) + 1; + } + + if (s_last_timestamp_ms.compare_exchange_strong(last_timestamp_ms, ts)) { + return ts; + } + } + } + + static int64_t CurrentTimestamp() { + return UniqueTimestampMs() * kTimestampPerMilliSecond; + } + +private: + static std::atomic s_last_timestamp_ms; +}; + +} // namespace timeoracle +} // namespace tera + +#endif // TERA_TIMEORACLE_TIMEORACLE_H_ diff --git a/src/timeoracle/timeoracle_entry.cc b/src/timeoracle/timeoracle_entry.cc new file mode 100644 index 000000000..8bff587ad --- /dev/null +++ b/src/timeoracle/timeoracle_entry.cc @@ -0,0 +1,174 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "timeoracle/timeoracle_entry.h" + +#include +#include +#include +#include "common/net/ip_address.h" +#include "common/this_thread.h" +#include "utils/utils_cmd.h" + +#include "timeoracle/remote_timeoracle.h" +#include "timeoracle/timeoracle_zk_adapter.h" + +DECLARE_string(tera_local_addr); +DECLARE_string(tera_timeoracle_port); +DECLARE_int32(tera_timeoracle_refresh_lease_second); +DECLARE_int32(tera_timeoracle_max_lease_second); +DECLARE_bool(tera_timeoracle_mock_enabled); +DECLARE_int32(tera_timeoracle_work_thread_num); +DECLARE_int32(tera_timeoracle_io_service_pool_size); +DECLARE_string(tera_coord_type); + +namespace tera { +namespace timeoracle { + +TimeoracleEntry::TimeoracleEntry() : + remote_timeoracle_(nullptr), + startup_timestamp_(0), + need_quit_(false) { + sofa::pbrpc::RpcServerOptions rpc_options; + rpc_options.work_thread_num = FLAGS_tera_timeoracle_work_thread_num; + rpc_options.io_service_pool_size = FLAGS_tera_timeoracle_io_service_pool_size; + rpc_options.no_delay = false; //use Nagle's Algorithm + rpc_options.write_buffer_base_block_factor = 0; //64Bytes per malloc + rpc_options.read_buffer_base_block_factor = 7; //8kBytes per malloc + sofa_pbrpc_server_.reset(new sofa::pbrpc::RpcServer(rpc_options)); + + if (FLAGS_tera_local_addr.empty()) { + local_addr_ = utils::GetLocalHostName()+ ":" + FLAGS_tera_timeoracle_port; + } else { + local_addr_ = FLAGS_tera_local_addr + ":" + FLAGS_tera_timeoracle_port; + } +} + +bool TimeoracleEntry::Start() { + if (!InitZKAdaptor()) { + return false; + } + + int64_t current_timestamp = Timeoracle::CurrentTimestamp(); + if (startup_timestamp_ < current_timestamp) { + startup_timestamp_ = current_timestamp; + } else { + LOG(WARNING) << "startup timestamp big than current timestamp," + << "startup timestamp is " << startup_timestamp_ + << "current timestamp is " << current_timestamp; + } + + LOG(INFO) << "set startup timestamp to " << startup_timestamp_; + + if (!StartServer()) { + return false; + } + + 
return true; +} + +TimeoracleEntry::~TimeoracleEntry() { + need_quit_ = true; + if (lease_thread_.joinable()) { + lease_thread_.join(); + } +} + +bool TimeoracleEntry::InitZKAdaptor() { + if (FLAGS_tera_timeoracle_mock_enabled) { + LOG(INFO) << "mock mode" ; + zk_adapter_.reset(new TimeoracleMockAdapter(local_addr_)); + } else if (FLAGS_tera_coord_type == "zk") { + LOG(INFO) << "zk mode" ; + zk_adapter_.reset(new TimeoracleZkAdapter(local_addr_)); + } else if (FLAGS_tera_coord_type == "ins") { + LOG(INFO) << "ins mode" ; + zk_adapter_.reset(new TimeoracleInsAdapter(local_addr_)); + } else { + LOG(FATAL) << "invalid configure for coord service, please check " + << "--tera_timeoracle_mock_enabled=true or " + << "--tera_coord_type=zk|ins"; + assert(0); + } + + return zk_adapter_->Init(&startup_timestamp_); +} + +bool TimeoracleEntry::StartServer() { + IpAddress timeoracle_addr("0.0.0.0", FLAGS_tera_timeoracle_port); + LOG(INFO) << "Start timeoracle RPC server at: " << timeoracle_addr.ToString(); + + remote_timeoracle_ = new RemoteTimeoracle(startup_timestamp_); + std::thread lease_thread(&TimeoracleEntry::LeaseThread, this); + lease_thread_ = std::move(lease_thread); + + auto timeoracle = remote_timeoracle_->GetTimeoracle(); + + while (startup_timestamp_ < timeoracle->GetLimitTimestamp()) { + if (need_quit_) { + return false; + } + ThisThread::Sleep(100); + } + + sofa_pbrpc_server_->RegisterService(remote_timeoracle_); + if (!sofa_pbrpc_server_->Start(timeoracle_addr.ToString())) { + LOG(ERROR) << "start timeoracle RPC server error"; + return false; + } + + LOG(INFO) << "finish start timeoracle RPC server"; + return true; +} + +bool TimeoracleEntry::Run() { + if (need_quit_) { + return false; + } + + int64_t start_timestamp = remote_timeoracle_->GetTimeoracle()->UpdateStartTimestamp(); + + VLOG(100) << "adjust start timestamp finished, start timestmap is " << start_timestamp; + + ThisThread::Sleep(1000); + return true; +} + +void TimeoracleEntry::ShutdownServer() { + 
need_quit_ = true; + sofa_pbrpc_server_->Stop(); +} + +void TimeoracleEntry::LeaseThread() { + auto timeoracle = remote_timeoracle_->GetTimeoracle(); + + while (!need_quit_) { + int64_t start_timestamp = timeoracle->GetStartTimestamp(); + int64_t limit_timestamp = timeoracle->GetLimitTimestamp(); + int64_t refresh_lease_timestamp = + FLAGS_tera_timeoracle_refresh_lease_second * kTimestampPerSecond; + + if (start_timestamp + refresh_lease_timestamp >= limit_timestamp) { + // need to require lease + if (limit_timestamp < start_timestamp) { + limit_timestamp = start_timestamp; + } + + int64_t next_limit_timestamp = + limit_timestamp + FLAGS_tera_timeoracle_max_lease_second * kTimestampPerSecond; + + if (!zk_adapter_->UpdateTimestamp(next_limit_timestamp)) { + need_quit_ = true; + return; + } + + timeoracle->UpdateLimitTimestamp(next_limit_timestamp); + } + + ThisThread::Sleep(1000); + } +} + +} // namespace timeoracle +} // namespace tera diff --git a/src/timeoracle/timeoracle_entry.h b/src/timeoracle/timeoracle_entry.h new file mode 100644 index 000000000..356ae452a --- /dev/null +++ b/src/timeoracle/timeoracle_entry.h @@ -0,0 +1,49 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#ifndef TERA_TIMEORACLE_TIMEORACLE_ENTRY_H_ +#define TERA_TIMEORACLE_TIMEORACLE_ENTRY_H_ + +#include + +#include "tera_entry.h" +#include +#include +#include + +namespace tera { +namespace timeoracle { + +class RemoteTimeoracle; +class TimeoracleZkAdapterBase; + +class TimeoracleEntry : public TeraEntry { +public: + TimeoracleEntry(); + ~TimeoracleEntry(); + + + virtual bool Start() override; + virtual bool Run() override; + virtual void ShutdownServer() override; + +private: + bool InitZKAdaptor(); + bool StartServer(); + void LeaseThread(); + +private: + std::string local_addr_; + RemoteTimeoracle* remote_timeoracle_; + std::unique_ptr sofa_pbrpc_server_; + int64_t startup_timestamp_; + std::unique_ptr zk_adapter_; + std::thread lease_thread_; + std::atomic need_quit_; +}; + +} // namespace timeoracle +} // namespace tera + +#endif // TERA_TIMEORACLE_TIMEORACLE_ENTRY_H_ diff --git a/src/timeoracle/timeoracle_zk_adapter.cc b/src/timeoracle/timeoracle_zk_adapter.cc new file mode 100644 index 000000000..58dd4a554 --- /dev/null +++ b/src/timeoracle/timeoracle_zk_adapter.cc @@ -0,0 +1,477 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include +#include +#include "timeoracle/timeoracle_zk_adapter.h" +#include "common/file/file_path.h" +#include "common/this_thread.h" +#include "types.h" +#include "zk/zk_util.h" +#include "ins_sdk.h" + +DECLARE_string(tera_zk_addr_list); +DECLARE_string(tera_zk_root_path); +DECLARE_string(tera_fake_zk_path_prefix); +DECLARE_int32(tera_zk_timeout); +DECLARE_int64(tera_zk_retry_period); +DECLARE_int32(tera_zk_retry_max_times); + +DECLARE_string(tera_ins_addr_list); +DECLARE_string(tera_ins_root_path); +DECLARE_int64(tera_master_ins_session_timeout); +DECLARE_string(tera_timeoracle_mock_root_path); + +namespace tera { +namespace timeoracle { + +void TimeoracleZkAdapterBase::OnNodeValueChanged(const std::string& path, + const std::string& value) { + LOG(INFO) << "zk OnNodeValueChanged, path=" << path; +} + +void TimeoracleZkAdapterBase::OnChildrenChanged(const std::string& path, + const std::vector& name_list, + const std::vector& data_list) { + LOG(INFO) << "zk OnChildrenChanged, path=" << path; +} + +void TimeoracleZkAdapterBase::OnNodeCreated(const std::string& path) { + LOG(INFO) << "zk OnNodeCreated, path=" << path; +} + +void TimeoracleZkAdapterBase::OnNodeDeleted(const std::string& path) { + LOG(INFO) << "zk OnNodeDeleted, path=" << path; + Finalize(); + _Exit(EXIT_FAILURE); +} + +void TimeoracleZkAdapterBase::OnWatchFailed(const std::string& path, int watch_type, + int err) { + LOG(INFO) << "zk OnWatchFailed, path=" << path; + Finalize(); + _Exit(EXIT_FAILURE); +} + +void TimeoracleZkAdapterBase::OnSessionTimeout() { + LOG(ERROR) << "zk session timeout!"; + _Exit(EXIT_FAILURE); +} + +TimeoracleZkAdapter::~TimeoracleZkAdapter() { +} + +bool TimeoracleZkAdapter::Init(int64_t* last_timestamp) { + if (!InitZk()) { + return false; + } + + if (!LockTimeoracleLock()) { + return false; + } + + if (ReadTimestamp(last_timestamp)) { + LOG(INFO) << "read timestamp sucess,get start_timestamp=" << *last_timestamp; + return CreateTimeoracleNode(); + } + + return false; 
+} + +bool TimeoracleZkAdapter::CreateTimeoracleNode() { + LOG(INFO) << "try create timeoracle nod,path=" << kTimeoracleNodePath; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!CreateEphemeralNode(kTimeoracleNodePath, server_addr_, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to create timeoracle node"; + return false; + } + LOG(ERROR) << "retry create timeoracle node in " + << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "create timeoracle node success"; + return true; +} + +bool TimeoracleZkAdapter::InitZk() { + LOG(INFO) << "try to init zk,zk_addr_list=" << FLAGS_tera_zk_addr_list + << ",zk_root_path=" << FLAGS_tera_zk_root_path; + int zk_errno = zk::ZE_OK; + int32_t retry_count = 0; + while (!ZooKeeperAdapter::Init(FLAGS_tera_zk_addr_list, + FLAGS_tera_zk_root_path, + FLAGS_tera_zk_timeout, + server_addr_, &zk_errno)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to init zk: " << zk::ZkErrnoToString(zk_errno); + return false; + } + LOG(ERROR) << "init zk fail: " << zk::ZkErrnoToString(zk_errno) + << ". 
retry in " << FLAGS_tera_zk_retry_period << " ms, retry: " + << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "init zk success"; + return true; +} + +bool TimeoracleZkAdapter::LockTimeoracleLock() { + LOG(INFO) << "try to lock timeoracle lock,path=" << kTimeoracleLockPath; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!SyncLock(kTimeoracleLockPath, &zk_errno, -1)) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to acquire timeoracle lock"; + return false; + } + LOG(ERROR) << "retry lock timeoracle lock in " + << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + LOG(INFO) << "acquire timeoracle lock success"; + return true; +} + +bool TimeoracleZkAdapter::ReadTimestamp(int64_t* timestamp) { + LOG(INFO) << "try to read timestamp, path=" << kTimeoracleTimestampPath; + + std::string timestamp_str; + int32_t retry_count = 0; + int zk_errno = zk::ZE_OK; + while (!ReadNode(kTimeoracleTimestampPath, ×tamp_str, &zk_errno) + && zk_errno != zk::ZE_NOT_EXIST) { + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(ERROR) << "fail to read timestamp node"; + return false; + } + LOG(ERROR) << "retry read timestamp node in " + << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + } + if (zk_errno == zk::ZE_NOT_EXIST) { + *timestamp = 0; + return true; + } + + char * pEnd = nullptr; + *timestamp = ::strtoull(timestamp_str.c_str(), &pEnd, 10); + if (*pEnd != '\0') { + // TODO (chenzongjia) + LOG(WARNING) << "read invalid timestamp value=" << timestamp_str; + return false; + } + + LOG(INFO) << "read timestamp value=" << timestamp_str; + + return true; +} + +bool TimeoracleZkAdapter::UpdateTimestamp(int64_t timestamp) { + char timestamp_str[64]; + snprintf(timestamp_str, sizeof(timestamp_str), "%lu", 
timestamp); + LOG(INFO) << "try to update timestamp to " << timestamp; + int zk_errno = zk::ZE_OK; + while (!WriteNode(kTimeoracleTimestampPath, timestamp_str, &zk_errno) + && zk_errno != zk::ZE_NOT_EXIST) { + return false; + /* + if (retry_count++ >= FLAGS_tera_zk_retry_max_times) { + LOG(INFO) << "fail to update timestamp"; + return false; + } + LOG(ERROR) << "retry update timestamp in " + << FLAGS_tera_zk_retry_period << " ms, retry=" << retry_count; + ThisThread::Sleep(FLAGS_tera_zk_retry_period); + zk_errno = zk::ZE_OK; + */ + } + if (zk_errno == zk::ZE_OK) { + LOG(INFO) << "update zk path=" << kTimeoracleTimestampPath << " to " + << timestamp_str << " success."; + return true; + } + + LOG(INFO) << "timestamp node not exist, try create timestamp node"; + zk_errno = zk::ZE_OK; + while (!CreatePersistentNode(kTimeoracleTimestampPath, timestamp_str, &zk_errno)) { + return false; + } + LOG(INFO) << "create timestamp node success"; + return true; + +} + +TimeoracleInsAdapter::~TimeoracleInsAdapter() { + if (ins_sdk_) { + std::string lock_path = FLAGS_tera_ins_root_path + kTimeoracleLockPath; + galaxy::ins::sdk::SDKError err; + ins_sdk_->UnLock(lock_path, &err); + } +} + +bool TimeoracleInsAdapter::Init(int64_t* last_timestamp) { + if (!InitInsAndLock()) { + return false; + } + + if (ReadTimestamp(last_timestamp)) { + LOG(INFO) << "read timestamp sucess,get start_timestamp=" << *last_timestamp; + return CreateTimeoracleNode(); + } + + return false; +} + +bool TimeoracleInsAdapter::CreateTimeoracleNode() { + std::string put_path = FLAGS_tera_ins_root_path + kTimeoracleNodePath; + + LOG(INFO) << "try write timeoracle nod,path=" << put_path; + + galaxy::ins::sdk::SDKError err; + + if (!ins_sdk_->Put(put_path, server_addr_, &err)) { + LOG(ERROR) << "update timestamp node, path=" << put_path << ",failed " + << ins_sdk_->ErrorToString(err); + return false; + } + + LOG(INFO) << "update timeoracle node success"; + return true; +} + +static void InsOnSessionTimeout(void * 
context) { + TimeoracleInsAdapter* ins_adp = static_cast(context); + ins_adp->OnSessionTimeout(); +} + +static void InsOnLockChange(const galaxy::ins::sdk::WatchParam& param, + galaxy::ins::sdk::SDKError error) { + TimeoracleInsAdapter* ins_adp = static_cast(param.context); + ins_adp->OnLockChange(param.value, param.deleted); +} + +bool TimeoracleInsAdapter::InitInsAndLock() { + MutexLock lock(&mutex_); + LOG(INFO) << "try to init ins,ins_addr_list=" << FLAGS_tera_ins_addr_list + << ",ins_root_path=" << FLAGS_tera_ins_root_path; + ins_sdk_ = new galaxy::ins::sdk::InsSDK(FLAGS_tera_ins_addr_list); + ins_sdk_->SetTimeoutTime(FLAGS_tera_master_ins_session_timeout); + + std::string lock_path = FLAGS_tera_ins_root_path + kTimeoracleLockPath; + + galaxy::ins::sdk::SDKError err; + + ins_sdk_->RegisterSessionTimeout(InsOnSessionTimeout, this); + + if (!ins_sdk_->Lock(lock_path, &err)) { + LOG(ERROR) << "try to lock timeoracle lock,path=" << kTimeoracleLockPath << " failed," + << ins_sdk_->ErrorToString(err); + return false; + } + + LOG(INFO) << "try to lock timeoracle lock,path=" << kTimeoracleLockPath << " success"; + + if (!ins_sdk_->Watch(lock_path, InsOnLockChange, this, &err)) { + LOG(ERROR) << "try to watch timeoracle lock,path=" << kTimeoracleLockPath << " failed," + << ins_sdk_->ErrorToString(err); + return false; + } + + LOG(INFO) << "try to watch timeoracle lock,path=" << kTimeoracleLockPath << " success"; + + return true; +} + +bool TimeoracleInsAdapter::ReadTimestamp(int64_t* timestamp) { + std::string read_path = FLAGS_tera_ins_root_path + kTimeoracleTimestampPath; + + LOG(INFO) << "try to read timestamp, path=" << read_path; + + std::string timestamp_str; + galaxy::ins::sdk::SDKError err; + + if (!ins_sdk_->Get(read_path, ×tamp_str, &err)) { + if (err == galaxy::ins::sdk::SDKError::kNoSuchKey) { + *timestamp = 0; + return true; + } + + LOG(ERROR) << "try to read timestamp, path=" << read_path << ",failed " + << ins_sdk_->ErrorToString(err); + return false; + 
} + + char * pEnd = nullptr; + *timestamp = ::strtoull(timestamp_str.c_str(), &pEnd, 10); + if (*pEnd != '\0') { + // TODO (chenzongjia) + LOG(WARNING) << "read invalid timestamp value=" << timestamp_str; + return false; + } + + LOG(INFO) << "read timestamp value=" << timestamp_str; + return true; +} + +bool TimeoracleInsAdapter::UpdateTimestamp(int64_t timestamp) { + char buf[64]; + snprintf(buf, sizeof(buf), "%lu", timestamp); + LOG(INFO) << "try to update timestamp to " << timestamp; + + std::string timestamp_str(buf); + galaxy::ins::sdk::SDKError err; + std::string put_path = FLAGS_tera_ins_root_path + kTimeoracleTimestampPath; + + if (!ins_sdk_->Put(put_path, timestamp_str, &err)) { + LOG(ERROR) << "update timestamp, path=" << put_path << ",failed " + << ins_sdk_->ErrorToString(err); + return false; + } + + return true; +} + +void TimeoracleInsAdapter::OnLockChange(std::string session_id, bool deleted) { + if (deleted || session_id != ins_sdk_->GetSessionID()) { + LOG(ERROR) << "timeoracle lock losted"; + exit(1); + } +} + +class FdGuard { +public: + explicit FdGuard(int fd) : fd_(fd) {} + + FdGuard() : fd_(-1) {} + + ~FdGuard() { + if (fd_ >= 0) { + ::close(fd_); + } + } + + operator int() const { + return fd_; + } + + void reset(int fd) { + if (fd_ >= 0) { + ::close(fd_); + } + fd_ = fd; + } + + int relese() { + const int ret = fd_; + fd_ = -1; + return ret; + } + +private: + FdGuard(const FdGuard&) = delete; + void operator=(const FdGuard&) = delete; + int fd_; +}; + +// not thread safe +bool TimeoracleMockAdapter::Init(int64_t* last_timestamp) { + std::string lock_path = FLAGS_tera_timeoracle_mock_root_path + kTimeoracleLockPath; + static FdGuard lock_fd(::open(lock_path.c_str(), O_CREAT | O_RDWR, 0666)); + + if (lock_fd < 0) { + return false; + } + + LOG(INFO) << "TimeoracleMockAdapter try to get lock for file=" << lock_path; + + if (::flock(lock_fd, LOCK_EX) < 0) { + LOG(WARNING) << "lock file failed for path=" << lock_path; + return false; + } + + 
LOG(INFO) << "TimeoracleMockAdapter got the lock for file=" << lock_path; + + std::string get_path = FLAGS_tera_timeoracle_mock_root_path + kTimeoracleTimestampPath; + + FdGuard tmp_fd(::open(get_path.c_str(), O_CREAT | O_RDWR, 0666)); + + if (tmp_fd < 0) { + LOG(WARNING) << "open file failed for file=" << get_path; + return false; + } + + char buf[64]; + + ssize_t len = pread(tmp_fd, buf, sizeof(buf), 0); + if (len < 0) { + LOG(WARNING) << "read file failed for file=" << get_path; + return false; + } + + if (len == 0) { + *last_timestamp = 0; + return true; + } + + buf[len] = '\0'; + char * pEnd = nullptr; + *last_timestamp = ::strtoull(buf, &pEnd, 10); + if (*pEnd != '\0') { + // TODO (chenzongjia) + LOG(WARNING) << "read invalid timestamp value=" << buf; + return false; + } + + LOG(INFO) << "read timestamp value=" << *last_timestamp; + + std::string put_path = FLAGS_tera_timeoracle_mock_root_path + kTimeoracleNodePath; + + tmp_fd.reset(::open(put_path.c_str(), O_CREAT | O_RDWR, 0666)); + + if (tmp_fd < 0) { + LOG(WARNING) << "open file failed for file=" << put_path; + return false; + } + + if (::pwrite(tmp_fd, server_addr_.data(), server_addr_.size(), 0) + != (ssize_t)server_addr_.size()) { + LOG(WARNING) << "write file failed for file=" << put_path; + return false; + } + + return true; +} + +// not thread safe +bool TimeoracleMockAdapter::UpdateTimestamp(int64_t new_timestamp) { + std::string put_path = FLAGS_tera_timeoracle_mock_root_path + kTimeoracleTimestampPath; + FdGuard tmp_fd(::open(put_path.c_str(), O_CREAT | O_RDWR, 0666)); + + if (tmp_fd < 0) { + LOG(WARNING) << "open file failed for file=" << put_path; + return false; + } + + char buf[64]; + snprintf(buf, sizeof(buf), "%lu", new_timestamp); + std::string timestamp_str(buf); + LOG(INFO) << "try to update timestamp to " << put_path; + + if (::pwrite(tmp_fd, timestamp_str.data(), timestamp_str.size(), 0) + != (ssize_t)timestamp_str.size()) { + LOG(WARNING) << "write file failed for file=" << put_path; 
+ return false; + } + + return true; +} + +} // namespace timeoracle +} // namespace tera diff --git a/src/timeoracle/timeoracle_zk_adapter.h b/src/timeoracle/timeoracle_zk_adapter.h new file mode 100644 index 000000000..b0f6a970c --- /dev/null +++ b/src/timeoracle/timeoracle_zk_adapter.h @@ -0,0 +1,124 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef TERA_TIMEORACLE_TIMEORACLE_ZK_ADAPTER_H +#define TERA_TIMEORACLE_TIMEORACLE_ZK_ADAPTER_H + +#include +#include +#include "zk/zk_adapter.h" + +// forward declare +namespace galaxy{ +namespace ins{ +namespace sdk { + class InsSDK; +} +} +} + +namespace tera { +namespace timeoracle { + +class TimeoracleZkAdapterBase : public zk::ZooKeeperAdapter { +public: + virtual ~TimeoracleZkAdapterBase() {}; + + // not thread safe + virtual bool Init(int64_t* last_timestamp) = 0; + + // not thread safe + virtual bool UpdateTimestamp(int64_t new_timestamp) = 0; + + virtual void OnChildrenChanged(const std::string& path, + const std::vector& name_list, + const std::vector& data_list) override; + + virtual void OnNodeValueChanged(const std::string& path, + const std::string& value) override; + + virtual void OnNodeCreated(const std::string& path) override; + + virtual void OnNodeDeleted(const std::string& path) override; + + virtual void OnWatchFailed(const std::string& path, int watch_type, + int err) override; + + virtual void OnSessionTimeout() final; +}; + +class TimeoracleZkAdapter : public TimeoracleZkAdapterBase { +public: + TimeoracleZkAdapter(const std::string& server_addr) : server_addr_(server_addr) {} + + virtual ~TimeoracleZkAdapter(); + + virtual bool Init(int64_t* last_timestamp) override; + + virtual bool UpdateTimestamp(int64_t new_timestamp) override; + +private: + bool InitZk(); + + bool LockTimeoracleLock(); + + bool ReadTimestamp(int64_t* timestamp); + + bool CreateTimeoracleNode(); + 
+private: + std::string server_addr_; +}; + +class TimeoracleInsAdapter : public TimeoracleZkAdapterBase { +public: + TimeoracleInsAdapter(const std::string & server_addr) : server_addr_(server_addr) {} + + virtual ~TimeoracleInsAdapter(); + + virtual bool Init(int64_t* last_timestamp) override; + + virtual bool UpdateTimestamp(int64_t new_timestamp) override; + + void OnLockChange(std::string session_id, bool deleted); + +private: + bool InitInsAndLock(); + + bool ReadTimestamp(int64_t* timestamp); + + bool CreateTimeoracleNode(); + +private: + mutable Mutex mutex_; + std::string server_addr_; + galaxy::ins::sdk::InsSDK* ins_sdk_{NULL}; +}; + + +/* + * This is not zookeeper! + * Just used on onebox for tasting tera briefly. + * This is implemented through local file system. + * Not support watching. + */ +class TimeoracleMockAdapter: public TimeoracleZkAdapterBase { +public: + TimeoracleMockAdapter(const std::string& server_addr) : server_addr_(server_addr) { + } + + // not thread safe + virtual bool Init(int64_t* last_timestamp) override; + + // not thread safe + virtual bool UpdateTimestamp(int64_t new_timestamp) override; + +private: + std::string server_addr_; +}; + +} // namespace timeoracle +} // namespace tera + +#endif // TERA_TIMEORACLE_TIMEORACLE_ZK_ADAPTER_H diff --git a/src/timeoracle_main.cc b/src/timeoracle_main.cc new file mode 100644 index 000000000..3c7f713be --- /dev/null +++ b/src/timeoracle_main.cc @@ -0,0 +1,69 @@ +// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include +#include + +#include +#include + +#include "common/base/scoped_ptr.h" +#include "tera_entry.h" +#include "utils/utils_cmd.h" +#include "version.h" +#include "timeoracle/timeoracle_entry.h" + +DECLARE_string(tera_log_prefix); + +volatile sig_atomic_t g_quit = 0; + +static void SignalIntHandler(int sig) { + g_quit = 1; +} + +int main(int argc, char* argv[]) { + ::google::SetUsageMessage("./timeoracle --flagfile=xxx.flag"); + ::google::ParseCommandLineFlags(&argc, &argv, true); + ::google::InitGoogleLogging(argv[0]); + if (!FLAGS_tera_log_prefix.empty()) { + tera::utils::SetupLog(FLAGS_tera_log_prefix); + } else { + tera::utils::SetupLog("timeoracle"); + } + + if (argc > 1) { + std::string ext_cmd = argv[1]; + if (ext_cmd == "version") { + PrintSystemVersion(); + return 0; + } + } + + signal(SIGINT, SignalIntHandler); + signal(SIGTERM, SignalIntHandler); + + scoped_ptr entry(new tera::timeoracle::TimeoracleEntry()); + + if (!entry->Start()) { + return -1; + } + + while (!g_quit) { + if (!entry->Run()) { + LOG(ERROR) << "Server run error ,and then exit now "; + break; + } + } + if (g_quit) { + LOG(INFO) << "received interrupt signal from user, will stop"; + } + + if (!entry->Shutdown()) { + return -1; + } + + return 0; +} + +/* vim: set ts=4 sw=4 sts=4 tw=100 */ diff --git a/src/types.h b/src/types.h index bfad100da..1f50f0f8f 100644 --- a/src/types.h +++ b/src/types.h @@ -27,6 +27,10 @@ const std::string kTsListPath = "/ts"; const std::string kKickPath = "/kick"; const std::string kRootTabletNodePath = "/root_table"; const std::string kSafeModeNodePath = "/safemode"; +const std::string kTimeoracleNodePath = "/timeoracle"; +const std::string kTimeoracleLockPath = "/timeoracle-lock"; +const std::string kTimeoracleTimestampPath = "/timeoracle-timestamp"; +const std::string kClientsNodePath = "/clients"; const std::string kSms = "[SMS] "; const std::string kMail = "[MAIL] "; const int64_t kLatestTs = INT64_MAX; @@ -36,6 +40,16 @@ const uint64_t kRowkeySize = 
(64 << 10); // 64KB const uint64_t kQualifierSize = (64 << 10); // 64KB const uint64_t kValueSize = (32 << 20); // 32MB +// observer +const std::string kRowlockNodeIdListPath = "/id_lock"; +const std::string kRowlockNodeHostListPath = "/host_lock"; +const std::string kRowlockNodeNumPath = "/node_num"; +const std::string kRowlockProxyPath = "/proxy"; +const uint64_t kObserverWaitTime = 1000000; + +// global transaction +const char* const kNotifyColumnFamily = "_N_"; + } // namespace tera #endif // TERA_TYPES_H_ diff --git a/src/utils/atomic.h b/src/utils/atomic.h deleted file mode 100644 index 69434be09..000000000 --- a/src/utils/atomic.h +++ /dev/null @@ -1,110 +0,0 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef TERA_UTILS_ATOMIC_H_ -#define TERA_UTILS_ATOMIC_H_ - -namespace tera { - -static inline int atomic_add(volatile int *mem, int add) -{ - asm volatile( - "lock xadd %0, (%1);" - : "=a"(add) - : "r"(mem), "a"(add) - : "memory" - ); - return add; -} - -static inline int64_t atomic_add64(volatile int64_t* mem, int64_t add) -{ - asm volatile( - "lock xaddq %0, (%1)" - : "=a" (add) - : "r" (mem), "a" (add) - : "memory" - ); - return add; -} - -static inline void atomic_inc(volatile int *mem) -{ - asm volatile( - "lock incl %0;" - : "=m"(*mem) - : "m"(*mem) - ); -} -static inline void atomic_inc64(volatile int64_t *mem) -{ - asm volatile( - "lock incq %0;" - : "=m"(*mem) - : "m"(*mem) - ); -} - -static inline void atomic_dec(volatile int *mem) -{ - asm volatile( - "lock decl %0;" - : "=m"(*mem) - : "m"(*mem) - ); -} - -static inline void atomic_dec64(volatile int64_t *mem) -{ - asm volatile( - "lock decq %0;" - : "=m"(*mem) - : "m"(*mem) - ); -} - -static inline int atomic_swap(volatile void *lockword, int value) -{ - asm volatile( - "lock xchg %0, (%1);" - : "=a"(value) - : "r"(lockword), "a"(value) - : "memory" - ); - return value; 
-} - -static inline int64_t atomic_swap64(volatile void *lockword, int64_t value) -{ - asm volatile( - "lock xchg %0, (%1);" - : "=a"(value) - : "r"(lockword), "a"(value) - : "memory" - ); - return value; -} - -static inline int atomic_comp_swap(volatile void *mem, int xchg, int cmp) -{ - asm volatile( - "lock cmpxchg %1, (%2)" - :"=a"(cmp) - :"d"(xchg), "r"(mem), "a"(cmp) - ); - return cmp; -} - -static inline int64_t atomic_comp_swap64(volatile void *mem, int64_t xchg, int64_t cmp) -{ - asm volatile( - "lock cmpxchg %1, (%2)" - :"=a"(cmp) - :"d"(xchg), "r"(mem), "a"(cmp) - ); - return cmp; -} - -} -#endif // TERA_UTILS_ATOMIC_H_ diff --git a/src/utils/counter.h b/src/utils/counter.h deleted file mode 100644 index 3f4da00a9..000000000 --- a/src/utils/counter.h +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef TERA_UTILS_COUNTER_H_ -#define TERA_UTILS_COUNTER_H_ - -#include - -#include "atomic.h" -#include "timer.h" - -namespace tera { - -class Counter { -public: - Counter() : val_(0) {} - int64_t Add(int64_t v) { - return atomic_add64(&val_, v) + v; - } - int64_t Sub(int64_t v) { - return atomic_add64(&val_, -v) - v; - } - int64_t Inc() { - return atomic_add64(&val_, 1) + 1; - } - int64_t Dec() { - return atomic_add64(&val_, -1) - 1; - } - int64_t Get() { - return val_; - } - int64_t Set(int64_t v) { - return atomic_swap64(&val_, v); - } - int64_t Clear() { - return atomic_swap64(&val_, 0); - } - -private: - volatile int64_t val_; -}; - -class AutoCounter { -public: - AutoCounter(Counter* counter, const char* msg1, const char* msg2 = NULL) - : counter_(counter), - msg1_(msg1), - msg2_(msg2) { - start_ = get_micros(); - counter_->Inc(); - } - ~AutoCounter() { - int64_t end = get_micros(); - if (end - start_ > 5000000) { - int64_t t = (end - start_) / 1000000; - if (!msg2_) { - fprintf(stderr, "%s [AutoCounter] 
%s hang for %ld s\n", - get_curtime_str().data(), msg1_, t); - } else { - fprintf(stderr, "%s [AutoCounter] %s %s hang for %ld s\n", - get_curtime_str().data(), msg1_, msg2_, t); - } - } - counter_->Dec(); - } - -private: - Counter* counter_; - int64_t start_; - const char* msg1_; - const char* msg2_; -}; -} - -#endif // TERA_UTILS_COUNTER_H_ diff --git a/src/utils/timer.h b/src/utils/timer.h deleted file mode 100644 index 62428c754..000000000 --- a/src/utils/timer.h +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright (c) 2015, Baidu.com, Inc. All Rights Reserved -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef TERA_UTILS_TIMER_H_ -#define TERA_UTILS_TIMER_H_ - -#include -#include - -namespace tera { - -static inline std::string get_curtime_str() { - struct tm tt; - char buf[20]; - time_t t = time(NULL); - strftime(buf, 20, "%Y%m%d-%H:%M:%S", localtime_r(&t, &tt)); - return std::string(buf, 17); -} - -static inline std::string get_curtime_str_plain() { - struct tm tt; - char buf[20]; - time_t t = time(NULL); - strftime(buf, 20, "%Y%m%d%H%M%S", localtime_r(&t, &tt)); - return std::string(buf); -} - -static inline int64_t get_micros() { - struct timespec ts; - clock_gettime(CLOCK_REALTIME, &ts); - return static_cast(ts.tv_sec) * 1000000 + static_cast(ts.tv_nsec) / 1000; -} - -static inline int64_t get_millis() { - return get_micros() / 1000; -} - -static inline int64_t get_unique_micros(int64_t ref) { - int64_t now; - do { - now = get_micros(); - } while (now == ref); - return now; -} - -static inline int64_t GetTimeStampInUs() { - return get_micros(); -} - -static inline int64_t GetTimeStampInMs() { - return get_millis(); -} - -} // namespace tera - -#endif // TERA_UTILS_TIMER_H_ diff --git a/src/zk/zk_adapter.cc b/src/zk/zk_adapter.cc old mode 100644 new mode 100755 index 1b83d6f87..9fd1aa1ac --- a/src/zk/zk_adapter.cc +++ b/src/zk/zk_adapter.cc @@ -51,7 +51,8 @@ bool ZooKeeperAdapter::Init(const 
std::string& server_list, const std::string& root_path, uint32_t session_timeout, const std::string& id, - int* zk_errno) { + int* zk_errno, + int wait_timeout) { MutexLock mutex(&state_mutex_); if (NULL != handle_) { @@ -79,7 +80,12 @@ bool ZooKeeperAdapter::Init(const std::string& server_list, } while (state_ == ZS_DISCONN || state_ == ZS_CONNECTING) { - state_cond_.Wait(); + if (wait_timeout > 0) { + state_cond_.TimeWait(wait_timeout); + break; + } else { + state_cond_.Wait(); + } } int code = ZE_OK; @@ -427,7 +433,7 @@ bool ZooKeeperAdapter::ListAndWatchChildren(const std::string& path, } } -bool ZooKeeperAdapter::CheckExist(const std::string&path, bool* is_exist, +bool ZooKeeperAdapter::CheckExist(const std::string& path, bool* is_exist, int* zk_errno) { MutexLock mutex(&state_mutex_); if (!ZooKeeperUtil::IsValidPath(path)) { diff --git a/src/zk/zk_adapter.h b/src/zk/zk_adapter.h index 56cf8e2b3..010efed75 100644 --- a/src/zk/zk_adapter.h +++ b/src/zk/zk_adapter.h @@ -9,7 +9,7 @@ #include #include -#include +#include #include "common/mutex.h" #include "common/thread_pool.h" @@ -17,6 +17,7 @@ #include "zk/zk_lock.h" #include "zk/zk_util.h" + namespace tera { namespace zk { @@ -39,7 +40,8 @@ class ZooKeeperAdapter { virtual ~ZooKeeperAdapter(); bool Init(const std::string& server_list, const std::string& root_path, - uint32_t session_timeout, const std::string& id, int* zk_errno); + uint32_t session_timeout, const std::string& id, int* zk_errno, + int wait_timeout = -1); // default wait until zk server ready void Finalize(); bool GetSessionId(int64_t* session_id, int* zk_errno); diff --git a/src/zk/zk_util.cc b/src/zk/zk_util.cc index 446ef6108..579a59f0d 100644 --- a/src/zk/zk_util.cc +++ b/src/zk/zk_util.cc @@ -9,7 +9,7 @@ #include #include -#include +#include #include "common/file/file_path.h" #include "common/file/file_stream.h"