diff --git a/commands/WPCOM_Sites_List.php b/commands/WPCOM_Sites_List.php index 4e4b4d4..dc74869 100644 --- a/commands/WPCOM_Sites_List.php +++ b/commands/WPCOM_Sites_List.php @@ -155,7 +155,7 @@ protected function initialize( InputInterface $input, OutputInterface $output ): $this->sites = get_wpcom_sites( array( 'include_domain_only' => 'true', - 'fields' => 'ID,name,URL,is_private,is_coming_soon,is_wpcom_atomic,jetpack,is_multisite,options', + 'fields' => 'ID,name,URL,is_private,is_coming_soon,is_wpcom_atomic,jetpack,options,is_multisite', ), ); $output->writeln( 'Successfully fetched ' . \count( $this->sites ) . ' WPCOM site(s).' ); @@ -175,12 +175,12 @@ protected function execute( InputInterface $input, OutputInterface $output ): in 'Site ID' => $site->ID, 'Site Name' => \preg_replace( '/[^a-zA-Z0-9\s&!\/|\'#.()-:]/', '', $site->name ), 'Domain' => $site->URL, // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase - 'Host' => $this->eval_which_host( $site ), + 'Host' => $this->eval_which_host( $site, $output ), 'ignore' => $this->eval_ignore_list( $site ), 'free_pass' => $this->eval_pass_list( $site ), 'is_private' => $this->eval_is_private( $site ), 'is_coming_soon' => $this->eval_is_coming_soon( $site ), - 'is_multisite' => $this->eval_is_multisite( $site ), + 'is_multisite' => $this->eval_is_multisite( $site, $output ), 'is_domain_only' => $this->eval_is_domain_only( $site ), ), $this->sites @@ -228,6 +228,8 @@ protected function execute( InputInterface $input, OutputInterface $output ): in 'Atomic sites' => $this->count_sites( $audited_site_list, 'Atomic', 'Host' ), 'Pressable sites' => $this->count_sites( $audited_site_list, 'Pressable', 'Host' ), 'Simple sites' => $this->count_sites( $audited_site_list, 'Simple', 'Host' ), + 'WP VIP sites' => $this->count_sites( $audited_site_list, 'wpvip.com', 'Host' ), + 'WP Engine sites' => $this->count_sites( $audited_site_list, 'WP Engine', 'Host' ), 'Other hosts' => 
$this->count_sites( $audited_site_list, 'Other', 'Host' ), 'PASSED sites' => $this->count_sites( $audited_site_list, 'PASS', 'Result' ), 'FAILED sites' => $this->count_sites( $audited_site_list, 'FAIL', 'Result' ), @@ -252,6 +254,8 @@ protected function execute( InputInterface $input, OutputInterface $output ): in 'Atomic sites' => $this->count_sites( $final_site_list, 'Atomic', 'Host' ), 'Pressable sites' => $this->count_sites( $final_site_list, 'Pressable', 'Host' ), 'Simple sites' => $this->count_sites( $final_site_list, 'Simple', 'Host' ), + 'WP VIP sites' => $this->count_sites( $final_site_list, 'wpvip.com', 'Host' ), + 'WP Engine sites' => $this->count_sites( $final_site_list, 'WP Engine', 'Host' ), 'Other hosts' => $this->count_sites( $final_site_list, 'Other', 'Host' ), 'Total sites' => count( $final_site_list ), ); @@ -340,34 +344,120 @@ private function prompt_export_excluded_columns_input( InputInterface $input, Ou /** * Tries to determine the host of the site. + * Checks are ordered from least expensive to most expensive: + * 1. Check data returned by the wpcom /me/sites API. + * 2. Check site against the known list of Pressable sites. + * 3. Check the site's well-known/hosting-provider. + * 3a. Check the x-powered-by header. + * 4. Check against wpcom site-profiler API. * * @param \stdClass $site The site object. + * @param OutputInterface $output The output object. 
* * @return string */ - protected function eval_which_host( \stdClass $site ): string { - if ( true === $site->is_wpcom_atomic ) { + protected function eval_which_host( \stdClass $site, $output ): string { + # Strip any subdirectory from the URL, eg: https://example.com/subdir becomes https://example.com + # TODO - check for multisite before stripping the URL because it will mess with subdirectory multisites + if ( $site->is_wpcom_atomic && true === $site->is_wpcom_atomic ) { $server = 'Atomic'; + } elseif ( $site->URL && strpos( $site->URL, '.wordpress.com' ) !== false ) { + $server = 'Simple'; // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase + } elseif ( isset( $site->options->unmapped_url ) && strpos( $site->options->unmapped_url, '.wordpress.com' ) !== false ) { + $server = 'Simple'; // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase + } elseif ( isset( $site->options->unmapped_url ) && strpos( $site->options->unmapped_url, '.mystagingwebsite.com' ) !== false ) { + $server = 'Pressable'; // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase + } elseif ( $site->URL && ( + strpos( $site->URL, '.wpengine.com' ) !== false || + strpos( $site->URL, '.wpenginepowered.com' ) !== false ) ) { + $server = 'WP Engine'; // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase } elseif ( true === $site->jetpack ) { $pressable_urls = array_column( $this->pressable_sites, 'url' ); if ( in_array( parse_url( $site->URL, PHP_URL_HOST ), $pressable_urls, true ) ) { // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase $server = 'Pressable'; } else { - // TODO: Handle the wpvip.com sites (that's the actual value of the following variable). - $known_host = get_remote_content( $site->URL . 
'/.well-known/hosting-provider' ); // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase + if ( isset( $site->URL ) ) { + $parts = parse_url( $site->URL ); + if ( isset( $parts['scheme'], $parts['host'] ) ) { + $site_url = $parts['scheme'] . '://' . $parts['host']; + } + } + $known_host = get_remote_content( $site_url . '/.well-known/hosting-provider' ); // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase if ( $known_host && 200 === $known_host['headers']['http_code'] ) { $server = \str_replace( "\n", '', $known_host['body'] ); - if ( 'Pressable' !== $server ) { - $server = 'Other'; + switch ( $server ) { + case 'Pressable': + case 'wpvip.com': + break; + default: + $server = 'Other'; + break; } + } else if ( 404 === $known_host['headers']['http_code'] && isset( $known_host['headers']['X-Powered-By'] ) ) { + strpos( $known_host['headers']['X-Powered-By'], 'WP Engine' ) !== false ? $server = 'WP Engine' : $server = 'Other'; + } + else { + $server = $this->check_wpcom_site_profiles( $site->URL, $output ); + } + } + } else { + $server = $this->check_wpcom_site_profiles( $site->URL, $output ); + } + + return $server; + } + + /** + * Check WordPress.com/site-profiles API for the site. + * + * @param string $site_url The site URL. + * @param OutputInterface $output The output object. + * + * @return string + */ + protected function check_wpcom_site_profiles( string $site_url, $output ): string { + $server = 'Other'; + $parts = parse_url( $site_url ); + if ( isset( $parts['host'] ) ) { + $site_url = $parts['host']; + } + + $maxAttempts = 5; + $attempt = 0; + $delay = 10; // initial delay in seconds + + do { + $site_profiles = get_remote_content( 'https://public-api.wordpress.com/wpcom/v2/site-profiler/hosting-provider/' . $site_url ); + $httpCode = $site_profiles['headers']['http_code'] ?? 0; + if ( 429 !== $httpCode ) { + break; + } + $output->writeln( "Rate limited by WP.com site-profiler. 
Retrying in {$delay} seconds..." ); + sleep( $delay ); + $delay *= 2; + $attempt++; + } while ( $attempt < $maxAttempts ); + + if ( 429 === ( $site_profiles['headers']['http_code'] ?? 0 ) ) { + $output->writeln( "Exceeded maximum retry attempts for WP.com site-profiler. Skipping {$site_url}." ); + return $server; + } + + if ( $site_profiles && 200 === $site_profiles['headers']['http_code'] ) { + $site_profiles = json_decode( $site_profiles['body'], true ); + if ( isset( $site_profiles['hosting_provider']['is_cdn'] ) && $site_profiles['hosting_provider']['is_cdn'] ) { + $server = 'Other'; + } else { + if ( isset( $site_profiles['hosting_provider']['name'] ) ) { + $server = $site_profiles['hosting_provider']['name']; } else { $server = 'Other'; } } } else { - $server = 'Simple'; // Need a better way to determine if site is simple. For example, 410'd Jurassic Ninja sites will show as Simple. + $server = 'Other'; } - + return $server; } @@ -434,21 +524,28 @@ protected function eval_is_coming_soon( \stdClass $site ): string { * Evaluates if a site is single or multisite. * * @param \stdClass $site Site object to be evaluated. - * + * @param OutputInterface $output The output object. + * * @return string */ - protected function eval_is_multisite( \stdClass $site ): string { + protected function eval_is_multisite( \stdClass $site, $output ): string { /** * An alternative to this implementation is to compare $site->URL against * $site->options->main_network_site, however all simple sites are returned * as multisites. More investigation required. */ - if ( true === $site->is_multisite ) { + if ( isset( $site->is_multisite ) && true === $site->is_multisite ) { + # Sites with ".wordpress.com" in the unmapped_url are Simple sites and therefore are subsites of the wp.com multisite. 
+ if ( isset( $site->options->unmapped_url ) && strpos( $site->options->unmapped_url, '.wordpress.com' ) !== false ) { // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase + return ''; + } + # Check for multisite patterns in the URL. In reality, this is a very weak check. + # It only checks for a trailing / on the .com and .org TLDs as an indicator of a subdirectory multisite. foreach ( $this->multisite_patterns as $pattern ) { if ( str_contains( $site->URL, $pattern ) ) { // phpcs:ignore WordPress.NamingConventions.ValidVariableName.UsedPropertyNotSnakeCase return 'is_subsite'; } - if ( 'Simple' !== $this->eval_which_host( $site ) ) { + if ( 'Simple' !== $this->eval_which_host( $site, $output ) ) { return 'is_parent'; } }