From 20046a8f6ec6eb299961276b975b0c45c1084721 Mon Sep 17 00:00:00 2001 From: gazayas Date: Thu, 26 Nov 2020 17:42:20 +0900 Subject: [PATCH 1/2] Change web scraping query to retrieve proper title --- lib/bible_gateway.rb | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/bible_gateway.rb b/lib/bible_gateway.rb index fe65476..af81d32 100644 --- a/lib/bible_gateway.rb +++ b/lib/bible_gateway.rb @@ -63,7 +63,7 @@ def version=(version) def lookup(passage) response = Typhoeus.get(passage_url(passage), followlocation: true) doc = Nokogiri::HTML(response.body) - scrape_passage(doc, @version) + scrape_passage(doc, @version, passage) end def old_lookup(passage) @@ -81,9 +81,10 @@ def old_passage_url(passage) "#{CLASSIC_GATEWAY_URL}/passage/?search=#{URI.encode_www_form_component(passage)}&version=#{URI.encode_www_form_component(VERSIONS[version])}" end - def scrape_passage(doc, version) + def scrape_passage(doc, version, passage) container = doc.css('div.passage-text') - title = container.css("div.version-#{VERSIONS[version]}.result-text-style-normal.text-html h1 span")[0].content.strip if container.css("div.version-#{VERSIONS[version]}.result-text-style-normal.text-html h1")[0] != nil + title = doc.css(".dropdown-display-text").first.children.first.text + title = passage unless title != nil segment = doc.at('div.passage-text') segment.search('sup.crossreference').remove # remove cross reference links From b2b8bf24b5581c61ec066921bb74d4b07ba9f28b Mon Sep 17 00:00:00 2001 From: gazayas Date: Thu, 26 Nov 2020 17:46:48 +0900 Subject: [PATCH 2/2] Refactor title code --- lib/bible_gateway.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/bible_gateway.rb b/lib/bible_gateway.rb index af81d32..c8ccc32 100644 --- a/lib/bible_gateway.rb +++ b/lib/bible_gateway.rb @@ -84,7 +84,7 @@ def old_passage_url(passage) def scrape_passage(doc, version, passage) container = doc.css('div.passage-text') title = doc.css(".dropdown-display-text").first.children.first.text - title = passage unless title != nil + title ||= passage segment = doc.at('div.passage-text') segment.search('sup.crossreference').remove # remove cross reference links