Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 3 additions & 10 deletions fixtures/fragments/file1.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,17 +104,10 @@ Even with fragment checking enabled, the following links must hence succeed:
[Link to remote binary file without fragment](https://raw.githubusercontent.com/lycheeverse/lychee/master/fixtures/fragments/zero.bin)
[Link to remote binary file with empty fragment](https://raw.githubusercontent.com/lycheeverse/lychee/master/fixtures/fragments/zero.bin#)

## Local file with fragment
## With fragment

For local file URIs with a fragment, the fragment checker is invoked and fails to read the content,
but the file checker only emits a warning. Hence, the following link must succeed as well:
Fragment checking is skipped unless the Content-Type header is "text/html" or "text/markdown", or it is "text/plain" and the URL path ends in ".md".
Hence, even though they contain fragments that cannot be checked in binary files, the following links are expected to succeed with an HTTP 200 status:

[Link to local binary file with fragment](zero.bin#fragment)

## Remote URL with fragment

Right now, there is no MIME/content-type-based exclusion for fragment checks in the website checker.
Also, unlike the file checker, the website checker throws an error if reading the response body fails.
Hence, the following link must fail:

[Link to remote binary file with fragment](https://raw.githubusercontent.com/lycheeverse/lychee/master/fixtures/fragments/zero.bin#fragment)
4 changes: 2 additions & 2 deletions lychee-bin/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1915,9 +1915,9 @@ mod cli {
"https://raw.githubusercontent.com/lycheeverse/lychee/master/fixtures/fragments/zero.bin#fragment",
))
.stdout(contains("42 Total"))
.stdout(contains("29 OK"))
.stdout(contains("30 OK"))
// Failures because of missing fragments or failed binary body scan
.stdout(contains("13 Errors"));
.stdout(contains("12 Errors"));
}

#[test]
Expand Down
46 changes: 33 additions & 13 deletions lychee-lib/src/checker/website.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::{
BasicAuthCredentials, ErrorKind, Status, Uri,
BasicAuthCredentials, ErrorKind, FileType, Status, Uri,
chain::{Chain, ChainResult, ClientRequestChains, Handler, RequestChain},
quirks::Quirks,
retry::RetryExt,
Expand All @@ -9,8 +9,8 @@ use crate::{
use async_trait::async_trait;
use http::{Method, StatusCode};
use octocrab::Octocrab;
use reqwest::{Request, Response};
use std::{collections::HashSet, time::Duration};
use reqwest::{Request, Response, header::CONTENT_TYPE};
use std::{collections::HashSet, path::Path, time::Duration};

#[derive(Debug, Clone)]
pub(crate) struct WebsiteChecker {
Expand Down Expand Up @@ -108,7 +108,28 @@ impl WebsiteChecker {
&& method == Method::GET
&& response.url().fragment().is_some_and(|x| !x.is_empty())
{
self.check_html_fragment(status, response).await
let Some(content_type) = response
.headers()
.get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok())
else {
return status;
};

let file_type = match content_type {
ct if ct.starts_with("text/html") => FileType::Html,
ct if ct.starts_with("text/markdown") => FileType::Markdown,
ct if ct.starts_with("text/plain") => {
let path = Path::new(response.url().path());
match path.extension() {
Some(ext) if ext.eq_ignore_ascii_case("md") => FileType::Markdown,
_ => return status,
}
}
_ => return status,
};

self.check_html_fragment(status, response, file_type).await
} else {
status
}
Expand All @@ -117,19 +138,18 @@ impl WebsiteChecker {
}
}

async fn check_html_fragment(&self, status: Status, response: Response) -> Status {
async fn check_html_fragment(
&self,
status: Status,
response: Response,
file_type: FileType,
) -> Status {
let url = response.url().clone();
match response.text().await {
Ok(text) => {
Ok(content) => {
match self
.fragment_checker
.check(
FragmentInput {
content: text,
file_type: crate::FileType::Html,
},
&url,
)
.check(FragmentInput { content, file_type }, &url)
.await
{
Ok(true) => status,
Expand Down