Skip to content
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 31 additions & 56 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ pub struct AppState {
pub database: Arc<Database>,
pub index: Arc<Index>
}
pub async fn start(index_location: &str, database_address: &str, port: u32) -> Result<(), errors::AppError> {
pub async fn start(index_location: &str, database_address: &str, port: u32, use_mmap: bool) -> Result<(), errors::AppError> {
let version = format!("{}/.version", index_location);

let sampledata = format!("{}/datastore/sampledata.json", index_location);
Expand All @@ -33,7 +33,8 @@ pub async fn start(index_location: &str, database_address: &str, port: u32) -> R
let taxons = format!("{}/datastore/taxons.tsv", index_location);

let sa = format!("{}/sa.bin", index_location);
let proteins = format!("{}/proteins.tsv", index_location);
let proteins = format!("{}/proteins.bin", index_location);
let mappings = format!("{}/mapping.bin", index_location);

let database = Database::try_from_url(database_address)?;

Expand All @@ -48,7 +49,7 @@ pub async fn start(index_location: &str, database_address: &str, port: u32) -> R
&taxons
)?;

let index = Index::try_from_files(&sa, &proteins)?;
let index = Index::try_from_files(&sa, &proteins, &mappings, use_mmap)?;

let app_state = AppState {
datastore: Arc::new(datastore),
Expand Down
6 changes: 4 additions & 2 deletions api/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,16 @@ pub struct Arguments {
#[arg(short, long)]
database_address: String,
#[arg(short, long)]
port: u32
port: u32,
#[arg(short, long, default_value_t = false)]
mmap: bool
}

#[tokio::main]
async fn main() {
let args = Arguments::parse();

if let Err(e) = start(&args.index_location, &args.database_address, args.port).await {
if let Err(e) = start(&args.index_location, &args.database_address, args.port, args.mmap).await {
eprintln!("{}", e);
std::process::exit(1);
}
Expand Down
6 changes: 3 additions & 3 deletions index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ version = "2.3.2"
edition = "2021"

[dependencies]
sa-compression = { git = "https://github.com/unipept/unipept-index.git" }
sa-index = { git = "https://github.com/unipept/unipept-index.git" }
sa-mappings = { git = "https://github.com/unipept/unipept-index.git" }
sa-server = { git = "https://github.com/unipept/unipept-index.git", branch = "feature/speedup-loading-index" }
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This dependency still points to the feature branch.

sa-index = { git = "https://github.com/unipept/unipept-index.git", branch = "feature/speedup-loading-index" }
sa-mappings = { git = "https://github.com/unipept/unipept-index.git", branch = "feature/speedup-loading-index" }
Comment thread
tibvdm marked this conversation as resolved.
Outdated
thiserror = "1.0"
58 changes: 15 additions & 43 deletions index/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,39 +1,35 @@
use std::{
fs::File,
io::{BufReader, Read}
};

pub use errors::IndexError;
use errors::LoadIndexError;
use sa_compression::load_compressed_suffix_array;
use sa_server::{load_mapping_file, load_proteins_file, load_suffix_array_file};
pub use sa_index::peptide_search::ProteinInfo;
pub use sa_index::peptide_search::SearchResult;
use sa_index::{
binary::load_suffix_array,
peptide_search::{search_all_peptides},
sa_searcher::BitVecSearcher,
SuffixArray
};
use sa_mappings::proteins::Proteins;
use sa_index::sa_searcher::Searcher;
use sa_index::suffix_to_protein_index::SuffixToProteinIndex;
Comment thread
tibvdm marked this conversation as resolved.
Outdated

mod errors;

pub struct Index {
searcher: BitVecSearcher
searcher: Searcher
}

impl Index {
pub fn try_from_files(index_file: &str, proteins_file: &str) -> Result<Self, IndexError> {

let proteins = Proteins::try_from_database_file(proteins_file)
.map_err(|_| LoadIndexError::LoadProteinsErrors(
LoadIndexError::FileNotFound(proteins_file.to_string()).to_string(),
))?;
pub fn try_from_files(index_file: &str, proteins_file: &str, mapping_file: &str, use_mmap: bool) -> Result<Self, IndexError> {
eprintln!("Loading proteins from file: {}", proteins_file);
let proteins =
load_proteins_file(proteins_file, use_mmap).map_err(|err| LoadIndexError::LoadProteinsErrors(err.to_string()))?;

eprintln!("Loading suffix array from file: {}", index_file);
let suffix_array =
load_index_file(index_file).map_err(|err| LoadIndexError::LoadSuffixArrayError(err.to_string()))?;
load_suffix_array_file(index_file, use_mmap).map_err(|err| LoadIndexError::LoadSuffixArrayError(err.to_string()))?;

let searcher = BitVecSearcher::new(suffix_array, proteins);
eprintln!("Creating searcher");
let suffix_to_protein_mapping =
load_mapping_file(mapping_file, use_mmap).map_err(|err| LoadIndexError::LoadProteinsErrors(err.to_string()))?;
Comment thread
tibvdm marked this conversation as resolved.
Outdated
Comment thread
tibvdm marked this conversation as resolved.
Outdated

let searcher = Searcher::new(suffix_array, proteins, suffix_to_protein_mapping.0);
Comment thread
tibvdm marked this conversation as resolved.
Outdated

Ok(Self { searcher })
}
Expand All @@ -42,27 +38,3 @@ impl Index {
search_all_peptides(&self.searcher, peptides, cutoff.unwrap_or(10_000), equate_il, tryptic)
}
}

/// Loads a suffix array from the binary file at `index_file`.
///
/// The first byte of the file is read as the number of bits per value. When
/// that byte equals 64 the rest of the stream is handed to
/// `load_suffix_array`; for any other value it is handed to
/// `load_compressed_suffix_array` together with that bit width.
///
/// # Errors
///
/// * `LoadIndexError::FileNotFound` when the file cannot be opened (every
///   open failure is reported this way, whatever the underlying cause).
/// * `LoadIndexError::LoadSuffixArrayError` when the leading byte cannot be
///   read, or when the selected loader reports an error.
fn load_index_file(index_file: &str) -> Result<SuffixArray, LoadIndexError> {
    let mut file = match File::open(index_file) {
        Ok(handle) => handle,
        Err(_) => return Err(LoadIndexError::FileNotFound(index_file.to_string())),
    };

    // Buffer the reads; the loaders below continue from wherever the header read stops.
    let mut reader = BufReader::new(&mut file);

    // Single-byte header holding the bits-per-value setting.
    let mut header = [0_u8; 1];
    if reader.read_exact(&mut header).is_err() {
        return Err(LoadIndexError::LoadSuffixArrayError(
            "Could not read the flags from the binary file".to_string(),
        ));
    }

    match header[0] {
        64 => load_suffix_array(&mut reader)
            .map_err(|err| LoadIndexError::LoadSuffixArrayError(err.to_string())),
        width => load_compressed_suffix_array(&mut reader, width as usize)
            .map_err(|err| LoadIndexError::LoadSuffixArrayError(err.to_string())),
    }
}
Loading