30 lines
843 B
Bash
Executable File
30 lines
843 B
Bash
Executable File
#!/bin/sh
#
# Runs examples/run_indexing_m3docvqa.py through `uv run` for the dataset,
# retrieval model, split and FAISS index type configured below.
#
# Must be started from the directory that contains ./.env and ./.venv,
# because both are referenced by relative path.
#
# Required variables (read after ./.env is sourced, so they may come from
# that file or from the calling environment):
#   LOCAL_DATA_DIR, LOCAL_EMBEDDINGS_DIR, LOCAL_MODEL_DIR, LOCAL_OUTPUT_DIR

# Stop at the first failing command.
set -e

. ./.env

# Create the working directories. Each expansion is quoted so a path with
# whitespace or glob characters stays a single argument, and ${VAR:?} aborts
# with a message naming the variable when it is unset or empty (previously
# this surfaced as a bare "mkdir: missing operand").
mkdir -p -- "${LOCAL_DATA_DIR:?must be set (see .env)}"
mkdir -p -- "${LOCAL_EMBEDDINGS_DIR:?must be set (see .env)}"
mkdir -p -- "${LOCAL_MODEL_DIR:?must be set (see .env)}"
mkdir -p -- "${LOCAL_OUTPUT_DIR:?must be set (see .env)}"

. .venv/bin/activate

# Trace every command from here on so the resolved settings appear in the log.
set -x

DATASET_NAME="m3-docvqa"
RETRIEVAL_MODEL_TYPE="colpali"
RETRIEVAL_ADAPTER_MODEL_NAME="colpali-v1.2"
SPLIT="dev"
FAISS_INDEX_TYPE='ivfflat'

EMBEDDING_NAME="${RETRIEVAL_ADAPTER_MODEL_NAME}_${DATASET_NAME}_${SPLIT}"
# Directory name (under LOCAL_EMBEDDINGS_DIR) where the resulting index is saved.
INDEX_NAME="${EMBEDDING_NAME}_pageindex_${FAISS_INDEX_TYPE}"

printf '%s\n' "$EMBEDDING_NAME"
printf '%s\n' "$FAISS_INDEX_TYPE"

uv run examples/run_indexing_m3docvqa.py \
  --use_retrieval \
  --retrieval_model_type="$RETRIEVAL_MODEL_TYPE" \
  --data_name="$DATASET_NAME" \
  --split="$SPLIT" \
  --loop_unique_doc_ids=False \
  --embedding_name="$EMBEDDING_NAME" \
  --faiss_index_type="$FAISS_INDEX_TYPE" \
  --output_dir="$LOCAL_EMBEDDINGS_DIR/$INDEX_NAME"