#!/bin/sh
#
# Launch examples/run_page_embedding.py through `accelerate launch` to produce
# page embeddings for one dataset split with a retrieval model.
#
# All paths below (./.env, .venv/bin/activate, examples/...) are relative, so
# the script must be started from the directory that contains them.
#
# Required variables (must be set once ./.env has been sourced, either by that
# file or by the calling environment):
#   LOCAL_DATA_DIR, LOCAL_EMBEDDINGS_DIR, LOCAL_MODEL_DIR, LOCAL_OUTPUT_DIR

# Abort on the first failing command.
set -e

# Load the local configuration into the current shell.
. ./.env

# Fail early with a readable message instead of an opaque `mkdir` error (or an
# output path silently rooted at "/") when a required variable is missing.
: "${LOCAL_DATA_DIR:?LOCAL_DATA_DIR must be set (see ./.env)}"
: "${LOCAL_EMBEDDINGS_DIR:?LOCAL_EMBEDDINGS_DIR must be set (see ./.env)}"
: "${LOCAL_MODEL_DIR:?LOCAL_MODEL_DIR must be set (see ./.env)}"
: "${LOCAL_OUTPUT_DIR:?LOCAL_OUTPUT_DIR must be set (see ./.env)}"

# Create the working directories; quoting keeps paths with spaces intact.
mkdir -p -- "$LOCAL_DATA_DIR"
mkdir -p -- "$LOCAL_EMBEDDINGS_DIR"
mkdir -p -- "$LOCAL_MODEL_DIR"
mkdir -p -- "$LOCAL_OUTPUT_DIR"

# Activate the project's virtual environment.
. .venv/bin/activate

# Trace every command from here on (enabled only after the environment setup).
set -x

DATASET_NAME="m3-docvqa"
RETRIEVAL_MODEL_TYPE="colpali"
RETRIEVAL_MODEL_NAME="colpaligemma-3b-pt-448-base"
RETRIEVAL_ADAPTER_MODEL_NAME="colpali-v1.2"
SPLIT="dev"

# Name of the sub-directory of LOCAL_EMBEDDINGS_DIR where embeddings are saved:
# <adapter>_<dataset>_<split>.
EMBEDDING_NAME="${RETRIEVAL_ADAPTER_MODEL_NAME}_${DATASET_NAME}_${SPLIT}"

# Alternative launcher, kept for reference:
# uv tool run accelerate launch --num_processes=1 --mixed_precision=bf16 examples/run_page_embedding.py \
accelerate launch --num_processes=1 --mixed_precision=bf16 examples/run_page_embedding.py \
  --use_retrieval \
  --retrieval_model_type="$RETRIEVAL_MODEL_TYPE" \
  --data_name="$DATASET_NAME" \
  --split="$SPLIT" \
  --loop_unique_doc_ids=True \
  --output_dir="$LOCAL_EMBEDDINGS_DIR/$EMBEDDING_NAME" \
  --retrieval_model_name_or_path="$RETRIEVAL_MODEL_NAME" \
  --retrieval_adapter_model_name_or_path="$RETRIEVAL_ADAPTER_MODEL_NAME"