Bioinformatics
거대 언어 모델 기반 scBERT 분석 도구 설치 및 실행
kogun82
2024. 7. 19. 23:47
## 분석 환경
1. Ubuntu 23.04
2. NVIDIA GeForce RTX 3090
3. CUDA Driver Version 12.2
4. Anaconda
## conda env install
conda search "^python$"
conda clean -i
conda clean --all
conda remove --name scbert --all
conda create -n scbert python==3.7.12 pip -c conda-forge
conda activate scbert
## package install
pip install pandas
pip install scipy
pip install scikit-learn
pip install einops
pip install local_attention
pip install scanpy
pip install requests
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117
## GPU 테스트 코드
import torch
torch.cuda.is_available()
# step-1 (Pre-train the model)
python -v -m torch.distributed.launch --nproc_per_node 4 pretrain.py --data_path zheng68K_train.h5ad --model_name zheng68K_train_pretrain
# step-2 (Fine-tune using pre-trained models)
# select the model with the best accuracy
python -m torch.distributed.launch --nproc_per_node 4 finetune.py --data_path ./zheng68K_train.h5ad --model_path ./ckpts/zheng68K_train_pretrain_70.pth
# step-3 (Detection of novel cell type)
python -m torch.distributed.launch --nproc_per_node 4 predict.py --data_path ./zheng68K_test.h5ad --model_path ./ckpts/finetune_best.pth --novel_type True --unassign_thres 0.5 > out
## 케이스 별 Error 발생 처리 방법
!! ModuleNotFoundError: No module named 'local_attention'
pip install local-attention
!! If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.
!! See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF..
Windows: set PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
Linux: export 'PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512'
반응형