Bioinformatics

거대 언어 모델 기반 scBERT 분석 도구 설치 및 실행

kogun82 2024. 7. 19. 23:47

## 분석 환경

1. Ubuntu 23.04

2. NVIDIA GeForce RTX 3090

3. CUDA Driver Version 12.2

4. Anaconda

 

## conda env install

conda search "^python$"
conda clean -i
conda clean --all
conda remove --name scbert --all
conda create -n scbert python==3.7.12 pip -c conda-forge
conda activate scbert

 

## package install

pip install pandas
pip install scipy
pip install scikit-learn
pip install einops
pip install local_attention
pip install scanpy
pip install requests
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117


## GPU 테스트 코드

import torch
torch.cuda.is_available()


# step-1 (Pre-train the model)

python -v -m torch.distributed.launch --nproc_per_node 4 pretrain.py --data_path zheng68K_train.h5ad --model_name zheng68K_train_pretrain


# step-2 (Fine-tune using pre-trained models)

# select the pre-trained checkpoint with the best accuracy

python -m torch.distributed.launch --nproc_per_node 4 finetune.py --data_path ./zheng68K_train.h5ad --model_path ./ckpts/zheng68K_train_pretrain_70.pth


# step-3 (Detection of novel cell type)

python -m torch.distributed.launch --nproc_per_node 4 predict.py --data_path ./zheng68K_test.h5ad --model_path ./ckpts/finetune_best.pth --novel_type True --unassign_thres 0.5 > out


## 케이스 별 Error 발생 처리 방법


!! ModuleNotFoundError: No module named 'local_attention'

pip install local-attention


!! If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation. 
!! See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF..

Windows: set PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512
Linux: export 'PYTORCH_CUDA_ALLOC_CONF=max_split_size_mb:512'