import os
import re

# Set the CUDA_VISIBLE_DEVICES environment variable to '2'. The assignment
# sits before the torch / transformers imports below; presumably this is so
# that CUDA only ever sees that one device -- keep this ordering unless that
# assumption has been checked.
os.environ['CUDA_VISIBLE_DEVICES'] = '2'

from functools import partial
from transformers import AutoTokenizer, AutoModel
import pandas as pd
import torch
from torch.multiprocessing import Pool
from tqdm import tqdm
import summarize
if __name__ == '__main__':
    # Script entry point: read the abstracts spreadsheet, load the ChatGLM-6B
    # tokenizer and model, then map every abstract through a worker pool and
    # collect the returned values in ``summary_ls``.

    # Select the 'spawn' start method before any worker process is created.
    torch.multiprocessing.set_start_method('spawn')

    df = pd.read_excel('/data/database/pubmed/macrophage_disease.xlsx')
    print(df.shape)

    checkpoint = "THUDM/chatglm-6b"
    tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)

    # Load the checkpoint, cast it to half precision, move it to the CUDA
    # device and switch it to evaluation mode.
    model = AutoModel.from_pretrained(checkpoint, trust_remote_code=True)
    model = model.half().cuda()
    model = model.eval()

    # NOTE(review): `partial_process_data` is not defined in the visible part
    # of this file -- presumably a functools.partial wrapping a function from
    # `summarize`; confirm it exists before running, otherwise this raises
    # NameError.
    abstracts = df['abstract']
    summary_ls = []
    with Pool(processes=4) as pool:
        # imap yields results in input order; tqdm only adds a progress bar.
        ordered_results = pool.imap(partial_process_data, abstracts)
        summary_ls.extend(tqdm(ordered_results, total=len(abstracts)))