Mini Project Day 2
2024. 11. 4. 00:58
# Deep learning environment setup: CUDA, cuDNN
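Before loading any models it is worth confirming that PyTorch actually sees the GPU and cuDNN. A minimal check (assuming PyTorch was installed with CUDA support):

import torch

print(torch.__version__)                  # PyTorch version
print(torch.cuda.is_available())          # True means a CUDA GPU can be used
print(torch.version.cuda)                 # CUDA version PyTorch was built against
print(torch.backends.cudnn.version())     # cuDNN version number
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))  # name of the detected GPU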
# Text -> Image (stabilityai/stable-diffusion-3.5-large)
Guide..
Failed code...
# '''
# 5. Music, quotes, pictures - recommendation/generation: (model undecided)
# (Candidate 1) facebook/musicgen-small
# A model that generates music when you write a prompt for it.
# The catch is that we will probably have to build a few prompts on the backend side and feed them in ourselves.
# '''
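Since MusicGen only takes free-text prompts, the "build a few prompts on the backend" idea could start as a small lookup of canned prompt strings keyed by mood. A rough sketch (the mood keys and prompt wording below are placeholders only, nothing is decided yet):

# Hypothetical mood -> MusicGen prompt mapping; keys and texts are examples only
MUSIC_PROMPTS = {
    "calm": "calm lo-fi piano with soft rain sounds, slow tempo",
    "happy": "upbeat acoustic pop with hand claps and bright guitar",
    "sad": "melancholic solo piano, slow and emotional",
}

def build_music_prompt(mood: str) -> str:
    # Fall back to a neutral prompt when the mood is unknown
    return MUSIC_PROMPTS.get(mood, "gentle ambient background music")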
from diffusers import StableDiffusionPipeline
from fastapi import FastAPI, Form
from PIL import Image
import torch
import json
import requests
# Set up the Hugging Face API token
API_TOKEN = "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"
headers = {"Authorization": f"Bearer {API_TOKEN}"}
# Load the Stable Diffusion model
pipe = StableDiffusionPipeline.from_pretrained(
    "stabilityai/stable-diffusion-3.5-large",
    torch_dtype=torch.float16,
    use_auth_token=API_TOKEN  # authenticate with the API token
)
pipe = pipe.to("cuda")
# Create the FastAPI instance
app = FastAPI()
# Image generation endpoint
@app.post("/createimage/")
async def create_image(text: str = Form(...)):
"""
์ฃผ์ด์ง ํ
์คํธ ํ๋กฌํํธ๋ฅผ ๊ธฐ๋ฐ์ผ๋ก ์ด๋ฏธ์ง๋ฅผ ์์ฑํ์ฌ ์ ์ฅํฉ๋๋ค.
"""
# ํ
์คํธ ๊ธฐ๋ฐ์ผ๋ก ์ด๋ฏธ์ง ์์ฑ
image = pipe(
prompt=text,
num_inference_steps=28,
guidance_scale=3.5
).images[0]
# ์ด๋ฏธ์ง ์ ์ฅ
image_path = "generated_image.png"
image.save(image_path)
return {"image_path": image_path}
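One likely stumbling block here: stable-diffusion-3.5-large is an SD3-family model, so recent diffusers versions expect the SD3 pipeline class rather than StableDiffusionPipeline when loading it. A minimal sketch of just the loading part, assuming a diffusers build with SD3 support and a token that has access to the gated repo:

from diffusers import StableDiffusion3Pipeline
import torch

pipe = StableDiffusion3Pipeline.from_pretrained(
    "stabilityai/stable-diffusion-3.5-large",
    torch_dtype=torch.bfloat16,
    token=API_TOKEN  # newer diffusers accepts `token` instead of `use_auth_token`
)
pipe = pipe.to("cuda")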
# from diffusers import BitsAndBytesConfig, StableDiffusionPipeline
# from fastapi import FastAPI, Form
# from PIL import Image
# import torch
# # from huggingface_hub import login
# # Access token
# # Hugging Face login (token required)
# # login("your_hugging_face_access_token")
# # Model setup
# model_id = "stabilityai/stable-diffusion-3.5-large, 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=access_token"
# nf4_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.bfloat16
# )
# # Load the model
# pipe = StableDiffusionPipeline.from_pretrained(
#     model_id,
#     torch_dtype=torch.bfloat16,
#     quantization_config=nf4_config
# )
# pipe = pipe.to("cuda")
# pipe.enable_model_cpu_offload()
# # Create the FastAPI instance
# app = FastAPI()
# # Image generation endpoint
# @app.post("/createimage/")
# async def create_image(text: str = Form(...)):
#     """
#     Generates and saves an image based on the given text prompt.
#     """
#     # Generate the image from the text prompt
#     image = pipe(
#         prompt=text,
#         num_inference_steps=28,
#         guidance_scale=3.5
#     ).images[0]
#     # Image save path
#     image_path = "generated_image.png"
#     image.save(image_path)
#     return {"image_path": image_path}
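For reference, the route the diffusers documentation takes for 4-bit SD3.5 is to quantize the transformer sub-model with BitsAndBytesConfig and then pass it into the SD3 pipeline, rather than giving quantization_config to the pipeline itself. A sketch of that pattern (assuming bitsandbytes is installed and the diffusers version supports it; not tested here):

from diffusers import BitsAndBytesConfig, SD3Transformer2DModel, StableDiffusion3Pipeline
import torch

model_id = "stabilityai/stable-diffusion-3.5-large"
nf4_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)
# Quantize only the transformer, then hand it to the pipeline
transformer_nf4 = SD3Transformer2DModel.from_pretrained(
    model_id,
    subfolder="transformer",
    quantization_config=nf4_config,
    torch_dtype=torch.bfloat16
)
pipe = StableDiffusion3Pipeline.from_pretrained(
    model_id,
    transformer=transformer_nf4,
    torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()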
# Text -> Audio (facebook/musicgen-small)
from fastapi import FastAPI, Form, Response
from transformers import pipeline, AutoProcessor, MusicgenForConditionalGeneration
import scipy.io.wavfile
import torch
import tempfile
import os
# print(torch.cuda.is_available())  # prints True when a GPU can be used
# Check that the model has been moved to the GPU
app = FastAPI()
# MusicGen model setup
processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small", torch_dtype=torch.float32).to("cuda")
print(next(model.parameters()).device)  # prints "cuda:0" when the model is loaded on the GPU
# Music generation pipeline function
def generate_music(text: str, length: int = 512):
    # Convert the text into a form the model can take as input
    inputs = processor(text=[text], padding=True, return_tensors="pt").to("cuda")
    # Generate audio values with the model
    audio_values = model.generate(**inputs, max_new_tokens=length)
    # Save the generated audio to a temporary WAV file
    sampling_rate = model.config.audio_encoder.sampling_rate
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp_file:
        scipy.io.wavfile.write(tmp_file.name, rate=sampling_rate, data=audio_values[0, 0].cpu().numpy())
        temp_path = tmp_file.name
    return temp_path
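For a sense of scale, MusicGen emits audio tokens at roughly 50 per second, so max_new_tokens=512 is on the order of ten seconds of audio (256 is about five). A quick local sanity check with a made-up prompt might look like:

if __name__ == "__main__":
    # ~5 seconds of audio; the prompt text is only an example
    test_path = generate_music("calm lo-fi piano with soft drums", length=256)
    print("wrote", test_path)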
# FastAPI endpoint: generate music from the summarized text
@app.post("/recommend_music/")
async def recommend_music(summary_text: str = Form(...), length: int = Form(512)):
    # Generate the music
    music_path = generate_music(summary_text, length)
    # Return the path of the generated music file
    return {"music_path": music_path}
# Endpoint so FastAPI can serve the generated WAV file (for quick testing)
@app.get("/download_music/")
async def download_music(music_path: str):
    if os.path.exists(music_path):
        with open(music_path, "rb") as f:
            data = f.read()
        # Return the raw WAV bytes (binary data cannot be placed directly in a JSON response)
        return Response(content=data, media_type="audio/wav")
    else:
        return {"error": "File not found"}
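Assuming this script is saved as something like music_api.py (the file name is arbitrary) and started with uvicorn music_api:app --port 8000, the two endpoints can be exercised from Python roughly like this:

import requests

# Ask the service to generate music from a summary text (the prompt is only an example)
resp = requests.post(
    "http://localhost:8000/recommend_music/",
    data={"summary_text": "a quiet rainy evening in the city", "length": 256},
)
music_path = resp.json()["music_path"]

# Download the generated WAV and save it locally
audio = requests.get("http://localhost:8000/download_music/", params={"music_path": music_path})
with open("recommended.wav", "wb") as f:
    f.write(audio.content)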