프로젝트
Too Many Requests
content0474
2025. 1. 14. 20:38
발생한 문제
API로 데이터를 가져올 때 일부 카테고리는 Too Many Requests가 뜨면서 데이터가 가져와지지 않음
기존코드
@shared_task
def fetch_and_store_news(news_source="NYTimes"):
    """Fetch top stories per category from the NYT API and persist them.

    Each category is mapped to its source-specific section name, the Top
    Stories endpoint is queried once, and the first five articles are
    cached in Redis (24h TTL) and upserted into PostgreSQL. A 429 response
    only pauses briefly; the category is not retried.
    """
    api_key = config("NYT_API_KEY")
    all_categories = Category.objects.all()
    if not all_categories.exists():
        print("No categories found in the database.")
        return

    for cat in all_categories:
        # Resolve the source-specific category name for this news source.
        mapped = cat.get_source_category(cat.name, news_source)
        if not mapped:
            print(f"No mapping found for category '{cat.name}' in source '{news_source}'.")
            continue

        endpoint = f"https://api.nytimes.com/svc/topstories/v2/{mapped}.json"
        response = requests.get(endpoint, params={'api-key': api_key})

        if response.status_code == 200:
            payload = response.json()
            found = payload.get('results', [])
            print(f"Fetching articles for category: {cat.name} ({mapped})")
            for article in found[:5]:
                news_url = article.get('url', 'No URL')
                # Cache in Redis with a 24-hour TTL, keyed by category + URL.
                redis_client.set(
                    f"news:{cat.name.lower()}:{news_url}",
                    json.dumps({
                        'title': article.get('title', 'No Title'),
                        'abstract': article.get('abstract', 'No Abstract'),
                        'url': news_url,
                        'published_date': article.get('published_date', 'No Date'),
                        'category': cat.name
                    }),
                    ex=86400
                )
                # Upsert into PostgreSQL, keyed by the article URL.
                News.objects.update_or_create(
                    url=news_url,
                    defaults={
                        'title': article.get('title', 'No Title'),
                        'abstract': article.get('abstract', 'No Abstract'),
                        'published_date': article.get('published_date', 'No Date'),
                        'category': cat
                    }
                )
        elif response.status_code == 429:
            # Rate limited: pause, but do not retry this category.
            print(f"Too many requests for category: {cat.name}. Please wait.")
            time.sleep(10)
        else:
            print(f"Failed to fetch articles for category: {cat.name}. Status Code: {response.status_code}")
Too Many Requests 관련 처리 로직이 들어가 있기는 하지만, Too Many Requests로 인해 실패한 작업을 다시 시도하거나 재시도 횟수를 설정해두지 않았다.
수정코드
@shared_task
def fetch_and_store_news(news_source="NYTimes"):
    """Fetch NYT top stories for every category, with bounded retries.

    For each category the source-specific section name is resolved and the
    Top Stories endpoint is queried. HTTP 429 responses and transient
    network errors are retried up to MAX_RETRIES times; other 4xx responses
    are treated as permanent and skipped immediately. The first five
    articles are cached in Redis (24h TTL) and upserted into PostgreSQL.
    """
    API_KEY = config("NYT_API_KEY")
    MAX_RETRIES = 3               # maximum attempts per category
    REQUEST_DELAY = 5             # pause between requests/retries (seconds)
    TOO_MANY_REQUEST_DELAY = 60   # fallback pause after an HTTP 429 (seconds)
    REQUEST_TIMEOUT = 10          # per-request timeout so a stalled connection never hangs the worker

    categories = Category.objects.all()
    if not categories.exists():
        print("No categories found in the database.")
        return

    for category in categories:
        # Resolve the source-specific category name for this news source.
        source_category = category.get_source_category(category.name, news_source)
        if not source_category:
            print(f"No mapping found for category '{category.name}' in source '{news_source}'.")
            continue

        url = f"https://api.nytimes.com/svc/topstories/v2/{source_category}.json"
        params = {'api-key': API_KEY}

        retries = 0
        while retries < MAX_RETRIES:
            try:
                # timeout= prevents a hung connection from blocking the Celery worker forever.
                response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as exc:
                # Transient network failure: count it as one attempt and retry.
                print(f"Request error for category: {category.name}: {exc}")
                retries += 1
                time.sleep(REQUEST_DELAY)
                continue

            if response.status_code == 200:
                data = response.json()
                articles = data.get('results', [])
                print(f"Fetching articles for category: {category.name} ({source_category})")
                for article in articles[:5]:
                    news_url = article.get('url', 'No URL')
                    # Cache in Redis with a 24-hour TTL, keyed by category + URL.
                    redis_key = f"news:{category.name.lower()}:{news_url}"
                    redis_value = json.dumps({
                        'title': article.get('title', 'No Title'),
                        'abstract': article.get('abstract', 'No Abstract'),
                        'url': news_url,
                        'published_date': article.get('published_date', 'No Date'),
                        'category': category.name
                    })
                    redis_client.set(redis_key, redis_value, ex=86400)
                    # Upsert into PostgreSQL, keyed by the article URL.
                    News.objects.update_or_create(
                        url=news_url,
                        defaults={
                            'title': article.get('title', 'No Title'),
                            'abstract': article.get('abstract', 'No Abstract'),
                            'published_date': article.get('published_date', 'No Date'),
                            'category': category
                        }
                    )
                break
            elif response.status_code == 429:
                # Rate limited: honor the Retry-After header when the server
                # sends a numeric value, otherwise use the fallback delay.
                retry_after = response.headers.get('Retry-After')
                wait = int(retry_after) if retry_after and retry_after.isdigit() else TOO_MANY_REQUEST_DELAY
                print(f"Too many requests for category: {category.name}. Retrying in {wait} seconds...")
                time.sleep(wait)
                retries += 1
            elif 400 <= response.status_code < 500:
                # Other 4xx responses (bad section, bad key, ...) are permanent;
                # retrying cannot succeed, so skip this category immediately.
                print(f"Failed to fetch articles for category: {category.name}. Status Code: {response.status_code}")
                break
            else:
                # 5xx or unexpected status: count it as one attempt and retry.
                print(f"Failed to fetch articles for category: {category.name}. Status Code: {response.status_code}")
                retries += 1
                time.sleep(REQUEST_DELAY)

        if retries == MAX_RETRIES:
            print(f"Max retries reached for category: {category.name}. Skipping...")

        # Throttle between categories to stay under the API rate limit.
        time.sleep(REQUEST_DELAY)
실패 시 기다렸다가 재시도하면서 retries를 증가시키고, 최대 3회 시도하도록 했다.
재시도 횟수와 대기 시간은 변수를 사용해서 쉽게 조정할 수 있게 했다.