프로젝트

Too Many Requests (HTTP 429)

content0474 2025. 1. 14. 20:38

발생한 문제

API로 데이터를 가져올 때 일부 카테고리는 Too Many Requests(HTTP 429)가 뜨면서 데이터가 가져와지지 않음

 

기존코드

@shared_task
def fetch_and_store_news(news_source="NYTimes"):
    """Fetch top stories from the NYT Top Stories API for every Category.

    For each category, the publisher-specific section name is resolved, the
    API is queried once, and the first five articles are (1) cached in Redis
    with a 24-hour TTL and (2) upserted into PostgreSQL keyed by article URL.

    NOTE(review): on HTTP 429 this version only sleeps 10s and moves on to
    the next category — the failed category is never retried, so its
    articles are silently skipped (the defect discussed in this post).
    """
    API_KEY = config("NYT_API_KEY")

    categories = Category.objects.all()
    if not categories.exists():
        print("No categories found in the database.")
        return

    for category in categories:
        # Resolve the publisher-specific category (section) name.
        source_category = category.get_source_category(category.name, news_source)
        if not source_category:
            print(f"No mapping found for category '{category.name}' in source '{news_source}'.")
            continue

        url = f"https://api.nytimes.com/svc/topstories/v2/{source_category}.json"
        params = {'api-key': API_KEY}

        response = requests.get(url, params=params)
        if response.status_code == 200:
            data = response.json()
            articles = data.get('results', [])
            print(f"Fetching articles for category: {category.name} ({source_category})")

            for article in articles[:5]:
                # Cache in Redis (24h TTL) under news:<category>:<url>.
                news_url = article.get('url', 'No URL')
                redis_key = f"news:{category.name.lower()}:{news_url}"
                redis_value = json.dumps({
                    'title': article.get('title', 'No Title'),
                    'abstract': article.get('abstract', 'No Abstract'),
                    'url': news_url,
                    'published_date': article.get('published_date', 'No Date'),
                    'category': category.name
                })
                redis_client.set(redis_key, redis_value, ex=86400)

                # Upsert into PostgreSQL keyed by the article URL.
                News.objects.update_or_create(
                    url=news_url,
                    defaults={
                        'title': article.get('title', 'No Title'),
                        'abstract': article.get('abstract', 'No Abstract'),
                        'published_date': article.get('published_date', 'No Date'),
                        'category': category
                    }
                )
        elif response.status_code == 429:
            # Rate-limited: waits 10s but does NOT retry this category.
            print(f"Too many requests for category: {category.name}. Please wait.")
            time.sleep(10)
        else:
            print(f"Failed to fetch articles for category: {category.name}. Status Code: {response.status_code}")

 

Too Many Requests 관련 처리 로직이 들어가 있기는 하지만, Too Many Requests로 인해 실패한 작업을 다시 시도하거나 재시도 횟수를 설정해 두지 않았다.

 

"Too many requests for category: ___" 로그가 찍힌 카테고리들은 기사가 받아지지 않았다.

 

수정코드

@shared_task
def fetch_and_store_news(news_source="NYTimes"):
    """Fetch top stories from the NYT Top Stories API for every Category.

    For each category, up to ``MAX_RETRIES`` attempts are made: an HTTP 429
    waits ``TOO_MANY_REQUEST_DELAY`` seconds before retrying, any other
    failure waits ``REQUEST_DELAY`` seconds.  On success the first five
    articles are cached in Redis (24h TTL) and upserted into PostgreSQL
    keyed by article URL.

    Fixes over the previous revision:
      * an HTTP 429 now also consumes a retry — previously the 429 branch
        never incremented ``retries``, so the while-loop could spin forever
        if the API kept rate-limiting;
      * network-level errors (``requests.RequestException``) are caught and
        retried instead of aborting the whole task mid-loop.
    """
    API_KEY = config("NYT_API_KEY")
    MAX_RETRIES = 3                # max attempts per category
    REQUEST_DELAY = 5              # pause between requests / ordinary retries (seconds)
    TOO_MANY_REQUEST_DELAY = 60    # pause after an HTTP 429 (seconds)

    categories = Category.objects.all()
    if not categories.exists():
        print("No categories found in the database.")
        return

    for category in categories:
        # Resolve the publisher-specific category (section) name.
        source_category = category.get_source_category(category.name, news_source)
        if not source_category:
            print(f"No mapping found for category '{category.name}' in source '{news_source}'.")
            continue

        url = f"https://api.nytimes.com/svc/topstories/v2/{source_category}.json"
        params = {'api-key': API_KEY}

        retries = 0
        while retries < MAX_RETRIES:
            try:
                response = requests.get(url, params=params)
            except requests.RequestException as exc:
                # Connection/timeout problems: retry instead of crashing the task.
                print(f"Request error for category: {category.name}: {exc}")
                retries += 1
                time.sleep(REQUEST_DELAY)
                continue

            if response.status_code == 200:
                data = response.json()
                articles = data.get('results', [])
                print(f"Fetching articles for category: {category.name} ({source_category})")

                for article in articles[:5]:
                    news_url = article.get('url', 'No URL')
                    # Cache in Redis (24h TTL) under news:<category>:<url>.
                    redis_key = f"news:{category.name.lower()}:{news_url}"
                    redis_value = json.dumps({
                        'title': article.get('title', 'No Title'),
                        'abstract': article.get('abstract', 'No Abstract'),
                        'url': news_url,
                        'published_date': article.get('published_date', 'No Date'),
                        'category': category.name
                    })
                    redis_client.set(redis_key, redis_value, ex=86400)

                    # Upsert into PostgreSQL keyed by the article URL.
                    News.objects.update_or_create(
                        url=news_url,
                        defaults={
                            'title': article.get('title', 'No Title'),
                            'abstract': article.get('abstract', 'No Abstract'),
                            'published_date': article.get('published_date', 'No Date'),
                            'category': category
                        }
                    )
                break
            elif response.status_code == 429:
                # Rate-limited: back off longer, but still consume a retry so
                # a persistently throttling API cannot loop forever.
                retries += 1
                print(f"Too many requests for category: {category.name}. Retrying in {TOO_MANY_REQUEST_DELAY} seconds...")
                time.sleep(TOO_MANY_REQUEST_DELAY)
            else:
                print(f"Failed to fetch articles for category: {category.name}. Status Code: {response.status_code}")
                retries += 1
                time.sleep(REQUEST_DELAY)

        if retries == MAX_RETRIES:
            print(f"Max retries reached for category: {category.name}. Skipping...")
        # Gentle pacing between categories to stay under the rate limit.
        time.sleep(REQUEST_DELAY)

 

 

실패 시 기다렸다가 재시도하면서 retries를 증가시키고, 최대 3회 시도하도록 했다.

재시도 횟수와 대기 시간은 변수를 사용해서 쉽게 조정할 수 있게 했다.

 

Too Many Requests로 실패했던 politics도 60초 후 재시도하여 성공