import asyncio
import re
import traceback

import crawlee.errors


def _get_only_innermost_exception(error: BaseException) -> BaseException:
    """Walk the exception chain downwards and return the exception at which the walk stops.

    At every step the `__cause__` is preferred; `__context__` is followed only when there is no cause. The walk stops
    at the first exception that has neither of them.

    A `UserHandlerTimeoutError` (exact type match, subclasses are walked like any other exception) also stops the
    walk: its direct cause, or failing that its direct context, is returned without descending any further. If it
    has neither, the `UserHandlerTimeoutError` itself is returned.
    """
    current = error
    while True:
        predecessor = current.__cause__ or current.__context__

        if type(current) is crawlee.errors.UserHandlerTimeoutError:
            # Do not dig below whatever directly triggered the handler timeout.
            return predecessor or current

        if not predecessor:
            # No __cause__ and no __context__, this is as deep as it can get.
            return current

        current = predecessor


def _get_filtered_traceback_parts_for_asyncio_timeout_error(traceback_parts: list[str]) -> list[str]:
    """Extract only the most relevant traceback parts from stack trace."""
    ignore_pattern = (
        r'([\\/]{1}asyncio[\\/]{1})|'  # internal asyncio parts
        r'(Traceback \(most recent call last\))|'  # common part of the stack trace formatting
        r'(asyncio\.exceptions\.CancelledError)'  # internal asyncio exception
    )
    return [
        _strip_pep657_highlighting(traceback_part)
        for traceback_part in traceback_parts
        if not re.findall(ignore_pattern, traceback_part)
    ]


def _strip_pep657_highlighting(traceback_part: str) -> str:
    """Remove PEP 657 highlighting from the traceback."""
    highlight_pattern = r'(\n\s*~*\^+~*\n)$'
    return re.sub(highlight_pattern, '\n', traceback_part)


def reduce_asyncio_timeout_error_to_relevant_traceback_parts(
    timeout_error: asyncio.exceptions.TimeoutError | crawlee.errors.UserHandlerTimeoutError,
) -> list[str]:
    """Format the innermost exception behind `timeout_error` and keep only the relevant traceback parts.

    The formatted traceback parts are passed through the asyncio-specific filter, so the result can be empty when
    every part is filtered out.
    """
    return _get_filtered_traceback_parts_for_asyncio_timeout_error(
        _get_traceback_parts_for_innermost_exception(timeout_error)
    )


def _get_traceback_parts_for_innermost_exception(error: Exception) -> list[str]:
    """Format the innermost exception of the chain behind `error` as a list of traceback strings.

    Only that single exception is formatted; exceptions chained to it are left out (`chain=False`).
    """
    root_cause = _get_only_innermost_exception(error)
    return traceback.format_exception(type(root_cause), value=root_cause, tb=root_cause.__traceback__, chain=False)


def get_one_line_error_summary_if_possible(error: Exception) -> str:
    """Return a single-line summary of the most relevant traceback part of `error`, or an empty string.

    The part that is picked depends on the kind of error:

    - `asyncio.exceptions.TimeoutError`: the last traceback part that survives the asyncio-specific filtering,
      prefixed with a comma. Empty string if nothing survives the filtering.
    - `crawlee.errors.UserHandlerTimeoutError`: the third traceback part of the innermost exception. Empty string if
      there are fewer than three parts.
    - Errors whose class string contains `playwright._impl._errors.Error`: always an empty string.
    - Anything else: the second-last traceback part of the innermost exception with PEP 657 highlighting removed.
      Empty string if there are fewer than two parts.

    Args:
        error: The exception to summarize.

    Returns:
        The selected part with surrounding newlines and spaces stripped and inner newlines replaced by `', '`.
    """
    if isinstance(error, asyncio.exceptions.TimeoutError):
        relevant_part = reduce_asyncio_timeout_error_to_relevant_traceback_parts(error)
        # The leading comma is kept by the final `strip`, which only removes newlines and spaces.
        most_relevant_part = (',' + relevant_part[-1]) if relevant_part else ''
    elif isinstance(error, crawlee.errors.UserHandlerTimeoutError):
        # Error in a user-defined handler. The first two parts are expected to be the location of the
        # `UserHandlerTimeoutError` in crawlee code and the third part the topmost user error.
        traceback_parts = _get_traceback_parts_for_innermost_exception(error)
        relevant_index_from_start = 3
        most_relevant_part = traceback_parts[2] if len(traceback_parts) >= relevant_index_from_start else ''
    elif 'playwright._impl._errors.Error' in str(error.__class__):
        # Playwright autogenerated errors are often very long, so we do not try to summarize them at all as they anyway
        # point to deep internals.
        return ''
    else:
        traceback_parts = _get_traceback_parts_for_innermost_exception(error)
        # Commonly last traceback part is type of the error, and the second last part is the relevant file.
        # If there are not enough traceback parts, then we are not sure how to summarize the error.
        relevant_traceback_part_index_from_end = 2
        # Reuse the parts computed above instead of walking the chain and formatting the traceback a second time.
        most_relevant_part = _strip_pep657_highlighting(
            traceback_parts[-relevant_traceback_part_index_from_end]
            if len(traceback_parts) >= relevant_traceback_part_index_from_end
            else ''
        )

    return most_relevant_part.strip('\n ').replace('\n', ', ')
