Source code for web2vec.extractors.external_api.open_phish_features

import logging
from dataclasses import dataclass
from functools import cache

import requests

from web2vec.utils import fetch_file_from_url_and_read

logger = logging.getLogger(__name__)


@dataclass
class OpenPhishFeatures:
    """Result of an OpenPhish feed lookup for a single URL.

    Attributes:
        is_phishing: ``True`` when the URL was found in the feed,
            ``False`` otherwise.
    """

    is_phishing: bool
def get_open_phish_features(url: str) -> OpenPhishFeatures:
    """Check whether the given URL is listed in the OpenPhish feed.

    The feed is fetched on every call and scanned line by line; ``url``
    counts as listed when it occurs as a substring of any feed entry.

    Args:
        url: URL (or URL fragment) to look for in the feed.

    Returns:
        ``OpenPhishFeatures`` with ``is_phishing=True`` when some feed entry
        contains ``url``. ``is_phishing=False`` is returned when no entry
        matches, when ``url`` is empty, or when fetching the feed raises a
        ``requests`` exception (the failure is logged, not propagated).
    """
    if not url:
        # "" is a substring of every string, so without this guard an empty
        # query would be reported as phishing by the first feed entry.
        return OpenPhishFeatures(is_phishing=False)

    openphish_url = "https://openphish.com/feed.txt"
    try:
        text = fetch_file_from_url_and_read(openphish_url)
    except requests.exceptions.RequestException as e:
        # Best-effort lookup: a feed outage degrades to "not listed".
        # The exception is passed as a lazy %-argument; supplying it as an
        # extra positional arg to a message without a placeholder makes the
        # logging module fail while formatting and drop the record.
        logger.error("Error fetching OpenPhish feed: %s", e)
        return OpenPhishFeatures(is_phishing=False)

    listed = any(url in p_url for p_url in text.splitlines())
    return OpenPhishFeatures(is_phishing=listed)
@cache
def get_open_phish_features_cached(url: str) -> OpenPhishFeatures:
    """Memoised variant of ``get_open_phish_features``.

    The first call for a given ``url`` delegates to
    ``get_open_phish_features``; later calls with the same ``url`` return
    the stored result. ``functools.cache`` has no size limit, so one entry
    is kept per distinct ``url``, and whatever the lookup returned is
    remembered, including its ``is_phishing=False`` fallback when the feed
    could not be fetched.

    Args:
        url: URL to look up; used as the cache key.

    Returns:
        The ``OpenPhishFeatures`` computed for ``url``.
    """
    features = get_open_phish_features(url)
    return features
if __name__ == "__main__":
    # Manual smoke check: run an uncached lookup for a sample URL and print
    # the verdict. Only executes when the module is run as a script.
    url = "http://www.example.com"
    result = get_open_phish_features(url)
    print(f"{url} is phishing: {result.is_phishing}")