Source code for web2vec.extractors.external_api.open_pagerank_features
import logging
from dataclasses import dataclass
from functools import cache
from typing import Optional
import requests
from web2vec.config import config
logger = logging.getLogger(__name__)
[docs]
@dataclass
class OpenPageRankFeatures:
"""Dataclass for Open PageRank features."""
domain: str
page_rank_decimal: Optional[float]
updated_date: Optional[str]
[docs]
class OpenPageRankAPI:
def __init__(self, api_key):
self.api_key = api_key
self.base_url = "https://openpagerank.com/api/v1.0/getPageRank"
[docs]
def get_open_page_rank_features(
self, domain: str
) -> Optional[OpenPageRankFeatures]:
"""Get Open PageRank features for the given domain."""
headers = {"API-OPR": self.api_key}
params = {"domains[]": domain}
response = requests.get(
self.base_url, headers=headers, params=params, timeout=config.api_timeout
)
if response.status_code == 200:
data = response.json()
if "response" in data and len(data["response"]) > 0:
domain_data = data["response"][0]
return OpenPageRankFeatures(
domain=domain_data["domain"],
page_rank_decimal=domain_data.get("page_rank_decimal"),
updated_date=data["last_updated"],
)
else:
logger.warning("No data found for the specified domain.")
return None
else:
response.raise_for_status()
[docs]
def get_open_page_rank_features(domain: str) -> Optional[OpenPageRankFeatures]:
"""Get Open PageRank features for the given domain."""
api_key = config.open_page_rank_api_key
opr_api = OpenPageRankAPI(api_key)
return opr_api.get_open_page_rank_features(domain)
[docs]
@cache
def get_open_page_rank_features_cached(domain: str) -> Optional[OpenPageRankFeatures]:
"""Get Open PageRank features for the given domain (cached)."""
return get_open_page_rank_features(domain)
if __name__ == "__main__":
api_key = config.open_page_rank_api_key
domain = "wp.pl"
opr_api = OpenPageRankAPI(api_key)
page_rank_data = opr_api.get_open_page_rank_features(domain)
if page_rank_data:
print(f"Domain: {page_rank_data.domain}")
print(f"PageRank: {page_rank_data.page_rank_decimal}")
print(f"Updated Date: {page_rank_data.updated_date}")
else:
print("Failed to retrieve PageRank data.")