web2vec
Contents:
Introduction
Quick start
Contributing
Code of conduct
Reference
web2vec
Index
Index
A | B | C | D | E | F | G | H | I | L | M | N | O | P | Q | R | S | T | U | V | W | Y | Z
A
address (web2vec.extractors.whois_features.WhoisFeatures attribute)
api_timeout (web2vec.config.Config attribute)
ASN (web2vec.extractors.url_geo_features.GeoLiteDbType attribute)
asn (web2vec.extractors.url_geo_features.URLGeoFeatures attribute)
average_subdomain_length (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
B
body_length (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
(web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
body_length() (in module web2vec.extractors.html_body_features)
(in module web2vec.extractors.http_response_features)
body_to_special_char_ratio (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
(web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
body_to_special_char_ratio() (in module web2vec.extractors.html_body_features)
(in module web2vec.extractors.http_response_features)
BounceRate (web2vec.extractors.external_api.similar_web_features.Engagements attribute)
brave_search_api_key (web2vec.config.Config attribute)
build_graph() (in module web2vec.extractors.network_features)
C
Category (web2vec.extractors.external_api.similar_web_features.SimilarWebFeatures attribute)
CategoryRank (web2vec.extractors.external_api.similar_web_features.SimilarWebFeatures attribute)
CertificateExtractor (class in web2vec.crawlers.extractors)
CertificateFeatures (class in web2vec.extractors.ssl_certification_features)
check_forms() (in module web2vec.extractors.http_response_features)
check_header_content_security_policy() (in module web2vec.extractors.http_response_features)
check_header_strict_transport_security() (in module web2vec.extractors.http_response_features)
check_header_x_content_type_options() (in module web2vec.extractors.http_response_features)
check_header_x_frame_options() (in module web2vec.extractors.http_response_features)
check_header_x_xss_protection() (in module web2vec.extractors.http_response_features)
check_https() (in module web2vec.extractors.http_response_features)
check_obfuscated_scripts() (in module web2vec.extractors.html_body_features)
(in module web2vec.extractors.http_response_features)
check_phish_phishtank() (in module web2vec.extractors.external_api.phish_tank_features)
check_redirects() (in module web2vec.extractors.http_response_features)
check_server_version() (in module web2vec.extractors.http_response_features)
check_ssl() (in module web2vec.extractors.ssl_certification_features)
check_suspicious_keywords() (in module web2vec.extractors.html_body_features)
(in module web2vec.extractors.http_response_features)
city (web2vec.extractors.whois_features.WhoisFeatures attribute)
compute_derived_features() (web2vec.extractors.dns_features.DNSFeatures method)
Config (class in web2vec.config)
contains_forms (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
(web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
contains_keywords() (in module web2vec.extractors.url_lexical_features)
contains_obfuscated_scripts (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
(web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
contains_suspicious_keywords (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
(web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
copyright (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
count_ampersand_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_ampersand_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_ampersand_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_ampersand_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_asterisk_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_asterisk_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_asterisk_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_asterisk_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_at_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_at_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_at_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_at_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_char() (in module web2vec.extractors.url_lexical_features)
count_comma_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_comma_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_comma_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_comma_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_dash_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_dash_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_dash_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_dash_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_dollar_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_dollar_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_dollar_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_dollar_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_dot_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_dot_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_dot_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_dot_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_equals_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_equals_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_equals_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_equals_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_exclamation_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_exclamation_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_exclamation_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_exclamation_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_hash_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_hash_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_hash_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_hash_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_ips (web2vec.extractors.dns_features.DNSFeatures property)
count_mx_servers (web2vec.extractors.dns_features.DNSFeatures property)
count_name_servers (web2vec.extractors.dns_features.DNSFeatures property)
count_percent_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_percent_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_percent_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_percent_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_plus_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_plus_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_plus_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_plus_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_question_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_question_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_question_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_question_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_redirects() (in module web2vec.extractors.http_response_features)
count_slash_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_slash_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_slash_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_slash_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_space_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_space_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_space_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_space_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_tilde_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_tilde_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_tilde_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_tilde_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_underscore_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_underscore_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_underscore_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_underscore_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
count_vowels() (in module web2vec.extractors.url_lexical_features)
Country (web2vec.extractors.external_api.similar_web_features.TopCountryShare attribute)
COUNTRY (web2vec.extractors.url_geo_features.GeoLiteDbType attribute)
country (web2vec.extractors.whois_features.WhoisFeatures attribute)
country_code (web2vec.extractors.url_geo_features.URLGeoFeatures attribute)
CountryCode (web2vec.extractors.external_api.similar_web_features.SimilarWebFeatures attribute)
(web2vec.extractors.external_api.similar_web_features.TopCountryShare attribute)
CountryRank (web2vec.extractors.external_api.similar_web_features.SimilarWebFeatures attribute)
Cpc (web2vec.extractors.external_api.similar_web_features.TopKeyword attribute)
crawler_output_path (web2vec.config.Config attribute)
crawler_spider_depth_limit (web2vec.config.Config attribute)
create_directories() (in module web2vec.utils)
created_within_30_days (web2vec.extractors.whois_features.WhoisFeatures attribute)
created_within_365_days (web2vec.extractors.whois_features.WhoisFeatures attribute)
creation_date (web2vec.extractors.whois_features.WhoisFeatures attribute)
creation_datetime (web2vec.extractors.whois_features.WhoisFeatures attribute)
D
date (web2vec.extractors.external_api.similar_web_features.EstimatedMonthlyVisit attribute)
date_added (web2vec.extractors.external_api.url_haus_features.URLHausFeatures attribute)
days_until_expiration (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
(web2vec.extractors.whois_features.WhoisFeatures attribute)
default_output_path (web2vec.config.Config attribute)
Description (web2vec.extractors.external_api.similar_web_features.SimilarWebFeatures attribute)
detect_api_endpoints() (in module web2vec.extractors.html_body_features)
detect_likely_js_spa() (in module web2vec.extractors.html_body_features)
Direct (web2vec.extractors.external_api.similar_web_features.TrafficSource attribute)
directory_length (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
DNSExtractor (class in web2vec.crawlers.extractors)
DNSFeatures (class in web2vec.extractors.dns_features)
DNSRecordFeatures (class in web2vec.extractors.dns_features)
dnssec (web2vec.extractors.whois_features.WhoisFeatures attribute)
domain (web2vec.extractors.dns_features.DNSFeatures attribute)
(web2vec.extractors.external_api.open_pagerank_features.OpenPageRankFeatures attribute)
(web2vec.extractors.external_api.phish_tank_features.PhishTankFeatures property)
(web2vec.extractors.external_api.url_haus_features.URLHausFeatures property)
domain_age (web2vec.extractors.whois_features.WhoisFeatures property)
domain_age_days (web2vec.extractors.whois_features.WhoisFeatures attribute)
domain_contains_keywords (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
domain_entropy (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
domain_google_index (web2vec.extractors.external_api.google_index_features.GoogleIndexFeatures attribute)
domain_in_ip_format (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
domain_length (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
domain_name (web2vec.extractors.whois_features.WhoisFeatures attribute)
domain_spf (web2vec.extractors.dns_features.DNSFeatures attribute)
E
email_present_in_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
emails (web2vec.extractors.whois_features.WhoisFeatures attribute)
Engagements (class in web2vec.extractors.external_api.similar_web_features)
(web2vec.extractors.external_api.similar_web_features.SimilarWebFeatures attribute)
entropy() (in module web2vec.utils)
entropy_of_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
EstimatedMonthlyVisit (class in web2vec.extractors.external_api.similar_web_features)
EstimatedMonthlyVisits (web2vec.extractors.external_api.similar_web_features.SimilarWebFeatures attribute)
EstimatedValue (web2vec.extractors.external_api.similar_web_features.TopKeyword attribute)
expiration_date (web2vec.extractors.whois_features.WhoisFeatures attribute)
expiration_datetime (web2vec.extractors.whois_features.WhoisFeatures attribute)
expires_within_30_days (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
(web2vec.extractors.whois_features.WhoisFeatures attribute)
expires_within_7_days (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
(web2vec.extractors.whois_features.WhoisFeatures attribute)
extract_features() (web2vec.crawlers.extractors.CertificateExtractor method)
(web2vec.crawlers.extractors.DNSExtractor method)
(web2vec.crawlers.extractors.Extractor method)
(web2vec.crawlers.extractors.GoogleIndexExtractor method)
(web2vec.crawlers.extractors.HtmlBodyExtractor method)
(web2vec.crawlers.extractors.HttpResponseExtractor method)
(web2vec.crawlers.extractors.OpenPageRankExtractor method)
(web2vec.crawlers.extractors.OpenPhishExtractor method)
(web2vec.crawlers.extractors.PhishTankExtractor method)
(web2vec.crawlers.extractors.SimilarWebExtractor method)
(web2vec.crawlers.extractors.UrlGeoExtractor method)
(web2vec.crawlers.extractors.UrlHausExtractor method)
(web2vec.crawlers.extractors.UrlLexicalExtractor method)
(web2vec.crawlers.extractors.WhoisExtractor method)
extract_ttl (web2vec.extractors.dns_features.DNSFeatures property)
Extractor (class in web2vec.crawlers.extractors)
F
favicon_url (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
FEATURE_CLASS (web2vec.crawlers.extractors.CertificateExtractor attribute)
(web2vec.crawlers.extractors.DNSExtractor attribute)
(web2vec.crawlers.extractors.Extractor attribute)
(web2vec.crawlers.extractors.GoogleIndexExtractor attribute)
(web2vec.crawlers.extractors.HtmlBodyExtractor attribute)
(web2vec.crawlers.extractors.HttpResponseExtractor attribute)
(web2vec.crawlers.extractors.OpenPageRankExtractor attribute)
(web2vec.crawlers.extractors.OpenPhishExtractor attribute)
(web2vec.crawlers.extractors.PhishTankExtractor attribute)
(web2vec.crawlers.extractors.SimilarWebExtractor attribute)
(web2vec.crawlers.extractors.UrlGeoExtractor attribute)
(web2vec.crawlers.extractors.UrlHausExtractor attribute)
(web2vec.crawlers.extractors.UrlLexicalExtractor attribute)
(web2vec.crawlers.extractors.WhoisExtractor attribute)
FEATURE_TYPE (web2vec.crawlers.extractors.CertificateExtractor attribute)
(web2vec.crawlers.extractors.DNSExtractor attribute)
(web2vec.crawlers.extractors.Extractor attribute)
(web2vec.crawlers.extractors.GoogleIndexExtractor attribute)
(web2vec.crawlers.extractors.HtmlBodyExtractor attribute)
(web2vec.crawlers.extractors.HttpResponseExtractor attribute)
(web2vec.crawlers.extractors.OpenPageRankExtractor attribute)
(web2vec.crawlers.extractors.OpenPhishExtractor attribute)
(web2vec.crawlers.extractors.PhishTankExtractor attribute)
(web2vec.crawlers.extractors.SimilarWebExtractor attribute)
(web2vec.crawlers.extractors.UrlGeoExtractor attribute)
(web2vec.crawlers.extractors.UrlHausExtractor attribute)
(web2vec.crawlers.extractors.UrlLexicalExtractor attribute)
(web2vec.crawlers.extractors.WhoisExtractor attribute)
features_name() (web2vec.crawlers.extractors.Extractor method)
fetch_file_from_url() (in module web2vec.utils)
fetch_file_from_url_and_read() (in module web2vec.utils)
fetch_url() (in module web2vec.utils)
find_copyright() (in module web2vec.extractors.html_body_features)
find_favicon() (in module web2vec.extractors.html_body_features)
find_logo() (in module web2vec.extractors.html_body_features)
found_anchors (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
found_api_endpoints (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
found_forms (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
found_images (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
found_media (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
found_network_requests (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
G
GeoLiteDbType (class in web2vec.extractors.url_geo_features)
get_asn() (in module web2vec.extractors.url_geo_features)
get_certificate_features() (in module web2vec.extractors.ssl_certification_features)
get_certificate_features_cached() (in module web2vec.extractors.ssl_certification_features)
get_country() (in module web2vec.extractors.url_geo_features)
get_dns_features() (in module web2vec.extractors.dns_features)
get_dns_features_cached() (in module web2vec.extractors.dns_features)
get_domain_from_url() (in module web2vec.utils)
get_file_path_for_url() (in module web2vec.utils)
get_geolite_db_files() (in module web2vec.extractors.url_geo_features)
get_github_repo_release_info() (in module web2vec.utils)
get_google_index_features() (in module web2vec.extractors.external_api.google_index_features)
get_google_index_features_cached() (in module web2vec.extractors.external_api.google_index_features)
get_html_body_features() (in module web2vec.extractors.html_body_features)
get_http_response_features() (in module web2vec.extractors.http_response_features)
get_ip_from_domain() (in module web2vec.utils)
get_ip_from_url() (in module web2vec.utils)
get_open_page_rank_features() (in module web2vec.extractors.external_api.open_pagerank_features)
(web2vec.extractors.external_api.open_pagerank_features.OpenPageRankAPI method)
get_open_page_rank_features_cached() (in module web2vec.extractors.external_api.open_pagerank_features)
get_open_phish_features() (in module web2vec.extractors.external_api.open_phish_features)
get_open_phish_features_cached() (in module web2vec.extractors.external_api.open_phish_features)
get_phishtank_features() (in module web2vec.extractors.external_api.phish_tank_features)
get_phishtank_features_cached() (in module web2vec.extractors.external_api.phish_tank_features)
get_phishtank_feed() (in module web2vec.extractors.external_api.phish_tank_features)
get_similar_web_features() (in module web2vec.extractors.external_api.similar_web_features)
get_similar_web_features_cached() (in module web2vec.extractors.external_api.similar_web_features)
get_title() (web2vec.crawlers.models.WebPage method)
get_tls_certificate() (in module web2vec.extractors.ssl_certification_features)
get_url_geo_features() (in module web2vec.extractors.url_geo_features)
get_url_geo_features_cached() (in module web2vec.extractors.url_geo_features)
get_url_haus_features() (in module web2vec.extractors.external_api.url_haus_features)
get_url_haus_features_cached() (in module web2vec.extractors.external_api.url_haus_features)
get_url_lexical_features() (in module web2vec.extractors.url_lexical_features)
get_url_lexical_features_cached() (in module web2vec.extractors.url_lexical_features)
get_whois_features() (in module web2vec.extractors.whois_features)
get_whois_features_cached() (in module web2vec.extractors.whois_features)
GlobalRank (web2vec.extractors.external_api.similar_web_features.SimilarWebFeatures attribute)
GoogleIndexExtractor (class in web2vec.crawlers.extractors)
GoogleIndexFeatures (class in web2vec.extractors.external_api.google_index_features)
H
has_repeated_digits() (in module web2vec.extractors.url_lexical_features)
having_anchor (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
having_digit_in_subdomain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
having_fragment (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
having_hyphen_in_subdomain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
having_special_char_in_subdomain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
having_underscore_in_subdomain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
hidden_elements() (in module web2vec.extractors.html_body_features)
html_snapshot_path (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
HtmlBodyExtractor (class in web2vec.crawlers.extractors)
HtmlBodyFeatures (class in web2vec.extractors.html_body_features)
HttpResponseExtractor (class in web2vec.crawlers.extractors)
HttpResponseFeatures (class in web2vec.extractors.http_response_features)
I
id (web2vec.extractors.external_api.url_haus_features.URLHausFeatures attribute)
iframe_redirection (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
iframe_redirection() (in module web2vec.extractors.html_body_features)
is_certificate_trusted() (in module web2vec.extractors.ssl_certification_features)
is_certificate_valid() (in module web2vec.extractors.ssl_certification_features)
is_expired (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
(web2vec.extractors.whois_features.WhoisFeatures attribute)
is_external_url() (in module web2vec.extractors.html_body_features)
is_indexed (web2vec.extractors.external_api.google_index_features.GoogleIndexFeatures attribute)
is_ip (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
is_live (web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
is_live() (in module web2vec.extractors.http_response_features)
is_numerical_type() (in module web2vec.utils)
is_phishing (web2vec.extractors.external_api.open_phish_features.OpenPhishFeatures attribute)
is_trusted (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
is_valid (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
issuer (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
issuer_common_name (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
issuer_is_free_ca (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
issuer_is_lets_encrypt (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
issuer_organization_name (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
L
LargeScreenshot (web2vec.extractors.external_api.similar_web_features.SimilarWebFeatures attribute)
last_online (web2vec.extractors.external_api.url_haus_features.URLHausFeatures attribute)
likely_js_spa (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
logo_url (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
M
Mail (web2vec.extractors.external_api.similar_web_features.TrafficSource attribute)
min_ttl (web2vec.extractors.dns_features.DNSFeatures attribute)
missing_content_security_policy (web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
missing_strict_transport_security (web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
missing_x_content_type_options (web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
missing_x_frame_options (web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
missing_x_xss_protection (web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
model_config (web2vec.config.Config attribute)
module
web2vec
web2vec.config
web2vec.crawlers
web2vec.crawlers.extractors
web2vec.crawlers.models
web2vec.crawlers.spiders
web2vec.extractors
web2vec.extractors.dns_features
web2vec.extractors.external_api
web2vec.extractors.external_api.google_index_features
web2vec.extractors.external_api.open_pagerank_features
web2vec.extractors.external_api.open_phish_features
web2vec.extractors.external_api.phish_tank_features
web2vec.extractors.external_api.similar_web_features
web2vec.extractors.external_api.url_haus_features
web2vec.extractors.html_body_features
web2vec.extractors.http_response_features
web2vec.extractors.network_features
web2vec.extractors.ssl_certification_features
web2vec.extractors.url_geo_features
web2vec.extractors.url_lexical_features
web2vec.extractors.whois_features
web2vec.utils
web2vec.version
Month (web2vec.extractors.external_api.similar_web_features.Engagements attribute)
mouse_over_effect (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
mouse_over_effect() (in module web2vec.extractors.html_body_features)
N
name (web2vec.crawlers.spiders.Web2VecSpider attribute)
Name (web2vec.extractors.external_api.similar_web_features.TopKeyword attribute)
name (web2vec.extractors.whois_features.WhoisFeatures attribute)
name_servers (web2vec.extractors.whois_features.WhoisFeatures attribute)
not_after (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
not_before (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
num_api_endpoints (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_email_forms (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_email_forms() (in module web2vec.extractors.html_body_features)
num_external_iframes (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_external_iframes() (in module web2vec.extractors.html_body_features)
num_external_network_requests (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_external_scripts (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_external_scripts() (in module web2vec.extractors.html_body_features)
num_external_styles (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_external_styles() (in module web2vec.extractors.html_body_features)
num_forms (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_forms() (in module web2vec.extractors.html_body_features)
num_forms_external_action (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_forms_external_action() (in module web2vec.extractors.html_body_features)
num_forms_get (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_forms_get() (in module web2vec.extractors.html_body_features)
num_forms_post (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_forms_post() (in module web2vec.extractors.html_body_features)
num_hidden_elements (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_iframes_http (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_iframes_http() (in module web2vec.extractors.html_body_features)
num_images (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
(web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
num_images() (in module web2vec.extractors.html_body_features)
(in module web2vec.extractors.http_response_features)
num_internal_links (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_internal_links() (in module web2vec.extractors.html_body_features)
num_links (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
(web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
num_links() (in module web2vec.extractors.html_body_features)
(in module web2vec.extractors.http_response_features)
num_media_external (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_media_external() (in module web2vec.extractors.html_body_features)
num_media_http (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_media_http() (in module web2vec.extractors.html_body_features)
num_meta_tags (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_meta_tags() (in module web2vec.extractors.html_body_features)
num_network_requests (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_safe_anchors (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_safe_anchors() (in module web2vec.extractors.html_body_features)
num_scripts_http (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_scripts_http() (in module web2vec.extractors.html_body_features)
num_styles_http (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
num_styles_http() (in module web2vec.extractors.html_body_features)
num_titles (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
(web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
num_titles() (in module web2vec.extractors.html_body_features)
(in module web2vec.extractors.http_response_features)
number_of_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
number_of_subdomains (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
numeric_chars_ratio() (in module web2vec.extractors.url_lexical_features)
O
online (web2vec.extractors.external_api.phish_tank_features.PhishTankFeatures attribute)
open_page_rank_api_key (web2vec.config.Config attribute)
OpenPageRankAPI (class in web2vec.extractors.external_api.open_pagerank_features)
OpenPageRankExtractor (class in web2vec.crawlers.extractors)
OpenPageRankFeatures (class in web2vec.extractors.external_api.open_pagerank_features)
OpenPhishExtractor (class in web2vec.crawlers.extractors)
OpenPhishFeatures (class in web2vec.extractors.external_api.open_phish_features)
org (web2vec.extractors.whois_features.WhoisFeatures attribute)
P
page_rank_decimal (web2vec.extractors.external_api.open_pagerank_features.OpenPageRankFeatures attribute)
PagePerVisit (web2vec.extractors.external_api.similar_web_features.Engagements attribute)
PaidReferrals (web2vec.extractors.external_api.similar_web_features.TrafficSource attribute)
parameters_length (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
parse() (web2vec.crawlers.spiders.Web2VecSpider method)
percentage_numeric_chars (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
phish_detail_url (web2vec.extractors.external_api.phish_tank_features.PhishTankFeatures attribute)
phish_id (web2vec.extractors.external_api.phish_tank_features.PhishTankFeatures attribute)
PhishTankExtractor (class in web2vec.crawlers.extractors)
PhishTankFeatures (class in web2vec.extractors.external_api.phish_tank_features)
position (web2vec.extractors.external_api.google_index_features.GoogleIndexFeatures attribute)
process_extractors() (in module web2vec.crawlers.extractors)
Q
qty_ip_resolved (web2vec.extractors.dns_features.DNSFeatures attribute)
qty_mx_servers (web2vec.extractors.dns_features.DNSFeatures attribute)
qty_nameservers (web2vec.extractors.dns_features.DNSFeatures attribute)
R
raw (web2vec.extractors.whois_features.WhoisFeatures attribute)
RawData (web2vec.extractors.external_api.similar_web_features.SimilarWebFeatures attribute)
record_type (web2vec.extractors.dns_features.DNSRecordFeatures attribute)
records (web2vec.extractors.dns_features.DNSFeatures attribute)
redirect_count (web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
redirects (web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
referral_url (web2vec.extractors.whois_features.WhoisFeatures attribute)
Referrals (web2vec.extractors.external_api.similar_web_features.TrafficSource attribute)
registrar (web2vec.extractors.whois_features.WhoisFeatures attribute)
remote_url_output_path (web2vec.config.Config attribute)
repeated_digits_directory (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
repeated_digits_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
repeated_digits_parameters (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
repeated_digits_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
reporter (web2vec.extractors.external_api.url_haus_features.URLHausFeatures attribute)
right_click_disabled (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
right_click_disabled() (in module web2vec.extractors.html_body_features)
S
sanitize_filename() (in module web2vec.utils)
script_length (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
script_length (web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
script_length() (in module web2vec.extractors.html_body_features)
script_length() (in module web2vec.extractors.http_response_features)
script_to_body_ratio (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
script_to_body_ratio (web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
script_to_body_ratio() (in module web2vec.extractors.html_body_features)
script_to_body_ratio() (in module web2vec.extractors.http_response_features)
script_to_special_chars_ratio (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
script_to_special_chars_ratio (web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
script_to_special_chars_ratio() (in module web2vec.extractors.html_body_features)
script_to_special_chars_ratio() (in module web2vec.extractors.http_response_features)
Search (web2vec.extractors.external_api.similar_web_features.TrafficSource attribute)
server_client_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
server_version (web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
set_correct_path() (web2vec.config.Config class method)
SimilarWebExtractor (class in web2vec.crawlers.extractors)
SimilarWebFeatures (class in web2vec.extractors.external_api.similar_web_features)
SiteName (web2vec.extractors.external_api.similar_web_features.SimilarWebFeatures attribute)
Social (web2vec.extractors.external_api.similar_web_features.TrafficSource attribute)
source_mode (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
special_characters (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
special_characters (web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
special_characters() (in module web2vec.extractors.html_body_features)
special_characters() (in module web2vec.extractors.http_response_features)
ssl_verify (web2vec.config.Config attribute)
state (web2vec.extractors.whois_features.WhoisFeatures attribute)
status (web2vec.extractors.whois_features.WhoisFeatures attribute)
store_json() (in module web2vec.utils)
subdomain_count (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
subject (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
submission_time (web2vec.extractors.external_api.phish_tank_features.PhishTankFeatures attribute)
suspicious_file_extension (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
T
tags (web2vec.extractors.external_api.url_haus_features.URLHausFeatures attribute)
target (web2vec.extractors.external_api.phish_tank_features.PhishTankFeatures attribute)
threat (web2vec.extractors.external_api.url_haus_features.URLHausFeatures attribute)
time_domain_activation (web2vec.extractors.whois_features.WhoisFeatures attribute)
time_domain_expiration (web2vec.extractors.whois_features.WhoisFeatures attribute)
time_response (web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
TimeOnSite (web2vec.extractors.external_api.similar_web_features.Engagements attribute)
Title (web2vec.extractors.external_api.similar_web_features.SimilarWebFeatures attribute)
tld_amount_url (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
tld_count() (in module web2vec.extractors.url_lexical_features)
tld_popularity (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
tld_presence_in_arguments (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
token_count (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
token_count() (in module web2vec.extractors.url_lexical_features)
TopCountryShare (class in web2vec.extractors.external_api.similar_web_features)
TopCountryShares (web2vec.extractors.external_api.similar_web_features.SimilarWebFeatures attribute)
TopKeyword (class in web2vec.extractors.external_api.similar_web_features)
TopKeywords (web2vec.extractors.external_api.similar_web_features.SimilarWebFeatures attribute)
TrafficSource (class in web2vec.extractors.external_api.similar_web_features)
TrafficSources (web2vec.extractors.external_api.similar_web_features.SimilarWebFeatures attribute)
transform_value() (in module web2vec.utils)
trust_message (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
ttl (web2vec.extractors.dns_features.DNSRecordFeatures attribute)
ttl_expires_within_day (web2vec.extractors.dns_features.DNSFeatures attribute)
ttl_expires_within_hour (web2vec.extractors.dns_features.DNSFeatures attribute)
ttl_expires_within_week (web2vec.extractors.dns_features.DNSFeatures attribute)
ttl_hostname (web2vec.extractors.dns_features.DNSFeatures attribute)
U
updated_date (web2vec.extractors.external_api.open_pagerank_features.OpenPageRankFeatures attribute)
updated_date (web2vec.extractors.whois_features.WhoisFeatures attribute)
url (web2vec.extractors.external_api.phish_tank_features.PhishTankFeatures attribute)
url (web2vec.extractors.external_api.url_haus_features.URLHausFeatures attribute)
url (web2vec.extractors.url_geo_features.URLGeoFeatures attribute)
url_depth (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
url_depth() (in module web2vec.extractors.url_lexical_features)
url_google_index (web2vec.extractors.external_api.google_index_features.GoogleIndexFeatures attribute)
url_length (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
url_shortened (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
url_status (web2vec.extractors.external_api.url_haus_features.URLHausFeatures attribute)
UrlGeoExtractor (class in web2vec.crawlers.extractors)
URLGeoFeatures (class in web2vec.extractors.url_geo_features)
urlhaus_link (web2vec.extractors.external_api.url_haus_features.URLHausFeatures attribute)
UrlHausExtractor (class in web2vec.crawlers.extractors)
URLHausFeatures (class in web2vec.extractors.external_api.url_haus_features)
UrlLexicalExtractor (class in web2vec.crawlers.extractors)
URLLexicalFeatures (class in web2vec.extractors.url_lexical_features)
uses_https (web2vec.extractors.http_response_features.HttpResponseFeatures attribute)
uses_shortening_service (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
uses_shortening_service() (in module web2vec.extractors.url_lexical_features)
V
valid_in_30_days (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
valid_in_7_days (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
valid_ip() (in module web2vec.utils)
validity_duration_days (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
validity_message (web2vec.extractors.ssl_certification_features.CertificateFeatures attribute)
Value (web2vec.extractors.external_api.similar_web_features.TopCountryShare attribute)
values (web2vec.extractors.dns_features.DNSRecordFeatures attribute)
verification_time (web2vec.extractors.external_api.phish_tank_features.PhishTankFeatures attribute)
verified (web2vec.extractors.external_api.phish_tank_features.PhishTankFeatures attribute)
Version (web2vec.extractors.external_api.similar_web_features.SimilarWebFeatures attribute)
Visits (web2vec.extractors.external_api.similar_web_features.Engagements attribute)
visits (web2vec.extractors.external_api.similar_web_features.EstimatedMonthlyVisit attribute)
Volume (web2vec.extractors.external_api.similar_web_features.TopKeyword attribute)
vowel_count_domain (web2vec.extractors.url_lexical_features.URLLexicalFeatures attribute)
W
was_js_rendered (web2vec.extractors.html_body_features.HtmlBodyFeatures attribute)
web2vec (module)
web2vec.config (module)
web2vec.crawlers (module)
web2vec.crawlers.extractors (module)
web2vec.crawlers.models (module)
web2vec.crawlers.spiders (module)
web2vec.extractors (module)
web2vec.extractors.dns_features (module)
web2vec.extractors.external_api (module)
web2vec.extractors.external_api.google_index_features (module)
web2vec.extractors.external_api.open_pagerank_features (module)
web2vec.extractors.external_api.open_phish_features (module)
web2vec.extractors.external_api.phish_tank_features (module)
web2vec.extractors.external_api.similar_web_features (module)
web2vec.extractors.external_api.url_haus_features (module)
web2vec.extractors.html_body_features (module)
web2vec.extractors.http_response_features (module)
web2vec.extractors.network_features (module)
web2vec.extractors.ssl_certification_features (module)
web2vec.extractors.url_geo_features (module)
web2vec.extractors.url_lexical_features (module)
web2vec.extractors.whois_features (module)
web2vec.utils (module)
web2vec.version (module)
Web2VecSpider (class in web2vec.crawlers.spiders)
WebPage (class in web2vec.crawlers.models)
whois_server (web2vec.extractors.whois_features.WhoisFeatures attribute)
WhoisExtractor (class in web2vec.crawlers.extractors)
WhoisFeatures (class in web2vec.extractors.whois_features)
Y
Year (web2vec.extractors.external_api.similar_web_features.Engagements attribute)
Z
zipcode (web2vec.extractors.whois_features.WhoisFeatures attribute)