web2vec.extractors package
Subpackages
- web2vec.extractors.external_api package
- Submodules
- web2vec.extractors.external_api.google_index_features module
- web2vec.extractors.external_api.open_pagerank_features module
- web2vec.extractors.external_api.open_phish_features module
- web2vec.extractors.external_api.phish_tank_features module
- web2vec.extractors.external_api.similar_web_features module
- web2vec.extractors.external_api.url_haus_features module
- Module contents
- Submodules
Submodules
- web2vec.extractors.dns_features module
  - DNSFeatures
    - DNSFeatures.compute_derived_features()
    - DNSFeatures.count_ips
    - DNSFeatures.count_mx_servers
    - DNSFeatures.count_name_servers
    - DNSFeatures.domain
    - DNSFeatures.domain_spf
    - DNSFeatures.extract_ttl
    - DNSFeatures.min_ttl
    - DNSFeatures.qty_ip_resolved
    - DNSFeatures.qty_mx_servers
    - DNSFeatures.qty_nameservers
    - DNSFeatures.records
    - DNSFeatures.ttl_expires_within_day
    - DNSFeatures.ttl_expires_within_hour
    - DNSFeatures.ttl_expires_within_week
    - DNSFeatures.ttl_hostname
  - DNSRecordFeatures
  - get_dns_features()
  - get_dns_features_cached()
- web2vec.extractors.html_body_features module
  - HtmlBodyFeatures
    - HtmlBodyFeatures.body_length
    - HtmlBodyFeatures.body_to_special_char_ratio
    - HtmlBodyFeatures.contains_forms
    - HtmlBodyFeatures.contains_obfuscated_scripts
    - HtmlBodyFeatures.contains_suspicious_keywords
    - HtmlBodyFeatures.copyright
    - HtmlBodyFeatures.favicon_url
    - HtmlBodyFeatures.found_anchors
    - HtmlBodyFeatures.found_api_endpoints
    - HtmlBodyFeatures.found_forms
    - HtmlBodyFeatures.found_images
    - HtmlBodyFeatures.found_media
    - HtmlBodyFeatures.found_network_requests
    - HtmlBodyFeatures.html_snapshot_path
    - HtmlBodyFeatures.iframe_redirection
    - HtmlBodyFeatures.likely_js_spa
    - HtmlBodyFeatures.logo_url
    - HtmlBodyFeatures.mouse_over_effect
    - HtmlBodyFeatures.num_api_endpoints
    - HtmlBodyFeatures.num_email_forms
    - HtmlBodyFeatures.num_external_iframes
    - HtmlBodyFeatures.num_external_network_requests
    - HtmlBodyFeatures.num_external_scripts
    - HtmlBodyFeatures.num_external_styles
    - HtmlBodyFeatures.num_forms
    - HtmlBodyFeatures.num_forms_external_action
    - HtmlBodyFeatures.num_forms_get
    - HtmlBodyFeatures.num_forms_post
    - HtmlBodyFeatures.num_hidden_elements
    - HtmlBodyFeatures.num_iframes_http
    - HtmlBodyFeatures.num_images
    - HtmlBodyFeatures.num_internal_links
    - HtmlBodyFeatures.num_links
    - HtmlBodyFeatures.num_media_external
    - HtmlBodyFeatures.num_media_http
    - HtmlBodyFeatures.num_meta_tags
    - HtmlBodyFeatures.num_network_requests
    - HtmlBodyFeatures.num_safe_anchors
    - HtmlBodyFeatures.num_scripts_http
    - HtmlBodyFeatures.num_styles_http
    - HtmlBodyFeatures.num_titles
    - HtmlBodyFeatures.right_click_disabled
    - HtmlBodyFeatures.script_length
    - HtmlBodyFeatures.script_to_body_ratio
    - HtmlBodyFeatures.script_to_special_chars_ratio
    - HtmlBodyFeatures.source_mode
    - HtmlBodyFeatures.special_characters
    - HtmlBodyFeatures.was_js_rendered
  - body_length()
  - body_to_special_char_ratio()
  - check_obfuscated_scripts()
  - check_suspicious_keywords()
  - detect_api_endpoints()
  - detect_likely_js_spa()
  - find_copyright()
  - find_favicon()
  - find_logo()
  - get_html_body_features()
  - hidden_elements()
  - iframe_redirection()
  - is_external_url()
  - mouse_over_effect()
  - num_email_forms()
  - num_external_iframes()
  - num_external_scripts()
  - num_external_styles()
  - num_forms()
  - num_forms_external_action()
  - num_forms_get()
  - num_forms_post()
  - num_iframes_http()
  - num_images()
  - num_internal_links()
  - num_links()
  - num_media_external()
  - num_media_http()
  - num_meta_tags()
  - num_safe_anchors()
  - num_scripts_http()
  - num_styles_http()
  - num_titles()
  - right_click_disabled()
  - script_length()
  - script_to_body_ratio()
  - script_to_special_chars_ratio()
  - special_characters()
- web2vec.extractors.http_response_features module
  - HttpResponseFeatures
    - HttpResponseFeatures.body_length
    - HttpResponseFeatures.body_to_special_char_ratio
    - HttpResponseFeatures.contains_forms
    - HttpResponseFeatures.contains_obfuscated_scripts
    - HttpResponseFeatures.contains_suspicious_keywords
    - HttpResponseFeatures.is_live
    - HttpResponseFeatures.missing_content_security_policy
    - HttpResponseFeatures.missing_strict_transport_security
    - HttpResponseFeatures.missing_x_content_type_options
    - HttpResponseFeatures.missing_x_frame_options
    - HttpResponseFeatures.missing_x_xss_protection
    - HttpResponseFeatures.num_images
    - HttpResponseFeatures.num_links
    - HttpResponseFeatures.num_titles
    - HttpResponseFeatures.redirect_count
    - HttpResponseFeatures.redirects
    - HttpResponseFeatures.script_length
    - HttpResponseFeatures.script_to_body_ratio
    - HttpResponseFeatures.script_to_special_chars_ratio
    - HttpResponseFeatures.server_version
    - HttpResponseFeatures.special_characters
    - HttpResponseFeatures.time_response
    - HttpResponseFeatures.uses_https
  - body_length()
  - body_to_special_char_ratio()
  - check_forms()
  - check_header_content_security_policy()
  - check_header_strict_transport_security()
  - check_header_x_content_type_options()
  - check_header_x_frame_options()
  - check_header_x_xss_protection()
  - check_https()
  - check_obfuscated_scripts()
  - check_redirects()
  - check_server_version()
  - check_suspicious_keywords()
  - count_redirects()
  - get_http_response_features()
  - is_live()
  - num_images()
  - num_links()
  - num_titles()
  - script_length()
  - script_to_body_ratio()
  - script_to_special_chars_ratio()
  - special_characters()
- web2vec.extractors.network_features module
- web2vec.extractors.ssl_certification_features module
  - CertificateFeatures
    - CertificateFeatures.days_until_expiration
    - CertificateFeatures.expires_within_30_days
    - CertificateFeatures.expires_within_7_days
    - CertificateFeatures.is_expired
    - CertificateFeatures.is_trusted
    - CertificateFeatures.is_valid
    - CertificateFeatures.issuer
    - CertificateFeatures.issuer_common_name
    - CertificateFeatures.issuer_is_free_ca
    - CertificateFeatures.issuer_is_lets_encrypt
    - CertificateFeatures.issuer_organization_name
    - CertificateFeatures.not_after
    - CertificateFeatures.not_before
    - CertificateFeatures.subject
    - CertificateFeatures.trust_message
    - CertificateFeatures.valid_in_30_days
    - CertificateFeatures.valid_in_7_days
    - CertificateFeatures.validity_duration_days
    - CertificateFeatures.validity_message
  - check_ssl()
  - get_certificate_features()
  - get_certificate_features_cached()
  - get_tls_certificate()
  - is_certificate_trusted()
  - is_certificate_valid()
- web2vec.extractors.url_geo_features module
- web2vec.extractors.url_lexical_features module
  - URLLexicalFeatures
    - URLLexicalFeatures.average_subdomain_length
    - URLLexicalFeatures.count_ampersand_directory
    - URLLexicalFeatures.count_ampersand_domain
    - URLLexicalFeatures.count_ampersand_parameters
    - URLLexicalFeatures.count_ampersand_url
    - URLLexicalFeatures.count_asterisk_directory
    - URLLexicalFeatures.count_asterisk_domain
    - URLLexicalFeatures.count_asterisk_parameters
    - URLLexicalFeatures.count_asterisk_url
    - URLLexicalFeatures.count_at_directory
    - URLLexicalFeatures.count_at_domain
    - URLLexicalFeatures.count_at_parameters
    - URLLexicalFeatures.count_at_url
    - URLLexicalFeatures.count_comma_directory
    - URLLexicalFeatures.count_comma_domain
    - URLLexicalFeatures.count_comma_parameters
    - URLLexicalFeatures.count_comma_url
    - URLLexicalFeatures.count_dash_directory
    - URLLexicalFeatures.count_dash_domain
    - URLLexicalFeatures.count_dash_parameters
    - URLLexicalFeatures.count_dash_url
    - URLLexicalFeatures.count_dollar_directory
    - URLLexicalFeatures.count_dollar_domain
    - URLLexicalFeatures.count_dollar_parameters
    - URLLexicalFeatures.count_dollar_url
    - URLLexicalFeatures.count_dot_directory
    - URLLexicalFeatures.count_dot_domain
    - URLLexicalFeatures.count_dot_parameters
    - URLLexicalFeatures.count_dot_url
    - URLLexicalFeatures.count_equals_directory
    - URLLexicalFeatures.count_equals_domain
    - URLLexicalFeatures.count_equals_parameters
    - URLLexicalFeatures.count_equals_url
    - URLLexicalFeatures.count_exclamation_directory
    - URLLexicalFeatures.count_exclamation_domain
    - URLLexicalFeatures.count_exclamation_parameters
    - URLLexicalFeatures.count_exclamation_url
    - URLLexicalFeatures.count_hash_directory
    - URLLexicalFeatures.count_hash_domain
    - URLLexicalFeatures.count_hash_parameters
    - URLLexicalFeatures.count_hash_url
    - URLLexicalFeatures.count_percent_directory
    - URLLexicalFeatures.count_percent_domain
    - URLLexicalFeatures.count_percent_parameters
    - URLLexicalFeatures.count_percent_url
    - URLLexicalFeatures.count_plus_directory
    - URLLexicalFeatures.count_plus_domain
    - URLLexicalFeatures.count_plus_parameters
    - URLLexicalFeatures.count_plus_url
    - URLLexicalFeatures.count_question_directory
    - URLLexicalFeatures.count_question_domain
    - URLLexicalFeatures.count_question_parameters
    - URLLexicalFeatures.count_question_url
    - URLLexicalFeatures.count_slash_directory
    - URLLexicalFeatures.count_slash_domain
    - URLLexicalFeatures.count_slash_parameters
    - URLLexicalFeatures.count_slash_url
    - URLLexicalFeatures.count_space_directory
    - URLLexicalFeatures.count_space_domain
    - URLLexicalFeatures.count_space_parameters
    - URLLexicalFeatures.count_space_url
    - URLLexicalFeatures.count_tilde_directory
    - URLLexicalFeatures.count_tilde_domain
    - URLLexicalFeatures.count_tilde_parameters
    - URLLexicalFeatures.count_tilde_url
    - URLLexicalFeatures.count_underscore_directory
    - URLLexicalFeatures.count_underscore_domain
    - URLLexicalFeatures.count_underscore_parameters
    - URLLexicalFeatures.count_underscore_url
    - URLLexicalFeatures.directory_length
    - URLLexicalFeatures.domain_contains_keywords
    - URLLexicalFeatures.domain_entropy
    - URLLexicalFeatures.domain_in_ip_format
    - URLLexicalFeatures.domain_length
    - URLLexicalFeatures.email_present_in_url
    - URLLexicalFeatures.entropy_of_url
    - URLLexicalFeatures.having_anchor
    - URLLexicalFeatures.having_digit_in_subdomain
    - URLLexicalFeatures.having_fragment
    - URLLexicalFeatures.having_hyphen_in_subdomain
    - URLLexicalFeatures.having_special_char_in_subdomain
    - URLLexicalFeatures.having_underscore_in_subdomain
    - URLLexicalFeatures.is_ip
    - URLLexicalFeatures.number_of_parameters
    - URLLexicalFeatures.number_of_subdomains
    - URLLexicalFeatures.parameters_length
    - URLLexicalFeatures.percentage_numeric_chars
    - URLLexicalFeatures.repeated_digits_directory
    - URLLexicalFeatures.repeated_digits_domain
    - URLLexicalFeatures.repeated_digits_parameters
    - URLLexicalFeatures.repeated_digits_url
    - URLLexicalFeatures.server_client_domain
    - URLLexicalFeatures.subdomain_count
    - URLLexicalFeatures.suspicious_file_extension
    - URLLexicalFeatures.tld_amount_url
    - URLLexicalFeatures.tld_popularity
    - URLLexicalFeatures.tld_presence_in_arguments
    - URLLexicalFeatures.token_count
    - URLLexicalFeatures.url_depth
    - URLLexicalFeatures.url_length
    - URLLexicalFeatures.url_shortened
    - URLLexicalFeatures.uses_shortening_service
    - URLLexicalFeatures.vowel_count_domain
  - contains_keywords()
  - count_char()
  - count_vowels()
  - get_url_lexical_features()
  - get_url_lexical_features_cached()
  - has_repeated_digits()
  - numeric_chars_ratio()
  - tld_count()
  - token_count()
  - url_depth()
  - uses_shortening_service()
- web2vec.extractors.whois_features module
  - WhoisFeatures
    - WhoisFeatures.address
    - WhoisFeatures.city
    - WhoisFeatures.country
    - WhoisFeatures.created_within_30_days
    - WhoisFeatures.created_within_365_days
    - WhoisFeatures.creation_date
    - WhoisFeatures.creation_datetime
    - WhoisFeatures.days_until_expiration
    - WhoisFeatures.dnssec
    - WhoisFeatures.domain_age
    - WhoisFeatures.domain_age_days
    - WhoisFeatures.domain_name
    - WhoisFeatures.emails
    - WhoisFeatures.expiration_date
    - WhoisFeatures.expiration_datetime
    - WhoisFeatures.expires_within_30_days
    - WhoisFeatures.expires_within_7_days
    - WhoisFeatures.is_expired
    - WhoisFeatures.name
    - WhoisFeatures.name_servers
    - WhoisFeatures.org
    - WhoisFeatures.raw
    - WhoisFeatures.referral_url
    - WhoisFeatures.registrar
    - WhoisFeatures.state
    - WhoisFeatures.status
    - WhoisFeatures.time_domain_activation
    - WhoisFeatures.time_domain_expiration
    - WhoisFeatures.updated_date
    - WhoisFeatures.whois_server
    - WhoisFeatures.zipcode
  - get_whois_features()
  - get_whois_features_cached()