Source code for bok_choy.page_object

"""
Base implementation of the Page Object pattern.
See https://github.com/SeleniumHQ/selenium/wiki/PageObjects
and http://www.seleniumhq.org/docs/06_test_design_considerations.jsp#page-object-design-pattern
"""

from abc import ABCMeta, abstractmethod
from collections import defaultdict
from functools import wraps
from contextlib import contextmanager
import logging
import os
import socket
import re
from textwrap import dedent
from urllib import parse
from lazy import lazy

from selenium.common.exceptions import WebDriverException

from .query import BrowserQuery, no_error
from .promise import Promise, EmptyPromise, BrokenPromise
from .a11y import AxeCoreAudit, AxsAudit


LOGGER = logging.getLogger(__name__)

# String that can be used to test for XSS vulnerabilities.
# Taken from https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#XSS_Locator.
XSS_INJECTION = "'';!--\"<XSS>=&{()}"

# When the injected string appears within an attribute (for instance, value of an input tag,
# or alt of an img tag), if it is properly escaped this is the format we will see from
# document.documentElement.innerHTML. To avoid false positives, we need to allow this
# specific string, which hopefully is unique/odd enough that it would never appear accidentally.
EXPECTED_ATTRIBUTE_FORMAT = re.compile(r'\'\';!--&quot;<xss>=&amp;{\(\)}')

XSS_HTML = "<xss"


[docs]class WrongPageError(WebDriverException): """ The page object reports that we're on the wrong page! """
[docs]class PageLoadError(WebDriverException): """ An error occurred while loading the page. """
[docs]class XSSExposureError(Exception): """ An XSS issue has been found on the current page. """
[docs]def no_selenium_errors(func): """ Decorator to create an `EmptyPromise` check function that is satisfied only when `func` executes without a Selenium error. This protects against many common test failures due to timing issues. For example, accessing an element after it has been modified by JavaScript ordinarily results in a `StaleElementException`. Methods decorated with `no_selenium_errors` will simply retry if that happens, which makes tests more robust. Args: func (callable): The function to execute, with retries if an error occurs. Returns: Decorated function """ def _inner(*args, **kwargs): try: return_val = func(*args, **kwargs) except WebDriverException: LOGGER.warning('Exception ignored during retry loop:', exc_info=True) return False return return_val return _inner
[docs]def unguarded(method): """ Mark a PageObject method as unguarded. Unguarded methods don't verify that the PageObject is on the current browser page before they execute Args: method (callable): The method to decorate. Returns: Decorated method """ method._unguarded = True # pylint: disable=protected-access return method
[docs]def pre_verify(method): """ Decorator that calls self._verify_page() before executing the decorated method Args: method (callable): The method to decorate. Returns: Decorated method """ @wraps(method) def wrapper(self, *args, **kwargs): self._verify_page() # pylint: disable=protected-access return method(self, *args, **kwargs) return wrapper
class _PageObjectMetaclass(ABCMeta): """ Decorates any callable attributes of the class so that they call self._verify_page() before executing. Excludes any methods marked as unguarded with the @unguarded decorator, any methods starting with _, or in the list ALWAYS_UNGUARDED. """ ALWAYS_UNGUARDED = ['url', 'is_browser_on_page'] def __new__(mcs, cls_name, cls_bases, cls_attrs, **kwargs): for name, attr in list(cls_attrs.items()): # Skip methods marked as unguarded if getattr(attr, '_unguarded', False) or name in mcs.ALWAYS_UNGUARDED: continue # Skip private methods if name.startswith('_'): continue # Skip class attributes that are classes themselves if isinstance(attr, type): continue is_property = isinstance(attr, property) # Skip non-callable attributes if not (callable(attr) or is_property): continue if is_property: # For properties, wrap each of the sub-methods separately property_methods = defaultdict(None) for fn_name in ('fdel', 'fset', 'fget'): prop_fn = getattr(cls_attrs[name], fn_name, None) if prop_fn is not None: # Check for unguarded properties if getattr(prop_fn, '_unguarded', False): property_methods[fn_name] = prop_fn else: property_methods[fn_name] = pre_verify(prop_fn) cls_attrs[name] = property(**property_methods) else: cls_attrs[name] = pre_verify(attr) return super().__new__(mcs, cls_name, cls_bases, cls_attrs)
[docs]class PageObject(metaclass=_PageObjectMetaclass): """ Encapsulates user interactions with a specific part of a web application. The most important thing is this: Page objects encapsulate Selenium. If you find yourself writing CSS selectors in tests, manipulating forms, or otherwise interacting directly with the web UI, stop! Instead, put these in a :class:`PageObject` subclass :) PageObjects do their best to verify that they are only used when the browser is on a page containing the object. To do this, they will call :meth:`is_browser_on_page` before executing any of their methods, and raise a :class:`WrongPageError` if the browser isn't on the correct page. Generally, this is the right behavior. However, at times it will be useful to not verify the page before executing a method. In those cases, the method can be marked with the :func:`unguarded` decorator. Additionally, private methods (those beginning with `_`) are always unguarded. Class or instance properties are never guarded. However, methods marked with the :func:`property` are candidates for being guarded. To make them unguarded, you must mark the getter, setter, and deleter as :func:`unguarded` separately, and those decorators must be applied before the :func:`property` decorator. Correct:: @property @unguarded def foo(self): return self._foo Incorrect:: @unguarded @property def foo(self): return self._foo """ def __init__(self, browser, *args, **kwargs): """ Initialize the page object to use the specified browser instance. Args: browser (selenium.webdriver): The Selenium-controlled browser. Returns: PageObject """ super().__init__(*args, **kwargs) self.browser = browser a11y_flag = os.environ.get('VERIFY_ACCESSIBILITY', 'False') self.verify_accessibility = a11y_flag.lower() == 'true' xss_flag = os.environ.get('VERIFY_XSS', 'False') self.verify_xss = xss_flag.lower() == 'true'
[docs] @lazy def a11y_audit(self): """ Initializes the a11y_audit attribute. """ rulesets = { "axe_core": AxeCoreAudit, "google_axs": AxsAudit, } ruleset = rulesets[ os.environ.get("BOKCHOY_A11Y_RULESET", 'axe_core')] return ruleset(self.browser, self.url)
[docs] @abstractmethod def is_browser_on_page(self): """ Check that we are on the right page in the browser. The specific check will vary from page to page, but usually this amounts to checking the: 1) browser URL 2) page title 3) page headings Returns: A `bool` indicating whether the browser is on the correct page. """ return False
@property @abstractmethod def url(self): """ Return the URL of the page. This may be dynamic, determined by configuration options passed to the page object's constructor. Some pages may not be directly accessible: perhaps the page object represents a "navigation" component that occurs on multiple pages. If this is the case, subclasses can return `None` to indicate that you can't directly visit the page object. """ return None
[docs] @unguarded def warning(self, msg): """ Subclasses call this to indicate that something unexpected occurred while interacting with the page. Page objects themselves should never make assertions or raise exceptions, but they can issue warnings to make tests easier to debug. Args: msg (str): The message to log as a warning. Returns: None """ log = logging.getLogger(self.__class__.__name__) log.warning(msg)
[docs] @unguarded def visit(self): """ Open the page containing this page object in the browser. Some page objects may not provide a URL, in which case a `NotImplementedError` will be raised. Raises: PageLoadError: The page did not load successfully. NotImplementedError: The page object does not provide a URL to visit. Returns: PageObject """ if self.url is None: raise NotImplementedError(f"Page {self} does not provide a URL to visit.") # Validate the URL if not self.validate_url(self.url): raise PageLoadError(f"Invalid URL: '{self.url}'") # Visit the URL try: self.browser.get(self.url) except (WebDriverException, socket.gaierror) as err: LOGGER.warning("Unexpected page load exception:", exc_info=True) raise PageLoadError( "Could not load page '{!r}' at URL '{}'".format( # pylint: disable=consider-using-f-string self, self.url) ) from err # Give the browser enough time to get to the page, then return the page object # so that the caller can chain the call with an action: # Example: FooPage.visit().do_something() # # A BrokenPromise will be raised if the page object's is_browser_on_page method # does not return True before timing out. try: return self.wait_for_page() except BrokenPromise as err: raise PageLoadError( "Timed out waiting to load page '{!r}' at URL '{}'".format( # pylint: disable=consider-using-f-string self, self.url ) ) from err
[docs] @classmethod @unguarded def validate_url(cls, url): """ Return a boolean indicating whether the URL has a protocol and hostname. If a port is specified, ensure it is an integer. Arguments: url (str): The URL to check. Returns: Boolean indicating whether the URL has a protocol and hostname. """ result = parse.urlsplit(url) # Check that we have a protocol and hostname if not result.scheme: LOGGER.warning("%s is missing a protocol", url) return False if not result.netloc: LOGGER.warning("%s is missing a hostname", url) return False # Check that the port is an integer try: if result.port is not None: int(result.port) elif result.netloc.endswith(':'): # Valid URLs do not end with colons. LOGGER.warning("%s has a colon after the hostname but no port", url) return False except ValueError: LOGGER.warning("%s uses an invalid port", url) return False return True
def _verify_page(self): """ Ask the page object if we're on the right page; if not, raise a `WrongPageError`. """ if not self.is_browser_on_page(): msg = "Not on the correct page to use '{!r}' at URL '{}'".format( # pylint: disable=consider-using-f-string self, self.url ) raise WrongPageError(msg) def _verify_xss_exposure(self): """ Verify that there are no obvious XSS exposures on the page (based on test authors including XSS_INJECTION in content rendered on the page). If an xss issue is found, raise a 'XSSExposureError'. """ # Use innerHTML to get dynamically injected HTML as well as server-side HTML. html_source = self.browser.execute_script( "return document.documentElement.innerHTML.toLowerCase()" ) # Check taken from https://www.owasp.org/index.php/XSS_Filter_Evasion_Cheat_Sheet#XSS_Locator. all_hits_count = html_source.count(XSS_HTML) if all_hits_count > 0: safe_hits_count = len(EXPECTED_ATTRIBUTE_FORMAT.findall(html_source)) if all_hits_count > safe_hits_count: potential_hits = re.findall('<[^<]+<xss', html_source) raise XSSExposureError( f"{all_hits_count - safe_hits_count} XSS issue(s) found on page. " f"Potential places are {potential_hits}" )
[docs] @unguarded def wait_for_page(self, timeout=30): """ Block until the page loads, then returns the page. Useful for ensuring that we navigate successfully to a particular page. Keyword Args: timeout (int): The number of seconds to wait for the page before timing out with an exception. Raises: BrokenPromise: The timeout is exceeded without the page loading successfully. """ def _is_document_interactive(): """ Check the loading state of the document to ensure the document is in interactive mode """ return self.browser.execute_script( "return document.readyState=='interactive'") def _is_document_ready(): """ Check the loading state of the document to ensure the document and all sub-resources have finished loading (the document load event has been fired.) """ return self.browser.execute_script( "return document.readyState=='complete'") try: # Wait for page to load completely i.e. for document.readyState to become complete EmptyPromise( _is_document_ready, "The document and all sub-resources have finished loading.", timeout=timeout ).fulfill() except BrokenPromise: # pylint: disable=logging-format-interpolation LOGGER.warning( 'document.readyState does not become complete ' # pylint: disable=consider-using-f-string 'for following url: {}'.format(self.url), exc_info=True ) # If document.readyState does not become complete after a specific time relax the # condition and check for interactive state EmptyPromise( _is_document_interactive, "The document is in interactive mode.", timeout=timeout ).fulfill() result = Promise( lambda: (self.is_browser_on_page(), self), f"loaded page {self!r}", timeout=timeout ).fulfill() if self.verify_accessibility: self.a11y_audit.check_for_accessibility_errors() return result
[docs] @unguarded def q(self, **kwargs): # pylint: disable=invalid-name """ Construct a query on the browser. Example usages: .. code:: python self.q(css="div.foo").first.click() self.q(xpath="/foo/bar").text Keyword Args: css: A CSS selector. xpath: An XPath selector. Returns: BrowserQuery """ if self.verify_xss: self._verify_xss_exposure() return BrowserQuery(self.browser, **kwargs)
[docs] @contextmanager def handle_alert(self, confirm=True): """ Context manager that ensures alerts are dismissed. Example usage: .. code:: python with self.handle_alert(): self.q(css='input.submit-button').first.click() Keyword Args: confirm (bool): Whether to confirm or cancel the alert. Returns: None """ # Before executing the `with` block, stub the confirm/alert functions script = dedent(f""" window.confirm = function() {{ return {"true" if confirm else "false"}; }}; window.alert = function() {{ return; }}; """).strip() self.browser.execute_script(script) # Execute the `with` block yield
[docs] @unguarded def wait_for_ajax(self, timeout=30): """ Wait for jQuery to be loaded and for all ajax requests to finish. Note that we have to wait for jQuery to load first because it is used to check that ajax requests are complete. Important: If you have an ajax requests that results in a page reload, you will need to use wait_for_page or some other method to confirm that the page has finished reloading after wait_for_ajax has returned. Example usage: .. code:: python self.q(css='input#email').fill("foo") self.wait_for_ajax() Keyword Args: timeout (int): The number of seconds to wait before timing out with a BrokenPromise exception. Returns: None Raises: BrokenPromise: The timeout is exceeded before (1) jQuery is defined and (2) all ajax requests are completed. """ def _is_ajax_finished(): """ Check if all the ajax calls on the current page have completed. """ # Wait for jQuery to be defined first, so that jQuery.active # doesn't raise an error that 'jQuery is not defined'. We have # seen this as a flaky pattern possibly related to pages reloading # while wait_for_ajax is being called. return self.browser.execute_script( "return typeof(jQuery)!='undefined' && jQuery.active==0") EmptyPromise( _is_ajax_finished, "Finished waiting for ajax requests.", timeout=timeout ).fulfill()
[docs] @unguarded def wait_for(self, promise_check_func, description, result=False, timeout=60): """ Calls the method provided as an argument until the Promise satisfied or BrokenPromise. Retries if a WebDriverException is encountered (until the timeout is reached). Arguments: promise_check_func (callable): * If `result` is False Then Function that accepts no arguments and returns a boolean indicating whether the promise is fulfilled * If `result` is True Then Function that accepts no arguments and returns a `(is_satisfied, result)` tuple, where `is_satisfied` is a boolean indicating whether the promise was satisfied, and `result` is a value to return from the fulfilled `Promise` description (str): Description of the Promise, used in log messages result (bool): Indicates whether we need result timeout (float): Maximum number of seconds to wait for the Promise to be satisfied before timing out Raises: BrokenPromise: the `Promise` was not satisfied """ if result: return Promise(no_error(promise_check_func), description, timeout=timeout).fulfill() return EmptyPromise(no_selenium_errors(promise_check_func), description, timeout=timeout).fulfill()
[docs] @unguarded def wait_for_element_presence(self, element_selector, description, timeout=60): """ Waits for element specified by `element_selector` to be present in DOM. Example usage: .. code:: python self.wait_for_element_presence('.submit', 'Submit Button is Present') Arguments: element_selector (str): css selector of the element. description (str): Description of the Promise, used in log messages. timeout (float): Maximum number of seconds to wait for the Promise to be satisfied before timing out """ self.wait_for(lambda: self.q(css=element_selector).present, description=description, timeout=timeout)
[docs] @unguarded def wait_for_element_absence(self, element_selector, description, timeout=60): """ Waits for element specified by `element_selector` until it disappears from DOM. Example usage: .. code:: python self.wait_for_element_absence('.submit', 'Submit Button is not Present') Arguments: element_selector (str): css selector of the element. description (str): Description of the Promise, used in log messages. timeout (float): Maximum number of seconds to wait for the Promise to be satisfied before timing out """ self.wait_for(lambda: not self.q(css=element_selector).present, description=description, timeout=timeout)
[docs] @unguarded def wait_for_element_visibility(self, element_selector, description, timeout=60): """ Waits for element specified by `element_selector` until it is displayed on web page. Example usage: .. code:: python self.wait_for_element_visibility('.submit', 'Submit Button is Visible') Arguments: element_selector (str): css selector of the element. description (str): Description of the Promise, used in log messages. timeout (float): Maximum number of seconds to wait for the Promise to be satisfied before timing out """ self.wait_for(lambda: self.q(css=element_selector).visible, description=description, timeout=timeout)
[docs] @unguarded def wait_for_element_invisibility(self, element_selector, description, timeout=60): """ Waits for element specified by `element_selector` until it disappears from the web page. Example usage: .. code:: python self.wait_for_element_invisibility('.submit', 'Submit Button Disappeared') Arguments: element_selector (str): css selector of the element. description (str): Description of the Promise, used in log messages. timeout (float): Maximum number of seconds to wait for the Promise to be satisfied before timing out """ self.wait_for(lambda: self.q(css=element_selector).invisible, description=description, timeout=timeout)
[docs] @unguarded def scroll_to_element(self, element_selector, timeout=60): """ Scrolls the browser such that the element specified appears at the top. Before scrolling, waits for the element to be present. Example usage: .. code:: python self.scroll_to_element('.far-down', 'Scroll to far-down') Arguments: element_selector (str): css selector of the element. timeout (float): Maximum number of seconds to wait for the element to be present on the page before timing out. Raises: BrokenPromise if the element does not exist (and therefore scrolling to it is not possible) """ # Ensure element exists msg = f"Element '{element_selector}' is present" self.wait_for(lambda: self.q(css=element_selector).present, msg, timeout=timeout) # Obtain coordinates and use those for JavaScript call loc = self.q(css=element_selector).first.results[0].location self.browser.execute_script(f"window.scrollTo({loc['x']},{loc['y']})")