Source code for hepdata.utils.miscellaneous

# This file is part of HEPData.
# Copyright (C) 2016 CERN.
#
# HEPData is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# HEPData is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with HEPData; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

import re

import bleach
from flask import current_app



[docs]
def splitter(data, predicate):
    """Split a list according to a given predicate (lambda)."""
    yes, no = [], []
    for d in data:
        (yes if predicate(d) else no).append(d)
    return yes, no




[docs]
def sanitize_html(value, tags=None, attributes=None, strip=False):
    """Sanitize HTML.

    :param tags: Allowed HTML ``tags``. Configuration set by Invenio-Config.
    :param attributes: Allowed HTML ``attributes``. Configuration set by
        Invenio-Config.
    :param strip: Whether to strip tags that are not allowed. Defaults to
        False (escapes rather than strips disallowed tags).

    Use this function when you need to include unescaped HTML that contains
    user provided data.
    """
    if value is None:
        return value

    from flask import current_app

    value = value.strip()

    # Look for conditions like v1<x<v2 which look like invalid HTML,
    # and escape them before running bleach
    p = re.compile('<([^>]*?)<')
    escaped = p.sub(r'&lt;\1&lt;', value)

    tags = tags if tags is not None \
        else current_app.config.get('ALLOWED_HTML_TAGS', {})
    attributes = attributes if attributes is not None \
        else current_app.config.get('ALLOWED_HTML_ATTRS', {})

    cleaned = bleach.clean(
        escaped,
        tags=tags,
        attributes=attributes,
        strip=strip
    )

    return cleaned



[docs]
def generate_resource_url(resource):
    """
    Uses the file_location/ID of a submission object to generate a resource url.
    If "http" is at the beginning, will return file_location
    Otherwise, will generate a HEPData resource URL

    :param resource: DataResource object for generation
    :return: The generated URL string
    """
    # Determine if file_location is url or not
    if resource.file_location.startswith("http"):
        # Set url value if it's an external location
        url_string = resource.file_location
    else:
        # If not url, create hepdata.net url using resource ID
        site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
        url_string = f"{site_url}/record/resource/{resource.id}?landing_page=true"

    return url_string




[docs]
def get_resource_data(submission):
    """
    Function to create a dictionary of description, type and url for resources objects.
    This dictionary is to be added to the OpenSearch index.
    Uses either a DataSubmission, or HEPSubmission, which both contain resource objects.

    :param submission: HEPSubmission/DataSubmission object
    :return: The resources list (of dictionaries)
    """
    resources = []

    # Create a dictionary entry for every resource
    for s in submission.resources:
        resource_data = {
            "description": s.file_description,
            "type": s.file_type,
            "url": generate_resource_url(s)
        }

        resources.append(resource_data)

    return resources
Source code for hepdata.utils.miscellaneous

HEPData

Navigation

Related Topics