Source code for hepdata.utils.miscellaneous
# This file is part of HEPData.
# Copyright (C) 2016 CERN.
#
# HEPData is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# HEPData is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with HEPData; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.
import re
import bleach
from flask import current_app
[docs]
def splitter(data, predicate):
"""Split a list according to a given predicate (lambda)."""
yes, no = [], []
for d in data:
(yes if predicate(d) else no).append(d)
return yes, no
[docs]
def sanitize_html(value, tags=None, attributes=None, strip=False):
"""Sanitize HTML.
:param tags: Allowed HTML ``tags``. Configuration set by Invenio-Config.
:param attributes: Allowed HTML ``attributes``. Configuration set by
Invenio-Config.
:param strip: Whether to strip tags that are not allowed. Defaults to
False (escapes rather than strips disallowed tags).
Use this function when you need to include unescaped HTML that contains
user provided data.
"""
if value is None:
return value
from flask import current_app
value = value.strip()
# Look for conditions like v1<x<v2 which look like invalid HTML,
# and escape them before running bleach
p = re.compile('<([^>]*?)<')
escaped = p.sub(r'<\1<', value)
tags = tags if tags is not None \
else current_app.config.get('ALLOWED_HTML_TAGS', {})
attributes = attributes if attributes is not None \
else current_app.config.get('ALLOWED_HTML_ATTRS', {})
cleaned = bleach.clean(
escaped,
tags=tags,
attributes=attributes,
strip=strip
)
return cleaned
[docs]
def generate_resource_url(resource):
"""
Uses the file_location/ID of a submission object to generate a resource url.
If "http" is at the beginning, will return file_location
Otherwise, will generate a HEPData resource URL
:param resource: DataResource object for generation
:return: The generated URL string
"""
# Determine if file_location is url or not
if resource.file_location.startswith("http"):
# Set url value if it's an external location
url_string = resource.file_location
else:
# If not url, create hepdata.net url using resource ID
site_url = current_app.config.get('SITE_URL', 'https://www.hepdata.net')
url_string = f"{site_url}/record/resource/{resource.id}?landing_page=true"
return url_string
[docs]
def get_resource_data(submission):
"""
Function to create a dictionary of description, type and url for resources objects.
This dictionary is to be added to the OpenSearch index.
Uses either a DataSubmission, or HEPSubmission, which both contain resource objects.
:param submission: HEPSubmission/DataSubmission object
:return: The resources list (of dictionaries)
"""
resources = []
# Create a dictionary entry for every resource
for s in submission.resources:
resource_data = {
"description": s.file_description,
"type": s.file_type,
"url": generate_resource_url(s)
}
resources.append(resource_data)
return resources