# Source code for hepdata.modules.records.utils.analyses
# -*- coding: utf-8 -*-
#
# This file is part of HEPData.
# Copyright (C) 2021 CERN.
#
# HEPData is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# HEPData is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with HEPData; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.
import logging
from celery import shared_task
from flask import current_app
from invenio_db import db
import requests
from hepdata.ext.opensearch.api import index_record_ids
from hepdata.modules.submission.api import get_latest_hepsubmission, is_resource_added_to_submission
from hepdata.modules.submission.models import DataResource, HEPSubmission, data_reference_link
# Apply the standard library's default logging configuration (a no-op if the
# root logger already has handlers), then create the logger used by this module.
logging.basicConfig()
log = logging.getLogger(__name__)
# [docs]
@shared_task
def update_analyses(endpoint=None):
    """
    Update (Rivet and MadAnalysis 5) analyses and remove outdated resources.

    For every endpoint configured in ``ANALYSES_ENDPOINTS`` (or only the one
    named by ``endpoint``), the JSON document at the endpoint's
    ``endpoint_url`` is fetched.  It is iterated as a mapping from an INSPIRE
    ID to a list of analysis names.  Analyses that are not yet attached to the
    matching finished submission are added as ``DataResource`` objects, and
    existing resources of that type whose URL no longer appears in the JSON
    are deleted.  Affected records are reindexed.

    Endpoints without an ``endpoint_url``, and responses that are falsy or do
    not have status code 200, are skipped.

    :param endpoint: either "Rivet" or "MadAnalysis" or None (default) for both
    """
    endpoints = current_app.config["ANALYSES_ENDPOINTS"]
    for analysis_endpoint in endpoints:

        if endpoint and endpoint != analysis_endpoint:
            continue

        endpoint_config = endpoints[analysis_endpoint]
        if "endpoint_url" not in endpoint_config:
            log.debug("No endpoint url configured for {0}".format(analysis_endpoint))
            continue

        log.info("Updating analyses from {0}...".format(analysis_endpoint))
        response = requests.get(endpoint_config["endpoint_url"])

        if not (response and response.status_code == 200):
            continue

        analyses = response.json()

        # All stored resources of this type.  Entries still referenced by the
        # JSON are removed from this list below, so whatever is left afterwards
        # is outdated.
        analysis_resources = DataResource.query.filter_by(file_type=analysis_endpoint).all()

        _add_missing_analyses(analysis_endpoint, endpoint_config, analyses, analysis_resources)

        if analysis_resources:
            _remove_extra_analyses(analysis_endpoint, analysis_resources)


def _add_missing_analyses(analysis_endpoint, endpoint_config, analyses, analysis_resources):
    """
    Attach analyses listed in the endpoint JSON that a submission lacks.

    :param analysis_endpoint: name of the endpoint, also used as the
        ``file_type`` of the created resources
    :param endpoint_config: configuration dict of the endpoint; its
        ``url_template`` entry is formatted with each analysis name
    :param analyses: decoded JSON, iterated as INSPIRE ID -> analysis names
    :param analysis_resources: list of existing resources of this type;
        modified in place so that every resource whose URL is still listed
        in ``analyses`` for a finished submission is removed from it
    """
    for record in analyses:
        submission = get_latest_hepsubmission(inspire_id=record, overall_status='finished')

        if not submission:
            log.debug("An analysis is available in {0} but with no equivalent in HEPData (ins{1}).".format(
                analysis_endpoint, record))
            continue

        num_new_resources = 0

        for analysis in analyses[record]:
            _resource_url = endpoint_config["url_template"].format(analysis)

            if not is_resource_added_to_submission(submission.publication_recid, submission.version,
                                                   _resource_url):
                log.info('Adding {} analysis to ins{} with URL {}'.format(
                    analysis_endpoint, record, _resource_url)
                )
                new_resource = DataResource(
                    file_location=_resource_url,
                    file_type=analysis_endpoint)

                submission.resources.append(new_resource)
                num_new_resources += 1
            else:
                # The URL is still valid, so none of the resources pointing at
                # it are outdated.  Drop every match (there may be several
                # rows sharing one URL, or none of this file type at all)
                # instead of indexing the first match, which raised IndexError
                # on zero matches and left duplicates queued for deletion.
                analysis_resources[:] = [
                    resource for resource in analysis_resources
                    if resource.file_location != _resource_url
                ]

        if num_new_resources:
            try:
                db.session.add(submission)
                db.session.commit()
                # Only reindex when the finished version that was modified is
                # also the latest version overall.
                latest_submission = get_latest_hepsubmission(inspire_id=record)
                if submission.version == latest_submission.version:
                    index_record_ids([submission.publication_recid])
            except Exception as e:
                db.session.rollback()
                log.error(e)


def _remove_extra_analyses(analysis_endpoint, analysis_resources):
    """
    Delete resources that were not found in the analyses JSON file.

    A resource is only deleted when it is linked to a submission whose
    version equals the latest finished version of that publication.  All
    deletions are committed together and the affected records reindexed;
    any exception rolls the session back and is logged.

    :param analysis_endpoint: name of the endpoint (used for logging)
    :param analysis_resources: resources of this type considered outdated
    """
    try:
        recids_to_reindex = []
        for extra_analysis_resource in analysis_resources:
            query = db.select([data_reference_link.columns.submission_id]).where(
                data_reference_link.columns.dataresource_id == extra_analysis_resource.id)
            results = db.session.execute(query)
            for result in results:
                submission_id = result[0]
                submission = HEPSubmission.query.filter_by(id=submission_id).first()
                if not submission:
                    # Link row without a submission: skip it rather than fail
                    # on the attribute access below and abort the whole pass.
                    continue
                latest_submission = get_latest_hepsubmission(
                    publication_recid=submission.publication_recid, overall_status='finished'
                )
                if latest_submission and submission.version == latest_submission.version:
                    log.info('Removing {} analysis with URL {} from submission {} version {}'
                             .format(analysis_endpoint, extra_analysis_resource.file_location,
                                     submission.publication_recid, submission.version))
                    db.session.delete(extra_analysis_resource)
                    recids_to_reindex.append(submission.publication_recid)
        db.session.commit()
        if recids_to_reindex:
            index_record_ids(list(set(recids_to_reindex)))  # remove duplicates before indexing
    except Exception as e:
        db.session.rollback()
        log.error(e)