Source code for hepdata.modules.records.utils.analyses

# -*- coding: utf-8 -*-
#
# This file is part of HEPData.
# Copyright (C) 2021 CERN.
#
# HEPData is free software; you can redistribute it
# and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# HEPData is distributed in the hope that it will be
# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with HEPData; if not, write to the
# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
# MA 02111-1307, USA.
#
# In applying this license, CERN does not
# waive the privileges and immunities granted to it by virtue of its status
# as an Intergovernmental Organization or submit itself to any jurisdiction.

import logging

from celery import shared_task
from flask import current_app
from invenio_db import db
import requests

from hepdata.ext.opensearch.api import index_record_ids
from hepdata.modules.submission.api import get_latest_hepsubmission, is_resource_added_to_submission
from hepdata.modules.submission.models import DataResource, HEPSubmission, data_reference_link

logging.basicConfig()
log = logging.getLogger(__name__)


[docs] @shared_task def update_analyses(endpoint=None): """ Update (Rivet and MadAnalysis 5) analyses and remove outdated resources. :param endpoint: either "Rivet" or "MadAnalysis" or None (default) for both """ endpoints = current_app.config["ANALYSES_ENDPOINTS"] for analysis_endpoint in endpoints: if endpoint and endpoint != analysis_endpoint: continue if "endpoint_url" in endpoints[analysis_endpoint]: log.info("Updating analyses from {0}...".format(analysis_endpoint)) response = requests.get(endpoints[analysis_endpoint]["endpoint_url"]) if response and response.status_code == 200: analyses = response.json() analysis_resources = DataResource.query.filter_by(file_type=analysis_endpoint).all() # Check for missing analyses. for record in analyses: submission = get_latest_hepsubmission(inspire_id=record, overall_status='finished') if submission: num_new_resources = 0 for analysis in analyses[record]: _resource_url = endpoints[analysis_endpoint]["url_template"].format(analysis) if not is_resource_added_to_submission(submission.publication_recid, submission.version, _resource_url): log.info('Adding {} analysis to ins{} with URL {}'.format( analysis_endpoint, record, _resource_url) ) new_resource = DataResource( file_location=_resource_url, file_type=analysis_endpoint) submission.resources.append(new_resource) num_new_resources += 1 else: # Remove resource from 'analysis_resources' list. resource = list(filter(lambda a: a.file_location == _resource_url, analysis_resources))[0] analysis_resources.remove(resource) if num_new_resources: try: db.session.add(submission) db.session.commit() latest_submission = get_latest_hepsubmission(inspire_id=record) if submission.version == latest_submission.version: index_record_ids([submission.publication_recid]) except Exception as e: db.session.rollback() log.error(e) else: log.debug("An analysis is available in {0} but with no equivalent in HEPData (ins{1}).".format( analysis_endpoint, record)) if analysis_resources: # Extra resources that were not found in the analyses JSON file. # Need to delete extra resources then reindex affected submissions. # Only take action if latest version is finished (most important case). try: recids_to_reindex = [] for extra_analysis_resource in analysis_resources: query = db.select([data_reference_link.columns.submission_id]).where( data_reference_link.columns.dataresource_id == extra_analysis_resource.id) results = db.session.execute(query) for result in results: submission_id = result[0] submission = HEPSubmission.query.filter_by(id=submission_id).first() latest_submission = get_latest_hepsubmission( publication_recid=submission.publication_recid, overall_status='finished' ) if submission and latest_submission and submission.version == latest_submission.version: log.info('Removing {} analysis with URL {} from submission {} version {}' .format(analysis_endpoint, extra_analysis_resource.file_location, submission.publication_recid, submission.version)) db.session.delete(extra_analysis_resource) recids_to_reindex.append(submission.publication_recid) db.session.commit() if recids_to_reindex: index_record_ids(list(set(recids_to_reindex))) # remove duplicates before indexing except Exception as e: db.session.rollback() log.error(e) else: log.debug("No endpoint url configured for {0}".format(analysis_endpoint))