Source code for perceval.backends.core.jira

# -*- coding: utf-8 -*-
#
# Copyright (C) 2015-2020 Bitergia
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
#     Alberto Martín <alberto.martin@bitergia.com>
#     Santiago Dueñas <sduenas@bitergia.com>
#     Stephan Barth <stephan.barth@gmail.com>
#     Valerio Cosentino <valcos@bitergia.com>
#     Jesus M. Gonzalez-Barahona <jgb@gsyc.es>
#     Harshal Mittal <harshalmittal4@gmail.com>
#

import json
import logging

import requests

from requests.packages.urllib3.exceptions import InsecureRequestWarning

from grimoirelab_toolkit.datetime import datetime_to_utc, str_to_datetime
from grimoirelab_toolkit.uris import urijoin

from ...backend import (Backend,
                        BackendCommand,
                        BackendCommandArgumentParser)
from ...client import HttpClient
from ...utils import DEFAULT_DATETIME

CATEGORY_ISSUE = "issue"
MAX_RESULTS = 100  # Maximum number of results per query

logger = logging.getLogger(__name__)


def map_custom_field(custom_fields, fields):
    """Attach the custom field definition to each custom field value.

    Every key of `fields` that is also a key of `custom_fields` is
    mapped to a dictionary holding the identifier and the name of the
    custom field, together with the value found in the issue.

    :param custom_fields: custom field definitions, indexed by
        field identifier
    :param fields: fields of the issue, indexed by field identifier

    :returns: a dict, indexed by field identifier, with one
        `{'id', 'name', 'value'}` entry per custom field found
        in `fields`
    """
    mapped = {}

    for field_id, value in fields.items():
        # Fields without a custom definition are left out of the result
        if field_id not in custom_fields:
            continue

        definition = custom_fields[field_id]
        mapped[field_id] = {
            'id': definition['id'],
            'name': definition['name'],
            'value': value
        }

    return mapped
def filter_custom_fields(fields):
    """Select the custom fields from a list of field definitions.

    A field is considered custom only when its 'custom' entry is
    exactly `True`.

    :param fields: iterable of field definitions; each one is a dict
        with, at least, the 'id' and 'custom' keys

    :returns: a dict with the custom field definitions, indexed by
        their 'id'
    """
    custom_only = [entry for entry in fields if entry['custom'] is True]

    return {entry['id']: entry for entry in custom_only}
class Jira(Backend):
    """JIRA backend for Perceval.

    This class retrieves the issues stored in a JIRA issue tracking
    system. The URL of the site is required to initialize it; that
    same `url` is used as the origin of the data.

    When the data is fetched with an authenticated access (i.e., user
    and password), the JSON documents produced by the backend also
    include information about issue transitions and operations
    (e.g., edit-issue, comment-issue).

    :param url: JIRA's endpoint
    :param project: filter issues by project
    :param user: Jira user
    :param password: Jira user password
    :param cert: SSL certificate path (PEM)
    :param max_results: max number of results per query
    :param tag: label used to mark the data
    :param archive: archive to store/retrieve items
    :param ssl_verify: enable/disable SSL verification
    """
    version = '0.14.0'

    CATEGORIES = [CATEGORY_ISSUE]
    EXTRA_SEARCH_FIELDS = {
        'project_id': ['fields', 'project', 'id'],
        'project_key': ['fields', 'project', 'key'],
        'project_name': ['fields', 'project', 'name'],
        'issue_key': ['key']
    }

    def __init__(self, url, project=None,
                 user=None, password=None,
                 cert=None, max_results=MAX_RESULTS,
                 tag=None, archive=None, ssl_verify=True):
        # The URL of the site doubles as the origin of the data
        super().__init__(url, tag=tag, archive=archive, ssl_verify=ssl_verify)

        self.url = url
        self.project = project
        self.max_results = max_results

        # Credentials and certificate handed over to the client
        self.user = user
        self.password = password
        self.cert = cert

        # No client is created here; see `_init_client`
        self.client = None

    def fetch(self, category=CATEGORY_ISSUE, from_date=DEFAULT_DATETIME):
        """Fetch the issues from the site.

        The method retrieves, from a JIRA site, the issues updated
        since the given date. A falsy `from_date` is replaced by
        `DEFAULT_DATETIME`; the date is converted to UTC before
        delegating to the parent class.

        :param category: the category of items to fetch
        :param from_date: retrieve issues updated from this date

        :returns: a generator of issues
        """
        since = datetime_to_utc(from_date or DEFAULT_DATETIME)

        return super().fetch(category, from_date=since)

    def fetch_items(self, category, **kwargs):
        """Fetch the issues.

        Each issue is enriched with the definition of its custom
        fields and with its comments, stored under 'comments_data'.

        :param category: the category of items to fetch
        :param kwargs: backend arguments; 'from_date' is required

        :returns: a generator of items
        """
        since = kwargs['from_date']

        logger.info("Looking for issues at site '%s', in project '%s' and updated from '%s'",
                    self.url, self.project, str(since))

        pages = self.client.get_issues(since)
        custom_fields = filter_custom_fields(json.loads(self.client.get_fields()))

        for page in pages:
            for issue in self.parse_issues(page):
                # Replace the raw value of each custom field with its
                # {'id', 'name', 'value'} representation
                issue['fields'].update(map_custom_field(custom_fields, issue['fields']))
                issue['comments_data'] = self.__get_issue_comments(issue['id'])

                yield issue

    @classmethod
    def has_archiving(cls):
        """Tell whether items can be archived during the fetch process.

        :returns: this backend supports items archive
        """
        return True

    @classmethod
    def has_resuming(cls):
        """Tell whether the fetch process can be resumed.

        :returns: this backend supports items resuming
        """
        return True

    @staticmethod
    def metadata_id(item):
        """Extracts the identifier from a Jira item."""

        return str(item['id'])

    @staticmethod
    def metadata_updated_on(item):
        """Extracts the update time from a Jira item.

        The timestamp is taken from the 'updated' field of the issue.
        That date is parsed and converted to UNIX timestamp format,
        taking into account its timezone.

        :param item: item generated by the backend

        :returns: a UNIX timestamp
        """
        return str_to_datetime(item['fields']['updated']).timestamp()

    @staticmethod
    def metadata_category(item):
        """Extracts the category from a Jira item.

        This backend only generates one type of item which is
        'issue'.
        """
        return CATEGORY_ISSUE

    @staticmethod
    def parse_issues(raw_page):
        """Parse a JIRA API raw response.

        The method decodes the JSON response and retrieves the
        issues listed under its 'issues' key.

        :param raw_page: JSON text of a page of results

        :returns: a generator of issues
        """
        yield from json.loads(raw_page)['issues']

    def _init_client(self, from_archive=False):
        """Init client"""

        return JiraClient(self.url, self.project, self.user, self.password,
                          self.cert, self.max_results,
                          self.archive, from_archive, self.ssl_verify)

    def __get_issue_comments(self, issue_id):
        """Get issue comments"""

        pages = self.client.get_comments(issue_id)

        # Flatten the comments of every page into a single list
        return [
            comment
            for page in pages
            for comment in json.loads(page)['comments']
        ]
class JiraClient(HttpClient):
    """JIRA API client.

    This class implements a simple client to retrieve issues from
    any JIRA issue tracking system.

    :param url: URL of the JIRA server
    :param project: filter issues by project
    :param user: JIRA's username
    :param password: JIRA's password
    :param cert: SSL certificate
    :param max_results: max number of results per query
    :param archive: an archive to store/read fetched data
    :param from_archive: it tells whether to write/read the archive
    :param ssl_verify: enable/disable SSL verification

    :raises HTTPError: when an error occurs doing the request
    """
    VERSION_API = '2'
    RESOURCE = 'rest/api'

    # API resources
    RISSUE = 'issue'
    RCOMMENT = 'comment'
    RFIELD = 'field'
    RSEARCH = 'search'

    # Resource parameters
    PJQL = 'jql'
    PSTART_AT = 'startAt'
    PEXPAND = 'expand'
    PMAX_RESULTS = 'maxResults'

    # Predefined values
    VEXPAND = 'renderedFields,transitions,operations,changelog'

    def __init__(self, url, project, user, password,
                 cert, max_results=MAX_RESULTS,
                 archive=None, from_archive=False, ssl_verify=True):
        super().__init__(url, archive=archive, from_archive=from_archive,
                         ssl_verify=ssl_verify)

        self.project = project
        self.user = user
        self.password = password
        self.cert = cert
        self.max_results = max_results

        # The session is only configured when data is not read
        # from the archive
        if not from_archive:
            self.__init_session()

    def get_items(self, from_date, url, expand_fields=True):
        """Retrieve all the items from a given date.

        The items are requested page by page. The text of each
        non-empty response is yielded as is, without decoding it.

        :param from_date: obtain items updated since this date
        :param url: endpoint API url
        :param expand_fields: if True, it includes the expand fields
            in the payload

        :returns: a generator of raw pages (response text)
        """
        start_at = 0
        req = self.fetch(url, payload=self.__build_payload(start_at, from_date, expand_fields))
        issues = req.text

        data = req.json()
        titems = data['total']
        # Page size reported by the server in the first response
        nitems = data['maxResults']

        start_at += min(nitems, titems)
        self.__log_status(start_at, titems, url)

        while issues:
            yield issues
            # Stop the loop unless another page is fetched below
            issues = None

            if data['startAt'] + nitems < titems:
                req = self.fetch(url, payload=self.__build_payload(start_at, from_date, expand_fields))

                data = req.json()
                start_at += nitems
                issues = req.text
                self.__log_status(start_at, titems, url)

    def get_issues(self, from_date):
        """Retrieve all the issues from a given date.

        :param from_date: obtain issues updated since this date

        :returns: a generator of raw pages of issues
        """
        url = urijoin(self.base_url, self.RESOURCE, self.VERSION_API, self.RSEARCH)
        issues = self.get_items(from_date, url)

        return issues

    def get_comments(self, issue_id):
        """Retrieve all the comments of a given issue.

        :param issue_id: ID of the issue

        :returns: a generator of raw pages of comments
        """
        url = urijoin(self.base_url, self.RESOURCE, self.VERSION_API, self.RISSUE, issue_id, self.RCOMMENT)
        comments = self.get_items(DEFAULT_DATETIME, url, expand_fields=False)

        return comments

    def get_fields(self):
        """Retrieve all the fields available.

        :returns: the raw text of the response
        """
        url = urijoin(self.base_url, self.RESOURCE, self.VERSION_API, self.RFIELD)
        req = self.fetch(url)

        return req.text

    def __build_jql_query(self, from_date):
        """Build the JQL query to select the issues updated after a date.

        The query is restricted to `self.project` when it is set and
        the results are ordered by update date, in ascending order.

        :param from_date: obtain issues updated since this date

        :returns: a string with the JQL query
        """
        AND_OP = 'AND'
        UPDATED_OP = 'updated >'
        PROJECT_OP = 'project ='
        ORDER_BY_OP = 'order by'
        ASC_OP = 'asc'

        # Convert datetime to milliseconds since 1970-01-01.
        # This allows us to use the timezone of the given date
        strdate = str(int(from_date.timestamp() * 1000))

        if self.project:
            jql_query = ' '.join([PROJECT_OP, self.project, AND_OP, UPDATED_OP, strdate])
        else:
            jql_query = ' '.join([UPDATED_OP, strdate])

        # The empty first element adds the separating space
        jql_query += ' '.join(['', ORDER_BY_OP, 'updated', ASC_OP])

        return jql_query

    def __build_payload(self, start_at, from_date, expand=True):
        """Build the parameters of a request.

        :param start_at: index of the first item to retrieve
        :param from_date: obtain items updated since this date
        :param expand: if False, the expand fields are left out

        :returns: a dict with the parameters of the request
        """
        payload = {
            self.PJQL: self.__build_jql_query(from_date),
            self.PSTART_AT: start_at,
            self.PEXPAND: self.VEXPAND,
            self.PMAX_RESULTS: self.max_results
        }

        if not expand:
            payload.pop(self.PEXPAND)

        return payload

    def __log_status(self, max_items, total, url):
        """Log the progress of the retrieval.

        :param max_items: number of items requested so far
        :param total: total number of items available
        :param url: endpoint API url
        """
        if total != 0:
            nitems = min(max_items, total)
            # Arguments are passed to the logger, so the message is
            # only formatted when the record is emitted
            logger.info("Fetching %s/%s items from %s", nitems, total, url)
        else:
            logger.info("No items were found for %s.", url)

    def __init_session(self):
        """Set up authentication, certificate and SSL verification."""

        # Both credentials must be given. Checking each one on its own
        # avoids setting the authentication with a missing password
        # when the user is an empty string.
        if self.user is not None and self.password is not None:
            self.session.auth = (self.user, self.password)

        if self.cert:
            self.session.cert = self.cert

        if self.ssl_verify is not True:
            requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
            self.session.verify = False
class JiraCommand(BackendCommand):
    """Class to run Jira backend from the command line."""

    BACKEND = Jira

    @classmethod
    def setup_cmd_parser(cls):
        """Returns the Jira argument parser.

        On top of the generic options (from date, basic
        authentication, archive and SSL verification), the parser
        accepts the JIRA specific options and the URL of the site.
        """
        cmd_parser = BackendCommandArgumentParser(cls.BACKEND,
                                                  from_date=True,
                                                  basic_auth=True,
                                                  archive=True,
                                                  ssl_verify=True)

        # JIRA options
        jira_group = cmd_parser.parser.add_argument_group('JIRA arguments')
        jira_group.add_argument('--project',
                                help="filter issues by Project")
        jira_group.add_argument('--cert',
                                help="SSL certificate path (PEM)")
        jira_group.add_argument('--max-results', dest='max_results',
                                type=int, default=MAX_RESULTS,
                                help="Maximum number of results requested in the same query")

        # Required arguments
        cmd_parser.parser.add_argument('url',
                                       help="JIRA's url")

        return cmd_parser