Source code for perceval.backends.core.launchpad

# -*- coding: utf-8 -*-
#
# Copyright (C) 2015-2020 Bitergia
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
#     Valerio Cosentino <valcos@bitergia.com>
#     Santiago Dueñas <sduenas@bitergia.com>
#     Jesus M. Gonzalez-Barahona <jgb@gsyc.es>
#     Harshal Mittal <harshalmittal4@gmail.com>
#

import json
import logging
import requests

from grimoirelab_toolkit.datetime import (datetime_to_utc,
                                          str_to_datetime)
from grimoirelab_toolkit.uris import urijoin

from ...backend import (Backend,
                        BackendCommand,
                        BackendCommandArgumentParser,
                        DEFAULT_SEARCH_FIELD)
from ...client import HttpClient
from ...utils import DEFAULT_DATETIME

# Category of the items produced by this backend
CATEGORY_ISSUE = "issue"

# Base URL joined with the distribution name to build the backend origin
LAUNCHPAD_URL = "https://launchpad.net/"
# Base URL of the Launchpad REST API used by the client
LAUNCHPAD_API_URL = 'https://api.launchpad.net/1.0'

# Link fields of an issue that the backend resolves into full data
TARGET_ISSUE_FIELDS = ['bug_link', 'owner_link', 'assignee_link']
# Default number of items requested per page
ITEMS_PER_PAGE = 75
# Default time (in seconds) to sleep in case of connection problems
SLEEP_TIME = 300

logger = logging.getLogger(__name__)


class Launchpad(Backend):
    """Launchpad backend for Perceval.

    This class allows to fetch the issues stored in Launchpad.

    :param distribution: Launchpad distribution
    :param package: Distribution package
    :param items_per_page: number of items in a retrieved page
    :param sleep_time: time (in seconds) to sleep in case
        of connection problems
    :param tag: label used to mark the data
    :param archive: archive to store/retrieve items
    :param ssl_verify: enable/disable SSL verification
    """
    version = '0.8.1'

    CATEGORIES = [CATEGORY_ISSUE]

    def __init__(self, distribution, package=None,
                 items_per_page=ITEMS_PER_PAGE, sleep_time=SLEEP_TIME,
                 tag=None, archive=None, ssl_verify=True):
        origin = urijoin(LAUNCHPAD_URL, distribution)

        super().__init__(origin, tag=tag, archive=archive, ssl_verify=ssl_verify)
        self.distribution = distribution
        self.package = package
        self.items_per_page = items_per_page
        self.sleep_time = sleep_time

        self.client = None
        self._users = {}  # internal users cache

    def search_fields(self, item):
        """Add search fields to an item.

        It adds the value of `metadata_id` plus the name of the
        distribution this backend was initialized with.

        :param item: the item to extract the search fields values

        :returns: a dict of search fields
        """
        search_fields = {
            DEFAULT_SEARCH_FIELD: self.metadata_id(item),
            'distribution': self.distribution
        }

        return search_fields

    def fetch(self, category=CATEGORY_ISSUE, from_date=DEFAULT_DATETIME):
        """Fetch the issues from a project (distribution/package).

        The method retrieves, from a Launchpad project, the issues
        updated since the given date.

        :param category: the category of items to fetch
        :param from_date: obtain issues updated since this date

        :returns: a generator of issues
        """
        if not from_date:
            from_date = DEFAULT_DATETIME

        from_date = datetime_to_utc(from_date)

        kwargs = {'from_date': from_date}
        items = super().fetch(category, **kwargs)

        return items

    def fetch_items(self, category, **kwargs):
        """Fetch the issues

        :param category: the category of items to fetch
        :param kwargs: backend arguments; `from_date` is required

        :returns: a generator of items
        """
        from_date = kwargs['from_date']

        logger.info("Fetching issues of '%s' distribution from %s",
                    self.distribution, str(from_date))

        nissues = 0

        for issue in self._fetch_issues(from_date):
            yield issue
            nissues += 1

        logger.info("Fetch process completed: %s issues fetched", nissues)

    @classmethod
    def has_archiving(cls):
        """Returns whether it supports archiving items on the fetch process.

        :returns: this backend supports items archive
        """
        return True

    @classmethod
    def has_resuming(cls):
        """Returns whether it supports to resume the fetch process.

        :returns: this backend supports items resuming
        """
        return True

    @staticmethod
    def metadata_id(item):
        """Extracts the identifier from a Launchpad item."""

        return str(item['bug_data']['id'])

    @staticmethod
    def metadata_updated_on(item):
        """Extracts the update time from a Launchpad item.

        The timestamp used is extracted from 'date_last_updated' field.
        This date is converted to UNIX timestamp format. As Launchpad
        dates are in UTC in ISO 8601 (e.g., '2008-03-26T01:43:15.603905+00:00')
        the conversion is straightforward.

        :param item: item generated by the backend

        :returns: a UNIX timestamp
        """
        ts = item['bug_data']['date_last_updated']
        ts = str_to_datetime(ts)

        return ts.timestamp()

    @staticmethod
    def metadata_category(item):
        """Extracts the category from a Launchpad item.

        This backend only generates one type of item which is
        'issue'.
        """
        return CATEGORY_ISSUE

    def _init_client(self, from_archive=False):
        """Init client"""

        return LaunchpadClient(self.distribution, self.package, self.items_per_page,
                               self.sleep_time, self.archive, from_archive, self.ssl_verify)

    def __init_extra_issue_fields(self, issue):
        """Add fields to an issue"""

        # Placeholders guarantee the keys exist even when a link is empty
        issue['bug_data'] = {}
        issue['owner_data'] = {}
        issue['assignee_data'] = {}

        return issue

    def __extract_issue_id(self, bug_link):
        """Extract issue id from bug link"""

        return bug_link.split('/')[-1]

    def _fetch_issues(self, from_date):
        """Fetch the issues from a project (distribution/package)

        Every issue is completed with the data of the bug it links to
        (including its activity, messages and attachments) and with
        the data of its owner and assignee.

        :param from_date: obtain issues updated since this date

        :returns: a generator of issues
        """
        issues_groups = self.client.issues(start=from_date)

        for raw_issues in issues_groups:

            issues = json.loads(raw_issues)['entries']
            for issue in issues:
                issue = self.__init_extra_issue_fields(issue)
                issue_id = self.__extract_issue_id(issue['bug_link'])

                for field in TARGET_ISSUE_FIELDS:

                    # Empty links keep the placeholder set above
                    if not issue[field]:
                        continue

                    if field == 'bug_link':
                        issue['bug_data'] = self.__fetch_issue_data(issue_id)
                        issue['activity_data'] = list(self.__fetch_issue_activities(issue_id))
                        issue['messages_data'] = list(self.__fetch_issue_messages(issue_id))
                        issue['attachments_data'] = list(self.__fetch_issue_attachments(issue_id))
                    elif field == 'assignee_link':
                        issue['assignee_data'] = self.__fetch_user_data('{ASSIGNEE}', issue[field])
                    elif field == 'owner_link':
                        issue['owner_data'] = self.__fetch_user_data('{OWNER}', issue[field])

                yield issue

    def __fetch_issue_data(self, issue_id):
        """Get data associated to an issue"""

        raw_issue = self.client.issue(issue_id)
        issue = json.loads(raw_issue)

        return issue

    def __fetch_issue_attachments(self, issue_id):
        """Get attachments of an issue"""

        for attachments_raw in self.client.issue_collection(issue_id, "attachments"):
            attachments = json.loads(attachments_raw)

            for attachment in attachments['entries']:
                yield attachment

    def __fetch_issue_messages(self, issue_id):
        """Get messages of an issue"""

        for messages_raw in self.client.issue_collection(issue_id, "messages"):
            messages = json.loads(messages_raw)

            for msg in messages['entries']:
                msg['owner_data'] = self.__fetch_user_data('{OWNER}', msg['owner_link'])
                yield msg

    def __fetch_issue_activities(self, issue_id):
        """Get activities on an issue"""

        for activities_raw in self.client.issue_collection(issue_id, "activity"):
            activities = json.loads(activities_raw)

            for act in activities['entries']:
                act['person_data'] = self.__fetch_user_data('{PERSON}', act['person_link'])
                yield act

    def __fetch_user_data(self, tag_type, user_link):
        """Get data associated to an user

        :param tag_type: label of the kind of user being resolved; it is
            not used by the current implementation
        :param user_link: API link of the user

        :returns: a dict with the user data, empty when the link or the
            user name are missing
        """
        user = {}

        # Links may be empty; there is nothing to resolve in that case
        if not user_link:
            return user

        user_name = self.client.user_name(user_link)

        if not user_name:
            return user

        user_raw = self.client.user(user_name)
        user = json.loads(user_raw)

        return user
class LaunchpadClient(HttpClient):
    """Client for retrieving information from Launchpad API

    :param distribution: Launchpad distribution
    :param package: Distribution package
    :param items_per_page: number of items in a retrieved page
    :param sleep_time: time (in seconds) to sleep in case
        of connection problems
    :param archive: an archive to store/read fetched data
    :param from_archive: it tells whether to write/read the archive
    :param ssl_verify: enable/disable SSL verification
    """
    # API resources
    RBUGS = 'bugs'
    RSOURCE = "+source"

    # API headers
    HCONTENT_TYPE = 'Content-type'

    # Resource parameters
    PWS_SIZE = 'ws.size'
    PWS_START = 'ws.start'
    PORDER_BY = 'order_by'
    POMIT_DULPLICATES = 'omit_duplicates'  # (sic) name kept for compatibility
    PSTATUS = 'status'
    PWS_OP = 'ws.op'
    PMODIFIED_SINCE = 'modified_since'

    # Predefined values
    VDATE_LAST_MODIFIED = 'date_last_updated'
    VCONTENT_TYPE = 'application/json'
    VOMIT_DUPLICATES = 'false'
    VSEARCH_TASKS = 'searchTasks'
    VSTATUS = ["New", "Incomplete", "Opinion", "Invalid", "Won't Fix",
               "Expired", "Confirmed", "Triaged", "In Progress", "Fix Committed",
               "Fix Released", "Incomplete (with response)", "Incomplete (without response)"]

    def __init__(self, distribution, package=None, items_per_page=ITEMS_PER_PAGE,
                 sleep_time=SLEEP_TIME, archive=None, from_archive=False, ssl_verify=True):
        self.distribution = distribution
        self.package = package
        self.items_per_page = items_per_page

        # Raw user responses indexed by user name. The cache belongs to
        # the instance so that one client never skips requests because
        # of what another client fetched before.
        self._users = {}

        extra_headers = self.__define_headers()
        super().__init__(LAUNCHPAD_API_URL, sleep_time=sleep_time, extra_headers=extra_headers,
                         archive=archive, from_archive=from_archive, ssl_verify=ssl_verify)

    def issues(self, start=None):
        """Get the issues from pagination

        :param start: obtain issues modified since this date

        :returns: a generator of raw pages of issues
        """
        payload = self.__build_payload(size=self.items_per_page, operation=True, startdate=start)
        path = self.__get_url_project()
        return self.__fetch_items(path=path, payload=payload)

    def user(self, user_name):
        """Get the user data by URL

        The raw response is cached by user name. When the server
        answers with 404 or 410, an empty JSON object ('{}') is
        cached and returned instead.

        :param user_name: name of the user

        :returns: the raw (JSON text) user data
        """
        user = None

        if user_name in self._users:
            return self._users[user_name]

        url_user = self.__get_url("~" + user_name)

        logger.info("Getting info for %s", url_user)

        try:
            raw_user = self.__send_request(url_user)
            user = raw_user
        except requests.exceptions.HTTPError as e:
            if e.response.status_code in [404, 410]:
                logger.warning("Data is not available - %s", url_user)
                user = '{}'
            else:
                raise

        self._users[user_name] = user

        return user

    def user_name(self, user_link):
        """Get user name from link

        The name is the last segment of the link without its first
        character.
        """
        return user_link.split('/')[-1][1:]

    def issue(self, issue_id):
        """Get the issue data by its ID"""

        path = urijoin(self.RBUGS, str(issue_id))
        url_issue = self.__get_url(path)
        raw_text = self.__send_request(url_issue)

        return raw_text

    def issue_collection(self, issue_id, collection_name):
        """Get a collection list of a given issue

        :param issue_id: identifier of the issue
        :param collection_name: name of the collection to retrieve

        :returns: a generator of raw pages of the collection
        """
        path = urijoin(self.RBUGS, str(issue_id), collection_name)
        url_collection = self.__get_url(path)
        payload = {
            self.PWS_SIZE: self.items_per_page,
            self.PWS_START: 0,
            self.PORDER_BY: self.VDATE_LAST_MODIFIED
        }

        raw_items = self.__fetch_items(path=url_collection, payload=payload)

        return raw_items

    def __get_url_project(self):
        """Build URL project"""

        if self.package:
            url = self.__get_url_distribution_package()
        else:
            url = self.__get_url_distribution()

        return url

    def __get_url_distribution(self):
        """Build URL distribution"""

        return urijoin(self.base_url, self.distribution)

    def __get_url_distribution_package(self):
        """Build URL distribution package"""

        return urijoin(self.__get_url_distribution(), self.RSOURCE, self.package)

    def __get_url(self, path):
        """Build generic URL"""

        return urijoin(self.base_url, path)

    def __define_headers(self):
        """Add headers to the Client default ones"""

        headers = {self.HCONTENT_TYPE: self.VCONTENT_TYPE}
        return headers

    def __send_request(self, url, params=None):
        """Send request

        :returns: the text of the response
        """
        r = self.fetch(url, payload=params)
        return r.text

    def __build_payload(self, size, operation=False, startdate=None):
        """Build payload

        :param size: number of items per page
        :param operation: when set, the search tasks operation is added
        :param startdate: when set, only items modified since this
            date are requested

        :returns: a dict with the request parameters
        """
        payload = {
            self.PWS_SIZE: size,
            self.PORDER_BY: self.VDATE_LAST_MODIFIED,
            self.POMIT_DULPLICATES: self.VOMIT_DUPLICATES,
            self.PSTATUS: self.VSTATUS
        }

        if operation:
            payload[self.PWS_OP] = self.VSEARCH_TASKS
        if startdate:
            startdate = startdate.isoformat()
            payload[self.PMODIFIED_SINCE] = startdate

        return payload

    def __fetch_items(self, path, payload):
        """Return the items from Launchpad API using pagination

        :param path: URL of the first page
        :param payload: parameters sent with the first request

        :returns: a generator of raw pages
        """
        page = 0  # current page
        url_next = path
        fetch_data = True

        while fetch_data:
            logger.debug("Fetching page: %i", page)

            try:
                raw_content = self.__send_request(url_next, payload)
                content = json.loads(raw_content)
            except requests.exceptions.HTTPError as e:
                if e.response.status_code in [410]:
                    # Data that is gone is reported as an empty page
                    logger.warning("Data is not available - %s", url_next)
                    raw_content = '{"total_size": 0, "start": 0, "entries": []}'
                    content = json.loads(raw_content)
                else:
                    raise

            if 'next_collection_link' in content:
                url_next = content['next_collection_link']
                # The payload is only sent with the first request
                payload = None
            else:
                fetch_data = False

            yield raw_content
            page += 1
class LaunchpadCommand(BackendCommand):
    """Class to run Launchpad backend from the command line."""

    BACKEND = Launchpad

    @classmethod
    def setup_cmd_parser(cls):
        """Returns the Launchpad argument parser."""

        parser = BackendCommandArgumentParser(cls.BACKEND,
                                              from_date=True,
                                              archive=True,
                                              token_auth=False,
                                              ssl_verify=True)

        # Optional arguments
        group = parser.parser.add_argument_group('Launchpad arguments')
        # Numeric options are parsed as integers and fall back to the
        # module defaults, so the backend never receives a string or None
        group.add_argument('--items-per-page', dest='items_per_page',
                           type=int, default=ITEMS_PER_PAGE,
                           help="Items per page")
        group.add_argument('--sleep-time', dest='sleep_time',
                           type=int, default=SLEEP_TIME,
                           help="Sleep time in case of connection lost")
        group.add_argument('--package', dest='package',
                           help="Distribution package")

        # Required arguments
        parser.parser.add_argument('distribution',
                                   help="Launchpad distribution")

        return parser