# -*- coding: utf-8 -*-
#
# Copyright (C) 2015-2020 Bitergia
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
# Animesh Kumar <animuz111@gmail.com>
#
import logging
from grimoirelab_toolkit.datetime import str_to_datetime, datetime_utcnow
from grimoirelab_toolkit.uris import urijoin
from ...backend import (Backend,
BackendCommand,
BackendCommandArgumentParser,
BackendError)
from ...client import HttpClient, RateLimitHandler
from ...utils import DEFAULT_DATETIME
# Category of the items produced by this backend.
CATEGORY_MESSAGE = 'message'

# Web site (used to build the origin) and REST API endpoints of Gitter.
GITTER_URL = "https://gitter.im/"
GITTER_API_URL = "https://api.gitter.im/v1"

# Key under which the item identifier is stored in the search fields.
DEFAULT_SEARCH_FIELD = "item_id"

# Default number of messages requested per query.
MAX_ITEMS = 100
# Default value for the `min_rate_to_sleep` parameter.
MIN_RATE_LIMIT = 10

logger = logging.getLogger(__name__)
class Gitter(Backend):
    """Gitter backend.

    This class retrieves the messages sent to a Gitter room.
    To access the server an API token is required.

    The origin of the data will be set to the `GITTER_URL` plus the
    identifier of the room; i.e. 'https://gitter.im/{group}/{room}'.

    :param group: group to which the room belongs
    :param room: identifier of the room from which the messages are to be fetched
    :param api_token: token or key needed to use the API
    :param max_items: maximum number of messages requested on the same query
    :param sleep_for_rate: sleep until rate limit is reset
    :param min_rate_to_sleep: minimum rate needed to sleep until
        it will be reset
    :param tag: label used to mark the data
    :param archive: archive to store/retrieve items
    :param ssl_verify: enable/disable SSL verification
    """
    version = '0.1.0'

    CATEGORIES = [CATEGORY_MESSAGE]

    def __init__(self, group=None, room=None, api_token=None, max_items=MAX_ITEMS,
                 sleep_for_rate=False, min_rate_to_sleep=MIN_RATE_LIMIT,
                 tag=None, archive=None, ssl_verify=True):
        origin = urijoin(GITTER_URL, group, room)

        super().__init__(origin, tag=tag, archive=archive, ssl_verify=ssl_verify)

        self.group = group
        self.room = room
        self.api_token = api_token
        self.max_items = max_items
        self.sleep_for_rate = sleep_for_rate
        self.min_rate_to_sleep = min_rate_to_sleep

        self.client = None
        # Resolved by `fetch_items` from the '{group}/{room}' name.
        self.room_id = None

    def search_fields(self, item):
        """Add search fields to an item.

        It adds the item identifier (under `DEFAULT_SEARCH_FIELD`),
        `group`, `room` and `room_id`. The value of `room_id` is `None`
        until `fetch_items` has resolved it.

        :param item: the item to extract the search fields values

        :returns: a dict of search fields
        """
        return {
            DEFAULT_SEARCH_FIELD: self.metadata_id(item),
            'group': self.group,
            'room': self.room,
            'room_id': self.room_id,
        }

    def fetch(self, category=CATEGORY_MESSAGE, from_date=DEFAULT_DATETIME):
        """Fetch the messages from the room.

        This method fetches the messages sent in the room that were
        sent since the given date. The date is passed to the backend
        formatted as '%Y-%m-%d', so its time of day is discarded.

        :param category: the category of items to fetch
        :param from_date: date from which messages are to be fetched;
            a false value falls back to `DEFAULT_DATETIME`

        :returns: a generator of messages
        """
        if not from_date:
            from_date = DEFAULT_DATETIME

        kwargs = {
            'from_date': from_date.strftime('%Y-%m-%d'),
        }

        return super().fetch(category, **kwargs)

    def fetch_items(self, category, **kwargs):
        """Fetch the messages.

        Pages of messages are requested one after another, using the id
        of the first message of a page as the `beforeId` of the next
        request. Only messages whose 'sent' date is strictly later than
        `from_date` are yielded. The process stops after an empty page
        or after a page that contains a message which is not later than
        `from_date`.

        :param category: the category of items to fetch
        :param kwargs: backend arguments; 'from_date' is required

        :returns: a generator of items

        :raises BackendError: when the id of the room cannot be found
        """
        from_date = kwargs['from_date']

        logger.debug("Get Gitter message paginated items of room: %s from date: %s",
                     self.room, from_date)

        from_ts = str_to_datetime(from_date).timestamp()

        room = urijoin(self.group, self.room)
        self.room_id = self.client.get_room_id(room)

        if not self.room_id:
            msg = "Room id not found for room %s" % room
            logger.error(msg)
            raise BackendError(cause=msg)

        logger.debug("Room id of room: %s is %s", self.room, self.room_id)

        fetching = True
        before_id = None
        num_msgs = 0
        page = 0

        while fetching:
            page += 1
            logger.debug("Page: %i", page)

            response = self.client.message_page(self.room_id, before_id)
            message_group = response.json()

            if not message_group:
                fetching = False
                continue

            # The whole page is always walked: an old message stops the
            # pagination but newer messages in the same page are still
            # yielded.
            for raw_message in message_group:
                if str_to_datetime(raw_message['sent']).timestamp() > from_ts:
                    num_msgs += 1
                    yield raw_message
                else:
                    fetching = False

            # NOTE(review): presumably each page is sorted oldest-first,
            # so the first entry is the cursor for the previous page;
            # confirm against the Gitter API.
            before_id = message_group[0]['id']

        logger.debug("Fetch process completed: %s messages fetched", num_msgs)

    @classmethod
    def has_archiving(cls):
        """Returns whether it supports archiving items on the fetch process.

        :returns: this backend supports items archive
        """
        return True

    @classmethod
    def has_resuming(cls):
        """Returns whether it supports to resume the fetch process.

        :returns: this backend does not support items resuming
        """
        return False

    def _init_client(self, from_archive=False):
        """Init client

        :param from_archive: it tells whether to write/read the archive

        :returns: a `GitterClient` configured with the backend settings
        """
        return GitterClient(self.api_token, self.max_items, self.archive,
                            self.sleep_for_rate, self.min_rate_to_sleep,
                            from_archive, self.ssl_verify)
class GitterClient(HttpClient, RateLimitHandler):
    """Gitter API client.

    Client for fetching information from the Gitter server
    using its REST API.

    :param api_token: key needed to use the API
    :param max_items: maximum number of items per request
    :param archive: an archive to store/read fetched data
    :param sleep_for_rate: sleep until rate limit is reset
    :param min_rate_to_sleep: minimum rate needed to sleep until
        it will be reset
    :param from_archive: it tells whether to write/read the archive
    :param ssl_verify: enable/disable SSL verification
    """
    # API resources
    RMESSAGES = 'chatMessages'
    RROOMS = 'rooms'

    # API headers
    HAUTHORIZATION = 'Authorization'

    # Resource parameters
    PLIMIT = 'limit'
    PBEFORE_ID = 'beforeId'

    def __init__(self, api_token, max_items=MAX_ITEMS, archive=None,
                 sleep_for_rate=False, min_rate_to_sleep=MIN_RATE_LIMIT,
                 from_archive=False, ssl_verify=True):
        base_url = GITTER_API_URL
        self.api_token = api_token
        self.max_items = max_items

        super().__init__(base_url, archive=archive, from_archive=from_archive,
                         ssl_verify=ssl_verify)
        super().setup_rate_limit_handler(sleep_for_rate=sleep_for_rate,
                                         min_rate_to_sleep=min_rate_to_sleep)

    def calculate_time_to_reset(self):
        """Number of seconds to wait. They are contained in the rate limit reset header

        The current time (truncated to seconds, plus one second) is
        scaled to milliseconds before being subtracted from
        `rate_limit_reset_ts`, and the difference is converted back to
        seconds.

        :returns: the seconds left until the reset; never negative
        """
        current_epoch = (datetime_utcnow().replace(microsecond=0).timestamp() + 1) * 1000
        time_to_reset = (self.rate_limit_reset_ts - current_epoch) / 1000

        return max(time_to_reset, 0)

    def fetch(self, url, payload=None, headers=None):
        """Fetch the data from a given URL.

        The authorization header built from the API token is always
        added to the request; it overrides any header with the same
        name given in `headers`. The rate limit is only checked and
        updated when the data is not read from the archive.

        :param url: link to the resource
        :param payload: payload of the request
        :param headers: additional headers of the request

        :returns a response object
        """
        # Work on a copy so the dict given by the caller is not modified.
        request_headers = dict(headers) if headers else {}
        request_headers[self.HAUTHORIZATION] = 'Bearer {}'.format(self.api_token)

        logger.debug("Gitter client message request with params: %s", payload)

        if not self.from_archive:
            self.sleep_for_rate_limit()

        response = super().fetch(url, payload, headers=request_headers)

        if not self.from_archive:
            self.update_rate_limit(response)

        return response

    def message_page(self, room_id, before_id):
        """Fetch a page of messages.

        :param room_id: identifier of the room
        :param before_id: when set, it is sent as the `beforeId`
            parameter of the request

        :returns: a response object
        """
        payload = {
            self.PLIMIT: self.max_items,
        }

        if before_id:
            payload[self.PBEFORE_ID] = before_id

        path = urijoin(GITTER_API_URL, self.RROOMS,
                       room_id, self.RMESSAGES)

        return self.fetch(path, payload)

    def get_room_id(self, room):
        """Fetch the room id of a room.

        :param room: name of the room, as it appears in the 'name'
            field of the entries returned by the rooms resource

        :returns: the id of the first room whose name matches, or
            `None` when no room matches
        """
        path = urijoin(GITTER_API_URL, self.RROOMS)
        rooms = self.fetch(path).json()

        for raw_room in rooms:
            if raw_room['name'] == room:
                return raw_room['id']

        return None

    @staticmethod
    def sanitize_for_archive(url, headers, payload):
        """Sanitize the headers of a HTTP request by removing the token
        information before storing/retrieving archived items.

        The authorization header is removed in place from `headers`.

        :param: url: HTTP url request
        :param: headers: HTTP headers request; it may be empty or `None`
        :param: payload: HTTP payload request

        :returns url, the sanitized headers and payload
        """
        if headers:
            headers.pop(GitterClient.HAUTHORIZATION, None)

        return url, headers, payload
class GitterCommand(BackendCommand):
    """Class to run Gitter backend from the command line."""

    BACKEND = Gitter

    @classmethod
    def setup_cmd_parser(cls):
        """Returns the Gitter argument parser.

        On top of the options enabled on the generic parser (from date,
        token authentication, archive and SSL verification) it marks
        `--api-token` as required, adds the Gitter specific options and
        the positional arguments `group` and `room`.

        :returns: a `BackendCommandArgumentParser` object
        """
        parser = BackendCommandArgumentParser(cls.BACKEND,
                                              from_date=True,
                                              token_auth=True,
                                              archive=True,
                                              ssl_verify=True)

        # Backend token is required
        # NOTE(review): this relies on the private argparse attribute
        # `_option_string_actions`; presumably '--api-token' is
        # registered because of `token_auth=True`.
        action = parser.parser._option_string_actions['--api-token']
        action.required = True

        # Gitter options
        group = parser.parser.add_argument_group('Gitter arguments')
        group.add_argument('--max-items', dest='max_items',
                           type=int, default=MAX_ITEMS,
                           help="Maximum number of items requested on the same query")
        group.add_argument('--sleep-for-rate', dest='sleep_for_rate',
                           action='store_true',
                           help="sleep for getting more rate")
        group.add_argument('--min-rate-to-sleep', dest='min_rate_to_sleep',
                           default=MIN_RATE_LIMIT, type=int,
                           help="sleep until reset when the rate limit reaches this value")

        # Required arguments
        parser.parser.add_argument('group',
                                   help="Gitter group")
        parser.parser.add_argument('room',
                                   help="Gitter room")

        return parser