# -*- coding: utf-8 -*-
#
# Copyright (C) 2015-2020 Bitergia
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
# Valerio Cosentino <valcos@bitergia.com>
# Santiago Dueñas <sduenas@bitergia.com>
# Jesus M. Gonzalez-Barahona <jgb@gsyc.es>
# Harshal Mittal <harshalmittal4@gmail.com>
#
import json
import logging
import requests
from grimoirelab_toolkit.datetime import datetime_to_utc
from grimoirelab_toolkit.uris import urijoin
from ...backend import (Backend,
BackendCommand,
BackendCommandArgumentParser)
from ...client import HttpClient, RateLimitHandler
from ...errors import RepositoryError
from ...utils import DEFAULT_DATETIME
# Category of the items produced by this backend
CATEGORY_EVENT = 'event'

# Origin of the data and base URL of the REST API
MEETUP_URL = "https://meetup.com/"
MEETUP_API_URL = "https://api.meetup.com/"

# Default number of items requested on each query
MAX_ITEMS = 200

# Default rate limit value at which the client sleeps until the reset
MIN_RATE_LIMIT = 1

# Default time (in seconds) to sleep to avoid "too many requests" errors
SLEEP_TIME = 30

logger = logging.getLogger(__name__)
class Meetup(Backend):
    """Meetup backend.

    This class allows to fetch the events of a group from the
    Meetup server. Initialize this class passing the OAuth2 token needed
    for authentication with the parameter `api_token`.

    :param group: name of the group where data will be fetched
    :param api_token: OAuth2 token to access the API
    :param max_items: maximum number of items requested on the same query
    :param tag: label used to mark the data
    :param archive: archive to store/retrieve items
    :param sleep_for_rate: sleep until rate limit is reset
    :param min_rate_to_sleep: minimum rate needed to sleep until
        it will be reset
    :param sleep_time: time (in seconds) to sleep in case
        of connection problems
    :param ssl_verify: enable/disable SSL verification
    """
    version = '0.17.0'

    CATEGORIES = [CATEGORY_EVENT]
    CLASSIFIED_FIELDS = [
        ['group', 'topics'],
        ['event_hosts'],
        ['rsvps'],
        ['venue']
    ]
    EXTRA_SEARCH_FIELDS = {
        'group_name': ['group', 'name'],
        'group_id': ['group', 'id']
    }

    def __init__(self, group, api_token,
                 max_items=MAX_ITEMS, tag=None, archive=None,
                 sleep_for_rate=False, min_rate_to_sleep=MIN_RATE_LIMIT,
                 sleep_time=SLEEP_TIME, ssl_verify=True):
        origin = MEETUP_URL

        super().__init__(origin, tag=tag, archive=archive, ssl_verify=ssl_verify)
        self.group = group
        self.max_items = max_items
        self.api_token = api_token
        self.sleep_for_rate = sleep_for_rate
        self.min_rate_to_sleep = min_rate_to_sleep
        self.sleep_time = sleep_time

        # No client is created here; `_init_client` builds it on demand
        self.client = None

    def fetch(self, category=CATEGORY_EVENT, from_date=DEFAULT_DATETIME, to_date=None,
              filter_classified=False):
        """Fetch the events from the server.

        This method fetches those events of a group stored on the server
        that were updated since the given date. Data comments and rsvps
        are included within each event.

        :param category: the category of items to fetch
        :param from_date: obtain events updated since this date
        :param to_date: obtain events updated before this date
        :param filter_classified: remove classified fields from the resulting items

        :returns: a generator of events
        """
        if not from_date:
            from_date = DEFAULT_DATETIME

        from_date = datetime_to_utc(from_date)

        kwargs = {"from_date": from_date, "to_date": to_date}
        items = super().fetch(category,
                              filter_classified=filter_classified,
                              **kwargs)

        return items

    def fetch_items(self, category, **kwargs):
        """Fetch the events.

        Every event is extended with its comments and rsvps before
        being yielded. When `to_date` is given, events whose update
        timestamp is equal to or later than it are skipped, and no
        further pages are requested once such an event is found.

        :param category: the category of items to fetch
        :param kwargs: backend arguments; the keys `from_date` and
            `to_date` are required

        :returns: a generator of items
        """
        from_date = kwargs['from_date']
        to_date = kwargs['to_date']

        logger.info("Fetching events of '%s' group from %s to %s",
                    self.group, str(from_date),
                    str(to_date) if to_date else '--')

        to_date_ts = datetime_to_utc(to_date).timestamp() if to_date else None

        nevents = 0
        stop_fetching = False

        ev_pages = self.client.events(self.group, from_date=from_date)

        for evp in ev_pages:
            for event in self.parse_json(evp):
                event_id = event['id']

                # NOTE(review): comments and rsvps are requested before the
                # 'to_date' check, so skipped events still cost API calls.
                # The check could move up if `metadata_updated_on` only
                # reads event-level fields -- confirm before reordering.
                event['comments'] = self.__fetch_and_parse_comments(event_id)
                event['rsvps'] = self.__fetch_and_parse_rsvps(event_id)

                # Check events updated before 'to_date'. The limit is
                # compared against None because a timestamp of 0.0
                # (the Unix epoch) is a valid, although falsy, limit.
                event_ts = self.metadata_updated_on(event)

                if to_date_ts is not None and event_ts >= to_date_ts:
                    stop_fetching = True
                    continue

                yield event
                nevents += 1

            if stop_fetching:
                break

        logger.info("Fetch process completed: %s events fetched", nevents)

    @classmethod
    def has_archiving(cls):
        """Returns whether it supports archiving items on the fetch process.

        :returns: this backend supports items archive
        """
        return True

    @classmethod
    def has_resuming(cls):
        """Returns whether it supports to resume the fetch process.

        :returns: this backend supports items resuming
        """
        return True

    @staticmethod
    def parse_json(raw_json):
        """Parse a Meetup JSON stream.

        The method parses a JSON stream and returns the parsed data.
        Callers in this class iterate over the result.

        :param raw_json: JSON string to parse

        :returns: the parsed data
        """
        return json.loads(raw_json)

    def _init_client(self, from_archive=False):
        """Init client.

        :param from_archive: forwarded to the client; it tells whether
            to write/read the archive

        :returns: a `MeetupClient` configured with this backend's settings
        """
        return MeetupClient(self.api_token, self.max_items,
                            self.sleep_for_rate, self.min_rate_to_sleep, self.sleep_time,
                            self.archive, from_archive, self.ssl_verify)

    def __fetch_and_parse_comments(self, event_id):
        """Collect into a list the comments from all the pages of an event."""

        logger.debug("Fetching and parsing comments from group '%s' event '%s'",
                     self.group, str(event_id))

        comments = []
        raw_pages = self.client.comments(self.group, event_id)

        for raw_page in raw_pages:
            comments.extend(self.parse_json(raw_page))

        return comments

    def __fetch_and_parse_rsvps(self, event_id):
        """Collect into a list the rsvps from all the pages of an event."""

        logger.debug("Fetching and parsing rsvps from group '%s' event '%s'",
                     self.group, str(event_id))

        rsvps = []
        raw_pages = self.client.rsvps(self.group, event_id)

        for raw_page in raw_pages:
            rsvps.extend(self.parse_json(raw_page))

        return rsvps
class MeetupCommand(BackendCommand):
    """Command-line runner for the Meetup backend."""

    BACKEND = Meetup

    @classmethod
    def setup_cmd_parser(cls):
        """Create the argument parser used by the Meetup command.

        On top of the generic options enabled here (date range, token
        authentication, archive and SSL verification), the parser gets
        a 'Meetup arguments' group and the positional `group` argument.

        :returns: the configured `BackendCommandArgumentParser` object
        """
        cmd_parser = BackendCommandArgumentParser(cls.BACKEND,
                                                  from_date=True,
                                                  to_date=True,
                                                  token_auth=True,
                                                  archive=True,
                                                  ssl_verify=True)

        # Meetup options, declared as (flag, settings) pairs and
        # registered in this same order
        meetup_options = (
            ('--max-items',
             {'dest': 'max_items',
              'type': int,
              'default': MAX_ITEMS,
              'help': "Maximum number of items requested on the same query"}),
            ('--sleep-for-rate',
             {'dest': 'sleep_for_rate',
              'action': 'store_true',
              'help': "sleep for getting more rate"}),
            ('--min-rate-to-sleep',
             {'dest': 'min_rate_to_sleep',
              'default': MIN_RATE_LIMIT,
              'type': int,
              'help': "sleep until reset when the rate limit reaches this value"}),
            ('--sleep-time',
             {'dest': 'sleep_time',
              'default': SLEEP_TIME,
              'type': int,
              'help': "minimun sleeping time to avoid too many request exception"}),
        )

        meetup_args = cmd_parser.parser.add_argument_group('Meetup arguments')
        for flag, settings in meetup_options:
            meetup_args.add_argument(flag, **settings)

        # Required arguments
        cmd_parser.parser.add_argument('group', help="Meetup group name")

        return cmd_parser
class MeetupClient(HttpClient, RateLimitHandler):
    """Meetup API client.

    Client for fetching information from the Meetup server
    using its REST API v3.

    :param api_token: OAuth2 token needed to access the API
    :param max_items: maximum number of items per request
    :param sleep_for_rate: sleep until rate limit is reset
    :param min_rate_to_sleep: minimum rate needed to sleep until
        it will be reset
    :param sleep_time: time (in seconds) to sleep in case
        of connection problems
    :param archive: an archive to store/read fetched data
    :param from_archive: it tells whether to write/read the archive
    :param ssl_verify: enable/disable SSL verification
    """
    EXTRA_STATUS_FORCELIST = [429]

    # Resource names
    RCOMMENTS = 'comments'
    REVENTS = 'events'
    RRSVPS = 'rsvps'

    # Parameter and header names
    PFIELDS = 'fields'
    PKEY_OAUTH2 = 'Authorization'
    PORDER = 'order'
    PPAGE = 'page'
    PRESPONSE = 'response'
    PSCROLL = 'scroll'
    PSTATUS = 'status'

    # Parameter values
    VEVENT_FIELDS = ['event_hosts', 'featured', 'group_topics',
                     'plain_text_description', 'rsvpable', 'series']
    VRSVP_FIELDS = ['attendance_status']
    VRESPONSE = ['yes', 'no']

    # FIXME: Add 'draft' status when the bug in the Meetup API gets fixed.
    # More info in https://github.com/meetup/api/issues/260
    VSTATUS = ['cancelled', 'upcoming', 'past', 'proposed', 'suggested']
    VUPDATED = 'updated'

    def __init__(self, api_token, max_items=MAX_ITEMS,
                 sleep_for_rate=False, min_rate_to_sleep=MIN_RATE_LIMIT, sleep_time=SLEEP_TIME,
                 archive=None, from_archive=False, ssl_verify=True):
        self.api_token = api_token
        self.max_items = max_items

        super().__init__(MEETUP_API_URL, sleep_time=sleep_time,
                         extra_status_forcelist=self.EXTRA_STATUS_FORCELIST,
                         archive=archive, from_archive=from_archive, ssl_verify=ssl_verify)
        super().setup_rate_limit_handler(sleep_for_rate=sleep_for_rate, min_rate_to_sleep=min_rate_to_sleep)

    def calculate_time_to_reset(self):
        """Number of seconds to wait. They are contained in the rate limit reset header.

        :returns: the value of `rate_limit_reset_ts`, or 0 when that
            value is negative
        """
        time_to_reset = 0 if self.rate_limit_reset_ts < 0 else self.rate_limit_reset_ts

        return time_to_reset

    def events(self, group, from_date=DEFAULT_DATETIME):
        """Fetch the events pages of a given group.

        :param group: name of the group
        :param from_date: fetch the events updated since this date

        :returns: a generator of raw pages (text) of events

        :raises RepositoryError: when the server answers with a 410
            status code, meaning the group is no longer accessible
        """
        date = datetime_to_utc(from_date)
        date = date.strftime("since:%Y-%m-%dT%H:%M:%S.000Z")

        resource = urijoin(group, self.REVENTS)

        # Hack required because the Meetup API does not support list
        # values with the format `?param=value1&param=value2`.
        # It only works with `?param=value1,value2`.
        # Moreover, urllib3 encodes comma characters when values
        # are given using a params dict, which does not work
        # with Meetup, either.
        fixed_params = '?' + self.PFIELDS + '=' + ','.join(self.VEVENT_FIELDS)
        fixed_params += '&' + self.PSTATUS + '=' + ','.join(self.VSTATUS)
        resource += fixed_params

        params = {
            self.PORDER: self.VUPDATED,
            self.PSCROLL: date,
            self.PPAGE: self.max_items
        }

        try:
            yield from self._fetch(resource, params)
        except requests.exceptions.HTTPError as error:
            if error.response.status_code == 410:
                msg = "Group is no longer accessible: {}".format(error)
                raise RepositoryError(cause=msg) from error
            raise

    def comments(self, group, event_id):
        """Fetch the comments of a given event.

        :param group: name of the group
        :param event_id: identifier of the event

        :returns: a generator of raw pages (text) of comments
        """
        resource = urijoin(group, self.REVENTS, event_id, self.RCOMMENTS)

        params = {
            self.PPAGE: self.max_items
        }

        yield from self._fetch(resource, params)

    def rsvps(self, group, event_id):
        """Fetch the rsvps of a given event.

        :param group: name of the group
        :param event_id: identifier of the event

        :returns: a generator of raw pages (text) of rsvps
        """
        resource = urijoin(group, self.REVENTS, event_id, self.RRSVPS)

        # Same hack as in the 'events' method: list values are
        # written into the URL as comma-separated strings
        fixed_params = '?' + self.PFIELDS + '=' + ','.join(self.VRSVP_FIELDS)
        fixed_params += '&' + self.PRESPONSE + '=' + ','.join(self.VRESPONSE)
        resource += fixed_params

        params = {
            self.PPAGE: self.max_items
        }

        yield from self._fetch(resource, params)

    @staticmethod
    def sanitize_for_archive(url, headers, payload):
        """Sanitize the headers of a HTTP request by removing the token
        information before storing/retrieving archived items.

        The `headers` object is modified in place.

        :param url: HTTP url request
        :param headers: HTTP headers request
        :param payload: HTTP payload request

        :returns: url, the sanitized headers and payload
        """
        if MeetupClient.PKEY_OAUTH2 in headers:
            headers.pop(MeetupClient.PKEY_OAUTH2)

        return url, headers, payload

    def _fetch(self, resource, params):
        """Fetch a resource.

        Method to fetch and to iterate over the contents of a
        type of resource. The method returns a generator of
        pages for that resource and parameters.

        :param resource: type of the resource
        :param params: parameters to filter

        :returns: a generator of pages for the requested resource
        """
        url = urijoin(self.base_url, resource)

        headers = {
            self.PKEY_OAUTH2: 'Bearer {}'.format(self.api_token)
        }

        while True:
            logger.debug("Meetup client calls resource: %s params: %s",
                         resource, str(params))

            # Rate limit data is neither checked nor updated when
            # the responses are read from the archive
            if not self.from_archive:
                self.sleep_for_rate_limit()

            r = self.fetch(url, payload=params, headers=headers)

            if not self.from_archive:
                self.update_rate_limit(r)

            yield r.text

            # Keep going while the response links to a next page
            if not (r.links and 'next' in r.links):
                break

            url = r.links['next']['url']