About

We are going to pull data from a webservice, transform it and serve it back via a webservice.

The required libraries

import airbnb as ab
import pandas as pd
import locale as lo
import argparse as ap
import traceback as tb
import os as os
import functools as ft
from dateutil import parser as dtp

As we'll expose webservice endpoints, we should import the following flask elements.

Those elements can be installed using pip install flask flask-restful flask-wtf flask-cors

import flask as fl
import flask_restful as flr
import flask_restful.reqparse as flr_r
import flask_cors as flc
import flask_wtf.csrf as flwc

Get data from Airbnb API

Let's pull reviews and rating information from Airbnb for a given listing_id (a property available to rent on Airbnb).

def get_reviews(
    listing_id,
    locale='fr',
    page_size=50,
    keep_n_reviews=10,
    min_rating=3,
    most_recent_first=True):
    """Fetch, filter and sort the reviews of an Airbnb listing.

    All review pages are downloaded through the ``airbnb`` client, then the
    reviews are filtered by rating, sorted by creation date and truncated.

    Args:
        listing_id: Identifier of the listing whose reviews are requested.
        locale: Value sent in the ``x-airbnb-locale`` and ``accept-language``
            headers; it is also installed as the process-wide locale.
        page_size: Number of reviews requested per API call (must be >= 1).
        keep_n_reviews: Maximum number of reviews returned. ``None`` or a
            value <= 0 keeps every review.
        min_rating: Reviews whose ``rating`` is below this value are dropped.
            ``None`` disables the filter.
        most_recent_first: Sort newest first when true, oldest first
            otherwise.

    Returns:
        A list of review dicts as returned by the API, each one extended with
        a ``created_at__dt`` key holding the parsed ``created_at`` datetime.

    Raises:
        ValueError: If ``page_size`` is smaller than 1 (the pagination loop
            could never terminate).
    """
    if page_size < 1:
        raise ValueError('page_size must be a positive integer')

    api = ab.api.Api(randomize=True)
    api._session.headers['x-airbnb-locale'] = locale
    api._session.headers['accept-language'] = locale
    # NOTE: setlocale changes the locale of the whole process, not only of
    # this call.
    lo.setlocale(lo.LC_ALL, locale)

    reviews = []
    offset = 0

    while True:
        page = api.get_reviews(
            listing_id=listing_id,
            offset=offset,
            limit=page_size)
        page_reviews = page['reviews']

        for review in page_reviews:
            review['created_at__dt'] = dtp.parse(review['created_at'])
            reviews.append(review)

        # A short (or empty) page means the last page has been reached.
        if len(page_reviews) < page_size:
            break
        offset += page_size

    if min_rating is not None:
        reviews = [x for x in reviews if x['rating'] >= min_rating]

    # sorted() is stable in both directions, so reviews sharing a timestamp
    # keep the order in which the API returned them.
    reviews = sorted(
        reviews,
        key=lambda review: review['created_at__dt'],
        reverse=bool(most_recent_first))

    if keep_n_reviews is not None and keep_n_reviews > 0:
        reviews = reviews[:keep_n_reviews]

    return reviews
def get_listing_details(
    listing_id, 
    locale='fr'):
    """Download the raw listing-details document of an Airbnb listing.

    The ``pdp_listing_details`` endpoint is queried through the HTTP session
    of the ``airbnb`` client, with ``locale`` sent in the request headers.
    ``locale`` is also installed as the process-wide locale.

    Args:
        listing_id: Identifier of the listing to look up.
        locale: Locale used for the request headers and for ``setlocale``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        An HTTP error (via ``raise_for_status``) when the response status
        indicates a failure.
    """
    client = ab.api.Api(randomize=True)
    session = client._session
    for header_name in ('x-airbnb-locale', 'accept-language'):
        session.headers[header_name] = locale
    lo.setlocale(lo.LC_ALL, locale)

    endpoint = '{0}/pdp_listing_details/{1}'.format(ab.api.API_URL, listing_id)
    query = {'_format': 'for_rooms_show'}

    response = session.get(endpoint, params=query)
    response.raise_for_status()
    return response.json()
def get_ratings(
    listing_id, 
    locale='fr',
    keep_n_reviews=10, 
    min_rating=3,
    most_recent_first=True):
    """Collect the rating summary and selected reviews of a listing.

    The listing details are fetched first, then the reviews; the localized
    rating strings found in the details are converted to floats with
    ``locale.atof``.

    Args:
        listing_id: Identifier of the listing.
        locale: Locale forwarded to ``get_listing_details`` and
            ``get_reviews``.
        keep_n_reviews: Forwarded to ``get_reviews``.
        min_rating: Forwarded to ``get_reviews``.
        most_recent_first: Forwarded to ``get_reviews``.

    Returns:
        A dict with the keys ``overall_rating``, ``review_count``,
        ``categories``, ``rating_terms`` and ``reviews``. Every entry of
        ``categories`` gains a numeric ``rating`` key.
    """
    details = get_listing_details(listing_id=listing_id, locale=locale)

    selected_reviews = get_reviews(
        listing_id=listing_id,
        locale=locale,
        keep_n_reviews=keep_n_reviews,
        min_rating=min_rating,
        most_recent_first=most_recent_first)

    pdp = details['pdp_listing_detail']
    rating_text = pdp['reviews_module']['localized_overall_rating']
    rating_terms = pdp['reviews_module']['appreciation_tags']
    interface = pdp['review_details_interface']

    overall = lo.atof(rating_text)
    count = interface['review_count']
    categories = interface['review_summary']

    # Attach a parsed numeric rating next to each localized rating string.
    for category in categories:
        category['rating'] = lo.atof(category['localized_rating'])

    ratings = {}
    ratings['overall_rating'] = overall
    ratings['review_count'] = count
    ratings['categories'] = categories
    ratings['rating_terms'] = rating_terms
    ratings['reviews'] = selected_reviews
    return ratings

We can get the individual reviews by calling the get_reviews function defined above:

# Example Airbnb listing identifier to query.
listing_id = 36902451

Publishing data as a webservice

With flask-restful you can bind a class to a webservice endpoint by deriving it from the Resource base class and providing code for each supported verb (get, post, etc.). Let's define a basic endpoint that will only be used as a simple /info heartbeat check.

class Info(flr.Resource):
    """Heartbeat resource: answers GET with a static status payload."""

    def get(self):
        """Return the fixed success/version/status description."""
        payload = dict(
            success=True,
            version='0.1.0',
            status='ready')
        return payload

We can define the application as well as a default /info endpoint this way:

def configure_api():
    """Create the Flask application, its extensions and the ``/info`` route.

    Returns:
        A dict holding the created components under the keys ``app`` (Flask
        application), ``csrf`` (CSRF protection extension), ``cors`` (CORS
        extension) and ``api`` (flask-restful API object).
    """
    app = fl.Flask('apps')
    # Report every request-parsing error at once instead of only the first.
    app.config['BUNDLE_ERRORS'] = True

    # NOTE(review): CSRF protection is enabled for the whole application.
    # Confirm that clients of the POST endpoints can provide a CSRF token
    # (and that a secret key is configured), or exempt those endpoints.
    csrf = flwc.CSRFProtect()
    csrf.init_app(app)

    cors = flc.CORS(
        app,
        resources={
            r'/*': {
                'origins': '*',
                # Must be the boolean False: the string 'False' is truthy and
                # would enable the wildcard header it was meant to disable.
                'send_wildcard': False
            }
        })

    api = flr.Api(
        app=app,
        catch_all_404s=True)

    api.add_resource(
        Info,
        '/info',
        endpoint='info')

    return {
        'app': app,
        'csrf': csrf,
        'cors': cors,
        'api': api
    }

Let's create the application components:

# Dict of application components returned by configure_api()
# (keys: 'app', 'csrf', 'cors', 'api').
webservice_app = configure_api()

Let's create an endpoint for the Airbnb data.

class AirbnbRating(flr.Resource):
    """Resource returning the ratings and reviews of an Airbnb listing.

    POST expects a JSON body with a mandatory ``listing_id`` and the optional
    ``locale``, ``keep_n_reviews``, ``min_rating`` and ``most_recent_first``
    fields, and answers with a ``success`` / ``error`` / ``error_details`` /
    ``result`` envelope.
    """

    def __init__(self):
        """Declare the JSON arguments accepted by the POST handler."""

        self.reqparse = flr_r.RequestParser()

        self.reqparse.add_argument(
            'listing_id',
            type=int,
            location='json',
            required=True,
            help='"listing_id" is mandatory ({error_msg})')

        self.reqparse.add_argument(
            'locale',
            type=str,
            default='fr',
            location='json',
            required=False,
            help='"locale" error ({error_msg})')

        self.reqparse.add_argument(
            'keep_n_reviews',
            type=int,
            default=10,
            location='json',
            required=False,
            help='"keep_n_reviews" error ({error_msg})')

        self.reqparse.add_argument(
            'min_rating',
            type=int,
            default=3,
            location='json',
            required=False,
            help='"min_rating" error ({error_msg})')

        # NOTE(review): type=bool turns any non-empty string (including
        # "false") into True; only real JSON booleans behave as expected.
        self.reqparse.add_argument(
            'most_recent_first',
            type=bool,
            default=True,
            location='json',
            required=False,
            help='"most_recent_first" error ({error_msg})')

        super(AirbnbRating, self).__init__()

    @staticmethod
    def _to_jsonable(value):
        """Return a copy of ``value`` with dates replaced by ISO-8601 strings.

        Dicts, lists and tuples are walked recursively (tuples become lists);
        every other value is returned unchanged.
        """
        import datetime as dt

        if isinstance(value, dict):
            return {
                key: AirbnbRating._to_jsonable(item)
                for key, item in value.items()}
        if isinstance(value, (list, tuple)):
            return [AirbnbRating._to_jsonable(item) for item in value]
        if isinstance(value, (dt.datetime, dt.date)):
            return value.isoformat()
        return value

    def post(self):
        """Parse the request, fetch the ratings and wrap them in an envelope.

        Any exception raised while parsing the arguments or fetching the data
        is caught and reported through the ``error`` and ``error_details``
        fields instead of propagating.
        """

        success = False
        error = None
        error_details = None
        result = None

        try:
            args = self.reqparse.parse_args()

            ratings = get_ratings(
                listing_id=args['listing_id'],
                locale=args['locale'],
                keep_n_reviews=args['keep_n_reviews'],
                min_rating=args['min_rating'],
                most_recent_first=args['most_recent_first'])

            # The reviews carry parsed datetime objects ('created_at__dt'),
            # which the JSON response encoder cannot serialise. Convert them
            # here, inside the try block, so that a failure is still reported
            # through the envelope.
            result = self._to_jsonable(ratings)

            success = True

        except Exception as e:
            # NOTE(review): the full traceback is returned to the client;
            # consider logging it server-side instead.
            error_details = tb.format_exc()
            error = str(e)

        return {
            'success': success,
            'error': error,
            'error_details': error_details,
            'result': result
        }

We can add this new endpoint:

# Register AirbnbRating on the API object under the '/rating' route.
webservice_app['api'].add_resource(
    AirbnbRating, 
    '/rating',
    endpoint='rating')

Let's start the webservice

# Start Flask's built-in development server, listening on all network
# interfaces (0.0.0.0) on port 1234 with debug mode off.
# NOTE: this is the development server; use a production WSGI server for
# real deployments.
webservice_app['app'].run(
    host='0.0.0.0',
    port=1234,
    debug=False)
 * Serving Flask app 'apps' (lazy loading)
 * Environment: production
   WARNING: This is a development server. Do not use it in a production deployment.
   Use a production WSGI server instead.
 * Debug mode: off
 * Running on all addresses.
   WARNING: This is a development server. Do not use it in a production deployment.
 * Running on http://192.168.0.19:1234/ (Press CTRL+C to quit)
192.168.0.19 - - [08/Jun/2021 15:31:23] "GET / HTTP/1.1" 404 -
192.168.0.19 - - [08/Jun/2021 15:31:23] "GET /favicon.ico HTTP/1.1" 404 -
192.168.0.19 - - [08/Jun/2021 15:31:29] "GET /info HTTP/1.1" 200 -
127.0.0.1 - - [08/Jun/2021 15:31:49] "GET /info HTTP/1.1" 200 -
127.0.0.1 - - [08/Jun/2021 15:31:50] "GET /favicon.ico HTTP/1.1" 404 -

We can navigate to the info endpoint and verify the webservice is doing what we need: