Source code for sortinghat.core.recommendations.gender

# -*- coding: utf-8 -*-
#
# Copyright (C) 2014-2021 Bitergia
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
#     Santiago Dueñas <sduenas@bitergia.com>
#     Miguel Ángel Fernández <mafesan@bitergia.com>
#     Eva Millán <evamillan@bitergia.com>
#


import logging
import re

import requests
import urllib3.util

from ..db import find_individual_by_uuid
from ..errors import NotFoundError, InvalidValueError

logger = logging.getLogger(__name__)

name_pattern = re.compile(r"(^\w+)\s\w+")


def recommend_gender(uuids):
    """Suggest a gender for each individual in a list.

    This is a generator. For every identifier in `uuids` the
    individual is looked up, the first name is extracted from the
    profile and the genderize.io API is queried with it. Each
    successful lookup yields a pair `(uuid, (gender, accuracy))`,
    where `gender` and `accuracy` are the values produced by the
    genderize.io query for that first name.

    Individuals are silently skipped (a warning is logged) when:

      - the individual cannot be found,
      - the profile has no name or the name does not follow a
        'FirstName LastName' pattern,
      - the request to the API fails.

    :param uuids: list of individual identifiers

    :returns: a generator of recommendations
    """
    logger.debug(f"Generating genders recommendations; uuids={uuids}; ...")

    for individual_uuid in uuids:
        try:
            person = find_individual_by_uuid(individual_uuid)
            first_name = _get_individual_name(person)
            predicted, confidence = _genderize(first_name)
        except NotFoundError:
            logger.warning(f"Skipping {individual_uuid}: Individual not found")
            continue
        except InvalidValueError:
            logger.warning(f"Skipping {individual_uuid}: No valid name")
            continue
        except requests.exceptions.RequestException as exc:
            logger.warning(
                f"Skipping {individual_uuid} due to a connection error: {str(exc)}"
            )
            continue

        # Only reached when the three lookups above succeeded.
        yield individual_uuid, (predicted, confidence)

    logger.info(f"Gender recommendations generated; uuids='{uuids}'")


def _get_individual_name(individual):
    """Get the first name of an individual from their profile.

    The profile name must start with two words separated by a single
    whitespace character (see `name_pattern`); the first of those
    words is returned in lowercase.

    :param individual: object whose `profile.name` attribute is read

    :returns: the first name, in lowercase

    :raises InvalidValueError: when the name cannot be read or it
        does not match the expected pattern
    """
    try:
        name_match = name_pattern.match(individual.profile.name)
        # `name_match` is None when the pattern does not match, so the
        # attribute access below fails and is reported as an invalid value.
        first_name = name_match.group(1).lower()
    except Exception as e:
        # Deliberately broad: any failure to obtain a first name is
        # reported uniformly; the original error is kept as the cause.
        raise InvalidValueError(msg=str(e)) from e
    else:
        return first_name


def _genderize(name):
    """Fetch gender from genderize.io.

    Sends a GET request to the genderize.io API for the given name,
    adding the API key from the Django setting
    `SORTINGHAT_GENDERIZE_API_KEY` when one is configured. Requests
    are retried on connection errors and on 502 responses.

    :param name: first name to query

    :returns: a tuple `(gender, accuracy)`; `gender` is the value of
        the 'gender' field of the response (or None if missing) and
        `accuracy` is the 'probability' field expressed as an integer
        percentage, or None when the probability is missing or zero

    :raises requests.exceptions.RequestException: when the request
        fails or the server answers with an error status
    """
    # Imported here so the module can be loaded before Django settings
    # are configured.
    from django.conf import settings

    api_key = settings.SORTINGHAT_GENDERIZE_API_KEY
    genderize_api_url = "https://api.genderize.io/"
    total_retries = 10
    max_retries = 5
    sleep_time = 0.25
    status_forcelist = [502]

    params = {
        'name': name
    }

    if api_key:
        params['apikey'] = api_key

    retries = urllib3.util.Retry(total=total_retries,
                                 connect=max_retries,
                                 status=max_retries,
                                 status_forcelist=status_forcelist,
                                 backoff_factor=sleep_time,
                                 raise_on_status=True)

    # The context manager closes the session (and its pooled
    # connections) once the response has been consumed.
    with requests.Session() as session:
        session.mount('http://', requests.adapters.HTTPAdapter(max_retries=retries))
        session.mount('https://', requests.adapters.HTTPAdapter(max_retries=retries))

        r = session.get(genderize_api_url, params=params)
        # Check the status before decoding: an error response is not
        # guaranteed to carry a JSON body, and callers expect a
        # RequestException rather than a decoding error.
        r.raise_for_status()
        result = r.json()

    gender = result.get('gender', None)
    prob = result.get('probability', None)

    # Round instead of truncating: e.g. 0.29 * 100 is 28.999999999999996
    # in floating point, which int() would turn into 28.
    acc = round(prob * 100) if prob else None

    return gender, acc