Source code for scholarmetrics.metrics

"""Collection of common metrics for academic scholars."""

import numpy as np
import numpy.typing as npt

__all__ = ['euclidean', 'gindex', 'hindex']


[docs] def euclidean(arr: npt.ArrayLike, ignore_nan: bool = True) -> float: """ Calculate Euclidean index for an author. An Euclidean index of a vector is the square root of the sum of the squared elements. Parameters ---------- arr : array-like Array of citations. ignore_nan : bool (optional, default=True) If True, remove nan values and return 0 if all values are nan. Returns ------- eui : float Euclidean index of the author for the given citations. Examples -------- >>> from scholarmetrics import euclidean >>> citations = [6, 10, 5, 46, 0, 2] >>> euclidean(citations) 47.75981574503821 Notes ----- The Euclidean index was originally proposed by Motty Perry and Philip J. Reny [eu]_. References ---------- .. [eu] Perry, M. and P. J. Reny (2016): "How to Count Citations If You Must", *The American Economic Review*, 106(9), pp. 2722-2241. DOI: 10.1257/aer.20140850 """ arr = _to_array(arr, ignore_nan) eui = np.linalg.norm(arr) return eui
[docs] def gindex(arr: npt.ArrayLike) -> int: """ Calculate g-index for an author. An g-index of x means that the author's top x publications together accumulated at least :math:`x^2` citations. Parameters ---------- arr : array-like Array of citations. Returns ------- gi : int g-index of the author for the given citations. Examples -------- >>> from scholarmetrics import gindex >>> citations = [6, 10, 5, 46, 0, 2] >>> gindex(citations) 6 Notes ----- The g-index was originally proposed by Leo Egghe [g]_. It excludes uncited publications. nan values are silently treated as zero values. References ---------- .. [g] Egghe, L. (2006): "Theory and practise of the g-index", *Scientometrics*, 69(1), pp. 131–152. DOI: 10.1007/s11192-006-0144-7 """ arr = _to_array(arr, ignore_nan=True) arr = arr[np.nonzero(arr)] cum_sr = np.cumsum(sorted(arr, reverse=True)) sqr_idx = [n**2 for n in range(1, len(arr) + 1)] gi = sum([c >= i for (c, i) in zip(cum_sr, sqr_idx)]) return gi
[docs] def hindex(arr: npt.ArrayLike, ignore_nan: bool = True) -> int | float: """ Calculate h-index for an author. An h-index of x means that the author has at least x publications that have been cited at least x times. Parameters ---------- arr : array-like Array of citations. ignore_nan : bool (optional, default=True) If True, ignore nan values and return 0 if all values are nan. Returns ------- hi : int H-index of the author for the given citations. Examples -------- >>> from scholarmetrics import hindex >>> citations = [6, 10, 5, 46, 0, 2] >>> hindex(citations) 4 Notes ----- The h-index was originally proposed by Jorge E. Hirsch [h]_. References ---------- .. [h] Hirsch, J. E. (2005): "An index to quantify an individual's scientific research output", *National Academy of Sciences of the USA* 102(46). DOI: 10.1073/pnas.0507655102 """ arr = _to_array(arr, ignore_nan=True) # remove nan in any case if not ignore_nan and len(arr) == 0: # return nan if all values are nan return np.nan sr = sorted(arr, reverse=True) idx = range(1, len(sr) + 1) hi = sum([p <= c for (c, p) in zip(sr, idx)]) return hi
def _to_array(arr: npt.ArrayLike, ignore_nan: bool) -> np.ndarray: """Helper function to remove or replace nan values from an array-like object and return a cleaned numpy array. """ arr = np.array(arr) if ignore_nan: return arr[np.isfinite(arr)] else: return arr