Source code for e3fp.fingerprint.metrics.fprint_metrics

"""Fingerprint comparison metrics.

Author: Seth Axen
E-mail: seth.axen@gmail.com
"""
from __future__ import division

import numpy as np
from ..fprint import CountFingerprint, diff_counts_dict


[docs]def tanimoto(fp1, fp2): """Calculate Tanimoto coefficient between fingerprints. Parameters ---------- fp1 : Fingerprint Fingerprint 1 fp2 : Fingerprint Fingerprint 2 Returns ------- float : Tanimoto coefficient. """ try: intersect = np.intersect1d( fp1.indices, fp2.indices, assume_unique=True ).shape[0] return intersect / (fp1.bit_count + fp2.bit_count - intersect) except ZeroDivisionError: return 0.0
[docs]def soergel(fp1, fp2): """Calculate Soergel similarity between fingerprints. Soergel similarity is the complement of Soergel distance and can be thought of as the analog of the Tanimoto coefficient for count/float-based fingerprints. For `Fingerprint`, it is equivalent to the Tanimoto coefficient. Parameters ---------- fp1 : Fingerprint Fingerprint 1 fp2 : Fingerprint Fingerprint 2 Returns ------- float : Soergel similarity. Reference ------- """ if not ( isinstance(fp1, CountFingerprint) and isinstance(fp2, CountFingerprint) ): return tanimoto(fp1, fp2) counts_diff = diff_counts_dict(fp1, fp2) temp = np.asarray( [ (abs(counts_diff[x]), max(fp1.get_count(x), fp2.get_count(x))) for x in counts_diff.keys() ], dtype=float, ).T soergel = 1 - np.sum(temp[0, :]) / np.sum(temp[1, :]) return soergel
[docs]def dice(fp1, fp2): """Calculate Dice coefficient between fingerprints. Parameters ---------- fp1 : Fingerprint Fingerprint 1 fp2 : Fingerprint Fingerprint 2 Returns ------- float : Dice coefficient. """ try: intersect = np.intersect1d( fp1.indices, fp2.indices, assume_unique=True ).shape[0] return 2 * intersect / (fp1.bit_count + fp2.bit_count) except ZeroDivisionError: return 0.0
[docs]def cosine(fp1, fp2): """Calculate cosine similarity between fingerprints. Parameters ---------- fp1 : Fingerprint Fingerprint 1 fp2 : Fingerprint Fingerprint 2 Returns ------- float : Cosine similarity. """ try: dot = sum(v * fp2.get_count(k) for k, v in fp1.counts.items()) root_norm = ( sum(v ** 2 for v in fp1.counts.values()) * sum(v ** 2 for v in fp2.counts.values()) ) ** 0.5 return dot / root_norm except ZeroDivisionError: return 0.0
[docs]def pearson(fp1, fp2): """Calculate Pearson correlation between fingerprints. Parameters ---------- fp1 : Fingerprint Fingerprint 1 fp2 : Fingerprint Fingerprint 2 Returns ------- float : Pearson correlation. """ try: dot = sum(v * fp2.get_count(k) for k, v in fp1.counts.items()) return (dot / fp1.bits - fp1.mean() * fp2.mean()) / ( fp1.std() * fp2.std() ) except ZeroDivisionError: return 0.0
# intersect = np.intersect1d(fp1.indices, fp2.indices, # assume_unique=True).shape[0] # return ((intersect / fp1.bits) - # ((fp1.mean() * fp2.mean()) / (fp1.std() * fp2.std())))
[docs]def hamming(fp1, fp2): """Calculate Hamming distance between fingerprints. Parameters ---------- fp1 : Fingerprint Fingerprint 1 fp2 : Fingerprint Fingerprint 2 Returns ------- float : Hamming distance. """ intersect = np.intersect1d( fp1.indices, fp2.indices, assume_unique=True ).shape[0] return fp1.bit_count + fp2.bit_count - 2 * intersect
[docs]def distance(fp1, fp2): """Calculate Euclidean distance between fingerprints. Parameters ---------- fp1 : Fingerprint Fingerprint 1 fp2 : Fingerprint Fingerprint 2 Returns ------- float : Euclidian distance. """ return hamming(fp1, fp2) ** 0.5