557 lines
19 KiB
PHP
557 lines
19 KiB
PHP
<?php
|
|
/* -*- tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
|
/*
|
|
# ***** BEGIN LICENSE BLOCK *****
|
|
# This file is part of Plume Framework, a simple PHP Application Framework.
|
|
# Copyright (C) 2001-2010 Loic d'Anterroches and contributors.
|
|
#
|
|
# Plume Framework is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU Lesser General Public License as published by
|
|
# the Free Software Foundation; either version 2.1 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# Plume Framework is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Lesser General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Lesser General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
|
#
|
|
# ***** END LICENSE BLOCK ***** */
|
|
|
|
/**
|
|
* Core A/B testing component.
|
|
*
|
|
* The two important methods are `test` and `convert`.
|
|
*
|
|
* For performance reasons, the A/B testing component requires you to
|
|
* setup a cache (APC or Memcached) and use the MongoDB database. The
|
|
* amount of data in the MongoDB should not be that big for most of
|
|
* the websites and as such it is fine if you are using the 32bit
|
|
* version of MongoDB.
|
|
*
|
|
* For the moment the storage is not abstracted to use another database.
|
|
*
|
|
* All the configuration variables for the component start with
|
|
* `pluf_ab_`. You need to add 'Pluf_AB' to your list of middleware.
|
|
*
|
|
*/
|
|
class Pluf_AB
|
|
{
|
|
/**
* Cached MongoDB database handle.
*
* Stays null until getDb() opens the connection and stores the
* selected database here; later calls to getDb() return it as is.
*/
|
|
public static $db = null;
|
|
|
|
/**
 * Returns an alternative for a given test.
 *
 * The middleware normally stores the uid of the visitor in
 * $request->pabuid; when it is missing, it is computed with getUid()
 * and stored on the request.
 *
 * Order of evaluation:
 *
 * 1. When `pluf_ab_allow_force` is enabled and the query string has
 *    a parameter named like the test, its value is used as the key
 *    of the alternative to return (only if that key exists).
 * 2. The test is created in the database the first time it is seen.
 * 3. A closed test returns its winner, or the first alternative when
 *    no winner is stored.
 * 4. Bots get the first alternative and no exposure is counted.
 * 5. A visitor already in the test gets the stored alternative.
 * 6. Otherwise a weighted random alternative is drawn, stored for
 *    the visitor and the exposure counters are incremented.
 *
 * @param $test string Unique name of the test
 * @param $request Pluf_HTTP_Request
 * @param $alts array Alternatives to pick from (array(true,false))
 * @param $weights array Weights for the alternatives (null)
 * @param $desc string Optional description of the test ('')
 * @return mixed One value from $alts
 */
public static function test($test, &$request, $alts=array(true,false),
                            $weights=null, $desc='')
{
    if (Pluf::f('pluf_ab_allow_force', false) and
        isset($request->GET[$test])) {
        $forced = $request->GET[$test];
        // The key comes straight from the query string: only honor
        // it when it is a scalar key designating an existing
        // alternative, else continue with the normal selection.
        if ((is_string($forced) or is_int($forced))
            and array_key_exists($forced, $alts)) {
            return $alts[$forced];
        }
    }
    $db = self::getDb();
    // Get or set test
    $dtest = $db->tests->findOne(array('_id' => $test),
                                 array('_id', 'active', 'winner'));
    if ($dtest == null) {
        $dtest = array('_id' => $test,
                       'creation_dtime' => gmdate('Y-m-d H:i:s',
                                                  $request->time),
                       'desc' => $desc,
                       'alts' => $alts,
                       'exp' => 0,
                       'conv' => 0,
                       'active' => true);
        // One exposure and one conversion counter per alternative.
        for ($i=0;$i<count($alts);$i++) {
            $dtest['expalt_'.$i] = 0;
            $dtest['convalt_'.$i] = 0;
        }
        $db->tests->update(array('_id'=>$test), $dtest,
                           array('upsert' => true));
    } elseif (!$dtest['active']) {
        // If test closed with given alternative, returns alternative
        return (isset($dtest['winner'])) ? $alts[$dtest['winner']] : $alts[0];
    }
    if (!isset($request->pabuid)) {
        $request->pabuid = self::getUid($request);
    }
    if ($request->pabuid == 'bot') {
        return $alts[0];
    }
    // If $request->pabuid in test, returns corresponding alternative
    $intest = $db->intest->findOne(array('_id' => $test.'##'.$request->pabuid),
                                   array('_id', 'alt'));
    if ($intest) {
        return $alts[$intest['alt']];
    }
    // Else find alternative, store and return it
    if ($weights == null) {
        // No weights given: every alternative is equally likely.
        $weights = array_fill(0, count($alts), 1.0/count($alts));
    }
    $alt = self::weightedRand($weights);
    $intest = array('_id' => $test.'##'.$request->pabuid,
                    'test' => $test,
                    'pabuid' => $request->pabuid,
                    'first_dtime' => gmdate('Y-m-d H:i:s',
                                            $request->time),
                    'alt' => $alt);
    $db->intest->update(array('_id' => $test.'##'.$request->pabuid),
                        $intest, array('upsert' => true));
    // Update the counts of the test
    $db->tests->update(array('_id' => $test),
                       array('$inc' => array('exp' => 1,
                                             'expalt_'.$alt => 1)));
    return $alts[$alt];
}
|
|
|
|
/**
 * Record a conversion of the current visitor for a test.
 *
 * Nothing is recorded when the request carries no uid, when the
 * visitor is a bot, when the visitor was never exposed to the test,
 * when a conversion is already stored for this visitor and test, or
 * when the test does not exist or is no longer active.
 *
 * @param $test string Test
 * @param $request Pluf_HTTP_Request
 */
public static function convert($test, $request)
{
    if (!isset($request->pabuid)) {
        return;
    }
    if ($request->pabuid == 'bot') {
        return;
    }
    $db = self::getDb();
    // The same "test##uid" key identifies the exposure and the
    // conversion documents.
    $key = $test.'##'.$request->pabuid;
    $exposure = $db->intest->findOne(array('_id' => $key),
                                     array('_id', 'alt'));
    if (!$exposure) {
        // The visitor never saw the test.
        return;
    }
    if ($db->convert->findOne(array('_id' => $key))) {
        // A visitor converts at most once per test.
        return;
    }
    $definition = $db->tests->findOne(array('_id' => $test));
    if (!$definition) {
        return;
    }
    if (!$definition['active']) {
        return;
    }
    $db->convert->update(array('_id' => $key),
                         array('_id' => $key, 'test' => $test),
                         array('upsert' => true));
    // Global counter and counter of the alternative the visitor saw.
    $counters = array('conv' => 1,
                      'convalt_'.$exposure['alt'] => 1);
    $db->tests->update(array('_id' => $test),
                       array('$inc' => $counters));
}
|
|
|
|
/**
 * Register a property set for the user.
 *
 * This allows you to segment your users with these properties. The
 * properties are merged into $request->pabprops; on identical string
 * keys the new values replace the previous ones (array_merge).
 *
 * Nothing is stored for bots. When the request has no uid yet, it is
 * computed with getUid() and stored on the request.
 *
 * @param $request Pluf_HTTP_Request
 * @param $props array Properties
 */
public static function register(&$request, $props)
{
    $pabuid = (isset($request->pabuid)) ?
        $request->pabuid :
        self::getUid($request);
    if ($pabuid == 'bot') {
        return;
    }
    $request->pabuid = $pabuid;
    // pabprops is initialised by process_request(); start from an
    // empty set when it is missing so that array_merge() is never
    // handed a non-array.
    $current = (isset($request->pabprops) and is_array($request->pabprops)) ?
        $request->pabprops :
        array();
    $request->pabprops = array_merge($current, $props);
}
|
|
|
|
/**
 * Track a funnel.
 *
 * The array of properties can be used to track different A/B
 * testing cases.
 *
 * The list of properties must be the same at all the steps of the
 * funnel, you cannot pass array('gender' => 'M') at step 1 and
 * array('age' => 32) at step 2. You need to pass both of them at
 * all steps.
 *
 * Bots are not tracked. The same funnel step of a given visitor is
 * logged at most once per 60 seconds; this is enforced with a cache
 * key built from the funnel, the step and the uid.
 *
 * @param $funnel string Name of the funnel
 * @param $step int Step in the funnel, from 1 to n
 * @param $stepname string Readable name for the step
 * @param $request Pluf_HTTP_Request Request object
 * @param $props array Array of properties associated with the funnel (array())
 */
public static function trackFunnel($funnel, $step, $stepname, $request, $props=array())
{
    $pabuid = (isset($request->pabuid)) ?
        $request->pabuid :
        self::getUid($request);
    if ($pabuid == 'bot') {
        return;
    }
    $request->pabuid = $pabuid;
    $cache = Pluf_Cache::factory();
    $key = 'pluf_ab_funnel_'.crc32($funnel.'#'.$step.'#'.$pabuid);
    if ($cache->get($key, false)) {
        // Same funnel/step/visitor already logged in the last 60s.
        return;
    }
    $cache->set($key, '1', 60);
    // pabprops is initialised by process_request(); use an empty set
    // when it is missing so that array_merge() is never handed a
    // non-array.
    $registered = (isset($request->pabprops) and is_array($request->pabprops)) ?
        $request->pabprops :
        array();
    $what = array(
                  'f' => $funnel,
                  's' => $step,
                  'sn' => $stepname,
                  // Day of the request as an integer, e.g. 20100131
                  't' => (int) gmdate('Ymd', $request->time),
                  'u' => $pabuid,
                  'p' => array_merge($registered, $props),
                  );
    $db = self::getDb();
    $db->funnellogs->insert($what);
}
|
|
|
|
/**
 * Middleware hook run on the response of a view.
 *
 * Sets the `pabuid` cookie when the request came without it and a
 * non bot uid was attached to the request in the meantime. Sets the
 * `pabprops` cookie, encoded with Pluf_Sign::dumps(), when the
 * request carries a non empty property set and is not from a bot.
 *
 * @param Pluf_HTTP_Request The request
 * @param Pluf_HTTP_Response The response
 * @return Pluf_HTTP_Response The response
 */
public function process_response($request, $response)
{
    $cookie_missing = !isset($request->COOKIE['pabuid']);
    if ($cookie_missing and isset($request->pabuid)
        and $request->pabuid != 'bot') {
        $response->cookies['pabuid'] = $request->pabuid;
    }
    if (isset($request->pabprops) and count($request->pabprops)
        and $request->pabuid != 'bot') {
        $signed = Pluf_Sign::dumps($request->pabprops, null, true);
        $response->cookies['pabprops'] = $signed;
    }
    return $response;
}
|
|
|
|
/**
 * Middleware hook run on the incoming request.
 *
 * Copies the `pabuid` cookie to $request->pabuid when it passes
 * check_uid(), and initialises $request->pabprops with the content
 * of the `pabprops` cookie, or with an empty array when the cookie
 * is absent or cannot be loaded by Pluf_Sign::loads().
 *
 * @param Pluf_HTTP_Request The request
 * @return bool False
 */
public function process_request($request)
{
    if (isset($request->COOKIE['pabuid'])) {
        $candidate = $request->COOKIE['pabuid'];
        if (self::check_uid($candidate)) {
            $request->pabuid = $candidate;
        }
    }
    $request->pabprops = array();
    if (!isset($request->COOKIE['pabprops'])) {
        return false;
    }
    try {
        $request->pabprops = Pluf_Sign::loads($request->COOKIE['pabprops']);
    } catch (Exception $e) {
        // Best effort: an unreadable cookie leaves the empty set.
    }
    return false;
}
|
|
|
|
/**
 * Get a MongoDB database handle.
 *
 * The connection is opened on the first call and the selected
 * database is kept in self::$db, so only one connection is opened
 * per request. The default options ask for a persistent connection.
 *
 * The configuration keys used, with their default values, are:
 *
 * `pluf_ab_mongo_server`: 'mongodb://localhost:27017'
 * `pluf_ab_mongo_options`: array('connect' => true,
 *                                'persist' => 'pluf_ab_mongo')
 * `pluf_ab_mongo_db`: 'pluf_ab'
 *
 * If you have a default installation of MongoDB, it should work
 * out of the box.
 *
 * @return object The selected database, as returned by selectDB()
 */
public static function getDb()
{
    if (self::$db === null) {
        $default_options = array('connect' => true,
                                 'persist' => 'pluf_ab_mongo');
        $server = Pluf::f('pluf_ab_mongo_server', 'mongodb://localhost:27017');
        $options = Pluf::f('pluf_ab_mongo_options', $default_options);
        $mongo = new Mongo($server, $options);
        self::$db = $mongo->selectDB(Pluf::f('pluf_ab_mongo_db', 'pluf_ab'));
    }
    return self::$db;
}
|
|
|
|
/**
 * Get the uid of a given request.
 *
 * In order: a `pabuid` cookie accepted by check_uid() is returned as
 * is; a request without user agent, or with a user agent matched by
 * isBot(), gets the special uid 'bot'; else the uid cached for the
 * same remote address/user agent pair is reused, or a new one is
 * made with make_uid(). The cache entry is (re)written with a one
 * hour lifetime on each call reaching that point.
 *
 * @param $request Pluf_HTTP_Request
 * @return string The uid, or 'bot'
 */
public static function getUid($request)
{
    if (isset($request->COOKIE['pabuid']) and
        self::check_uid($request->COOKIE['pabuid'])) {
        return $request->COOKIE['pabuid'];
    }
    if (!isset($request->SERVER['HTTP_USER_AGENT'])) {
        return 'bot';
    }
    $agent = $request->SERVER['HTTP_USER_AGENT'];
    if (self::isBot($agent)) {
        return 'bot';
    }
    // No usable cookie: look in the cache for a uid given within the
    // last hour to the same ip/agent pair before making a new one.
    $cache = Pluf_Cache::factory();
    $key = 'pluf_ab_'.crc32($request->remote_addr.'#'.$agent);
    $uid = $cache->get($key, null);
    if (!$uid) {
        $uid = self::make_uid($request);
    }
    $cache->set($key, $uid, 3600);
    return $uid;
}
|
|
|
|
/**
 * Check if a given user agent is a bot.
 *
 * A user agent is considered to be a bot when it contains one of
 * the known keywords (case insensitive), when it contains "bot"
 * directly preceded or followed by a separator character, or when
 * it contains no opening parenthesis at all.
 *
 * @param $user_agent string User agent string
 * @return bool True if the user agent is a bot
 */
public static function isBot($user_agent)
{
    static $keywords = array('robot', 'checker', 'crawl', 'discovery',
                             'hunter', 'scanner', 'spider', 'sucker', 'larbin',
                             'slurp', 'libwww', 'lwp', 'yandex', 'netcraft',
                             'wget', 'twiceler');
    static $patterns = array('/bot[\s_+:,\.\;\/\\\-]/i',
                             '/[\s_+:,\.\;\/\\\-]bot/i');
    foreach ($keywords as $keyword) {
        $found = stristr($user_agent, $keyword);
        if ($found !== false) {
            return true;
        }
    }
    foreach ($patterns as $pattern) {
        if (preg_match($pattern, $user_agent)) {
            return true;
        }
    }
    // Last heuristic: no "(" anywhere in the string means bot.
    return (strpos($user_agent, '(') === false);
}
|
|
|
|
/**
 * Returns a random weighted alternative.
 *
 * Given a series of weighted alternatives in the format:
 *
 * <pre>
 * array('alt1' => 0.2,
 *       'alt2' => 0.3,
 *       'alt3' => 0.5);
 * </pre>
 *
 * Returns the key of the selected alternative. In this example, the
 * alternative 3 (alt3) has a 50% chance to be selected; if it is
 * selected the result is 'alt3'.
 *
 * The weights are expected to sum to 1.0. When the draw is above
 * the cumulated weights (floating point round-off, for example with
 * three weights of 1/3, or weights summing to less than 1.0), the
 * last key is returned instead of falling through without a value.
 *
 * @link: http://20bits.com/downloads/w_rand.phps
 *
 * @param $weights array Weighted alternatives
 * @return mixed Key of the selected $weights array, null if $weights is empty
 */
public static function weightedRand($weights)
{
    // Draw in 1..10000, each weight owns a slice of that range.
    $r = mt_rand(1,10000);
    $offset = 0;
    $last = null;
    foreach ($weights as $k => $w) {
        $offset += $w*10000;
        if ($r <= $offset) {
            return $k;
        }
        $last = $k;
    }
    // No slice matched: give the remainder to the last alternative.
    return $last;
}
|
|
|
|
/**
 * Given a request, make a corresponding A/B test UID.
 *
 * The UID is the concatenation of 4 blocks formatted as 8 uppercase
 * hexadecimal digits: the request time, the CRC32 of the remote
 * address, a random number, and the CRC32 of the first 3 blocks
 * followed by the MD5 of the `secret_key` configuration value. The
 * last block lets check_uid() validate a UID without a database
 * hit.
 *
 * @param $request Pluf_HTTP_Request
 * @return string UID
 */
public static function make_uid($request)
{
    $addr_crc = sprintf('%u', crc32($request->remote_addr));
    $base = sprintf('%08X%08X%08X', $request->time, $addr_crc, rand());
    $secret = md5(Pluf::f('secret_key'));
    $checksum = sprintf('%u', crc32($base.$secret));
    return sprintf('%s%08X', $base, $checksum);
}
|
|
|
|
/**
 * Validate the uid in the cookie.
 *
 * A valid UID is a 32 character string whose last 8 characters are
 * the CRC32, formatted as 8 uppercase hexadecimal digits, of its
 * first 24 characters followed by the MD5 of the `secret_key`
 * configuration value.
 *
 * @see self::make_uid
 *
 * @param $uid string The UID
 * @return bool True if the UID is valid
 */
public static function check_uid($uid)
{
    // The value comes from a cookie and can be an array.
    if (!is_string($uid) or strlen($uid) != 32) {
        return false;
    }
    $check = sprintf('%08X', sprintf('%u', crc32(substr($uid, 0, 24).md5(Pluf::f('secret_key')))));
    // Strict comparison: with "==" two strings looking like numbers
    // are compared as numbers, so that for example "00001E03" would
    // be accepted in place of "00001000".
    return ($check === substr($uid, -8));
}
|
|
|
|
/* ------------------------------------------------------------
|
|
*
|
|
* Statistics Functions
|
|
*
|
|
* Note: I am not a statistician, use at your own risk!
|
|
*
|
|
* ------------------------------------------------------------ */
|
|
|
|
/**
 * Given a conversion rate calculate the recommended sample sizes.
 *
 * The sample sizes are calculated to be significant at 95% in the
 * case of a variation of conversion with respect to the other
 * alternative of 25%, 15% and 5%. Each size is truncated to an
 * integer.
 *
 * @param $conv Conversion rate ]0.0;1.0]
 * @return array The 3 sample sizes for 25%, 15% and 5%
 */
public static function ssize($conv)
{
    $a = 3.84145882689; // $a = pow(inverse_ncdf(1-(1-0.95)/2),2)
    // Squares of the detected variations: 0.25^2, 0.15^2, 0.05^2
    $squares = array(0.0625, 0.0225, 0.0025);
    $sizes = array();
    for ($i = 0; $i < count($squares); $i++) {
        $sizes[] = (int) ((1-$conv)*$a/($squares[$i]*$conv));
    }
    return $sizes;
}
|
|
|
|
|
|
/**
 * Given a test, returns the corresponding stats.
 *
 * One entry is returned per alternative, sorted by decreasing
 * conversion rate, with the keys:
 *
 * - `alt`: index of the alternative in $test['alts'],
 * - `exp`, `conv`: exposures and conversions,
 * - `convr`: conversion rate, null when not computable,
 * - `nconvr`: formatted conversion rate or 'N/A',
 * - `ssize`: recommended sample sizes, see ssize(),
 * - `comp`, `zscore`, `conf`: formatted comparison with the second
 *   best alternative, false when no comparison is made,
 * - `better`: true if the rate is above the one of the second best.
 *
 * No comparison is made for the second best alternative itself,
 * when there is only one alternative, when the second best has no
 * conversion, or for an alternative without a computable rate
 * (never exposed).
 *
 * @param $test array Test definition and results
 * @return array Statistics for the test
 */
public static function getTestStats($test)
{
    $stats = array(); // Will store the stats
    $n = count($test['alts']);
    $aconvr = array(); // All the conversion rates to sort the alternatives
    for ($i=0;$i<$n;$i++) {
        $conv = (isset($test['convalt_'.$i])) ? $test['convalt_'.$i] : 0;
        $exp = (isset($test['expalt_'.$i])) ? $test['expalt_'.$i] : 0;
        $convr = self::cr(array($exp, $conv));
        $nconvr = ($convr !== null) ?
            sprintf('%01.2f%%', $convr*100.0) : 'N/A';
        $ssize = ($convr !== null and $convr > 0) ?
            self::ssize($convr) : array();
        $stats[] = array('alt' => $i,
                         'convr' => $convr,
                         'conv' => $conv,
                         'exp' => $exp,
                         'nconvr' => $nconvr,
                         'ssize' => $ssize);
        $aconvr[] = ($convr === null) ? 0 : $convr;
    }
    array_multisort($aconvr, SORT_DESC, $stats);
    // We want the best to be significantly better than the second
    // best, so the second best is the reference of the comparisons.
    // It only exists with at least 2 alternatives and is only usable
    // with a strictly positive conversion rate.
    $ref = ($n > 1) ? $stats[1] : null;
    $has_ref = ($ref !== null and $ref['convr'] > 0);
    for ($i=0;$i<$n;$i++) {
        $convr = $stats[$i]['convr'];
        $exp = $stats[$i]['exp'];
        $conv = $stats[$i]['conv'];
        $comp = false;
        $zscore = false;
        $conf = false;
        $better = false;
        // An alternative without a rate (never exposed) cannot be
        // compared, the Z-score would divide by its 0 exposure.
        if ($has_ref and $i != 1 and $convr !== null) {
            // Compare with the reference and get confidence/Z-score
            $comp = 100.0 * (float) ($convr - $ref['convr'])/ (float) ($ref['convr']);
            if ($comp > 0) $better = true;
            $comp = sprintf('%01.2f%%', $comp);
            $zscore = self::zscore(array($ref['exp'], $ref['conv']),
                                   array($exp, $conv));
            $conf = sprintf('%01.2f%%', self::cumnormdist($zscore)*100.0);
            $zscore = sprintf('%01.2f', $zscore);
        }
        $stats[$i]['comp'] = $comp;
        $stats[$i]['zscore'] = $zscore;
        $stats[$i]['conf'] = $conf;
        $stats[$i]['better'] = $better;
    }
    return $stats;
}
|
|
|
|
/**
 * Conversion rate of an alternative.
 *
 * @param $t array array(exposures, conversions)
 * @return mixed conversions/exposures, or null when the number of
 *               conversions is negative or the number of exposures
 *               is not strictly positive
 */
public static function cr($t)
{
    if ($t[1] < 0 or $t[0] <= 0) {
        return null;
    }
    $rate = $t[1]/$t[0];
    return $rate;
}
|
|
|
|
/**
 * Z-score of the difference of conversion rate between two
 * alternatives.
 *
 * The score is (rate of $t - rate of $c) divided by the square root
 * of the sum, for both alternatives, of rate*(1-rate)/exposures.
 *
 * Degenerate cases which would otherwise divide by zero:
 *
 * - when one of the rates cannot be computed (see cr(), for example
 *   no exposure), 0.0 is returned;
 * - when the sum under the square root is not strictly positive
 *   (for example both rates are exactly 0 or 1), 0.0 is returned if
 *   the rates are equal, else INF or -INF with the sign of the
 *   difference.
 *
 * @param $c array Reference, array(exposures, conversions)
 * @param $t array Alternative, array(exposures, conversions)
 * @return float Z-score
 */
public static function zscore($c, $t)
{
    $rate_t = self::cr($t);
    $rate_c = self::cr($c);
    if ($rate_t === null or $rate_c === null) {
        // No data for one of the alternatives: nothing to compare.
        return 0.0;
    }
    $z = $rate_t-$rate_c;
    $s = ($rate_t*(1-$rate_t))/$t[0]
        + ($rate_c*(1-$rate_c))/$c[0];
    if ($s <= 0) {
        if ($z == 0) {
            return 0.0;
        }
        return ($z > 0) ? INF : -INF;
    }
    return $z/sqrt($s);
}
|
|
|
|
/**
 * Approximation of the cumulative normal distribution.
 *
 * A degree 5 polynomial in t = 1/(1 + p*|x|) is multiplied by
 * c*exp(-x*x/2) to get the weight of the tail beyond |x|. The
 * result is 1 minus this tail for x >= 0 and the tail itself for
 * x < 0.
 *
 * @param $x float Value
 * @return float Approximation of the probability to be below $x
 */
public static function cumnormdist($x)
{
    $b1 = 0.319381530;
    $b2 = -0.356563782;
    $b3 = 1.781477937;
    $b4 = -1.821255978;
    $b5 = 1.330274429;
    $p = 0.2316419;
    $c = 0.39894228;

    $positive = ($x >= 0.0);
    $t = ($positive) ?
        1.0 / ( 1.0 + $p * $x ) :
        1.0 / ( 1.0 - $p * $x );
    // Polynomial in Horner form
    $poly = $t *( $t * ( $t * ( $t * $b5 + $b4 ) + $b3 ) + $b2 ) + $b1;
    $tail = $c * exp( -$x * $x / 2.0 ) * $t * $poly;
    if ($positive) {
        return (1.0 - $tail);
    }
    return $tail;
}
|
|
}
|