X-Git-Url: https://git-public.kairo.at/?p=php-utility-classes.git;a=blobdiff_plain;f=include%2Fclasses%2Fuseragent.php-class;h=4d7689c7c20fa22bacf5dcba69ff18dc45f0263a;hp=534dd716e2b65cef908dab41fd554a78edf79101;hb=4210e7b0abd0c640460a35fb73ed9d3d7bd1aa07;hpb=1defa974f93b4246f5ce8c46d91d805b4342c7eb diff --git a/include/classes/useragent.php-class b/include/classes/useragent.php-class index 534dd71..4d7689c 100755 --- a/include/classes/useragent.php-class +++ b/include/classes/useragent.php-class @@ -1,4 +1,40 @@ + * + * Alternatively, the contents of this file may be used under the terms of + * either the GNU General Public License Version 2 or later (the "GPL"), or + * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + class userAgent { // userAgent PHP class // get user agent and tell us what Browser is accessing @@ -12,7 +48,25 @@ class userAgent { // returns the User Agent brand name // var $version // the User Agent version + // var $bot + // bool: true if this agent is a bot + // var $uadata + // array of static user agent data (static vars in functions are set for all objects of this class!) 
+ // + // function getBrand() + // returns the User Agent Brand Name + // function getVersion() + // returns the User Agent version + // + // function getAcceptLanguages() + // returns an associative array with the accepted languages of this UA + // keys are language codes, values are q factors (weights) + // + // function getUAString() + // returns the full User Agent string // + // function isbot() + // returns true if User Agent seems to be a bot // function isns() // returns true if User Agent seems to be Netscape brand, false if not // function isns4() @@ -39,28 +93,82 @@ class userAgent { // Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/51 (like Gecko) Safari/51 // Lynx/2.8.4rel.1 libwww-FM/2.14 SSL-MM/1.4.1 OpenSSL/0.9.6g // Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en-US; rv:1.0.1) Gecko/20021109 Chimera/0.6+ + // Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.7b) Gecko/20040302 Camino/0.7+ // Mozilla/5.0 (Windows; U; Win 9x 4.90; en-US; rv:1.3a) Gecko/20021207 Phoenix/0.5 // Mozilla/5.0 Galeon/1.2.7 (X11; Linux i686; U;) Gecko/20021204 // Mozilla/4.0 (compatible; MSIE 5.0; Windows XP) Opera 6.05 [ja] // Mozilla/4.0 (compatible; MSIE 5.12; Mac_PowerPC) OmniWeb/4.1.1-v424.6 + // Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6 + // Mozilla/5.0 (Windows; U; WinNT4.0; en-US; rv:1.5a) Gecko/20030728 Mozilla Firebird/0.6.1 + // Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7a) Gecko/20040216 Firefox/0.8.0+ + // Python-urllib/1.15 + // *** search bots: *** + // W3C_Validator/1.305.2.12 libwww-perl/5.64 + // Scooter/3.3 + // Spinne/2.0 med_AH + // Vagabondo/2.0 MT (webagent at wise-guys dot nl) + // TurnitinBot/1.5 (http://www.turnitin.com/robot/crawlerinfo.html) + // FAST-WebCrawler/3.x Multimedia (mm dash crawler at fast dot no) + // Firefly/1.0 (compatible; Mozilla 4.0; MSIE 5.5) + // Googlebot/2.1 (+http://www.googlebot.com/bot.html) + // Scrubby/2.2 (http://www.scrubtheweb.com/) + //
psbot/0.1 (+http://www.picsearch.com/bot.html) + // NG/1.0 + // URL_Spider_Pro/3.0 (http://www.innerprise.net/usp-spider.asp) + // Pompos/1.3 http://dir.com/pompos.html + // Szukacz/1.5 (robot; www.szukacz.pl/jakdzialarobot.html; info@szukacz.pl) + // ASPseek/1.2.10 + // NPBot-1/2.0 + // NetResearchServer/2.7(loopimprovements.com/robot.html) + // dloader(NaverRobot)/1.0 + // Mozilla/4.0 compatible ZyBorg/1.0 Daily Refresh Beta-d03 (wn.zyborg@looksmart.net; + // Mozilla/2.0 (compatible; Ask Jeeves/Teoma) + // Mozilla/5.0 (Slurp/si; slurp@inktomi.com; http://www.inktomi.com/slurp.html) + // Mozilla/5.0 [en] (compatible; Gulper Web Bot 0.2.4 www.ecsl.cs.sunysb.edu/~maxim/cgi-bin/Link/GulperBot) + // Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; Girafabot; girafabot at girafa dot com; + // Mozilla/4.0 (efp@gmx.net) + // Mozilla/4.5 (compatible; HTTrack 3.0x; Windows 98) + // PingALink Monitoring Services 1.0 (http://www.pingalink.com) + // IlTrovatore-Setaccio (+http://www.iltrovatore.it) + // Mercator-2.0 + // appie 1.1 (www.walhello.com) + // larbin_2.6.2 (larbin2.6.2@unspecified.mail) + // OWR_Crawler 0.1 + // search.ch V1.4.2 (spiderman@search.ch; + // WebFilter Robot 1.0 + // Openfind data gatherer, Openbot/3.0+(robot-response@openfind.com.tw;+ + // LinkWalker + // Internet Explorer 5.5 + // BaiDuSpider + // Mozilla/4.0 (compatible; B-l-i-t-z-B-O-T) + // B l i t z B O T @ t r i c u s .
n e t (Mozilla compatible) + // sitecheck.internetseer.com (For more info see: ">http://sitecheck.internetseer.com) + // http://www.almaden.ibm.com/cs/crawler   [c01] + // ia_archiver + // Nutch + // Mozilla + // HeinrichderMiragoRobot + // dumbBot var $uastring; var $brand; var $version; + var $bot = false; + var $uadata = array(); - function userAgent($ua_string = "") { + function userAgent($ua_string = '') { // *** constructor *** if (strlen($ua_string)) { $this->uastring = $ua_string; } else { // read raw UA string - $this->uastring = $_SERVER["HTTP_USER_AGENT"]; + $this->uastring = $_SERVER['HTTP_USER_AGENT']; } // get UA brand and version $this->brand = "Unknown"; $this->version = 0; - if (ereg("([0-9a-zA-Z\.]+)/([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + if (ereg("([0-9a-zA-Z\.()-]+)/([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { $this->brand = $regs[1]; // this is a reasonable default :) $this->version = $regs[2]; // this is a reasonable default :) } @@ -76,10 +184,22 @@ class userAgent { $this->brand = "Chimera"; $this->version = $regs[1]; } + elseif (ereg("Camino/([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + $this->brand = "Camino"; + $this->version = $regs[1]; + } elseif (ereg("Phoenix/([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { $this->brand = "Phoenix"; $this->version = $regs[1]; } + elseif (ereg("Mozilla Firebird/([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + $this->brand = "Mozilla Firebird"; + $this->version = $regs[1]; + } + elseif (ereg("Firefox/([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + $this->brand = "Firefox"; + $this->version = $regs[1]; + } elseif (ereg("Galeon/([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { $this->brand = "Galeon"; $this->version = $regs[1]; @@ -108,6 +228,146 @@ class userAgent { $this->brand = "AppleWebKit"; $this->version = $regs[1]; } + elseif (ereg("W3C_Validator/([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + $this->brand = "W3C_Validator"; + $this->version = $regs[1]; + $this->bot = true; + } + 
elseif (ereg("ZyBorg/([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + $this->brand = "ZyBorg"; + $this->version = $regs[1]; + $this->bot = true; + } + elseif (ereg("Ask Jeeves/([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + $this->brand = "Ask Jeeves"; + $this->version = $regs[1]; + $this->bot = true; + } + elseif (ereg("Slurp/([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + $this->brand = "Slurp"; + $this->version = $regs[1]; + $this->bot = true; + } + elseif (ereg("Openbot/([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + $this->brand = "Openbot"; + $this->version = $regs[1]; + $this->bot = true; + } + elseif (ereg("Mercator-([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + $this->brand = "Mercator"; + $this->version = $regs[1]; + $this->bot = true; + } + elseif (ereg("appie ([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + $this->brand = "appie"; + $this->version = $regs[1]; + $this->bot = true; + } + elseif (ereg("larbin_([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + $this->brand = "larbin"; + $this->version = $regs[1]; + $this->bot = true; + } + elseif (ereg("Gulper Web Bot ([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + $this->brand = "Gulper Web Bot"; + $this->version = $regs[1]; + $this->bot = true; + } + elseif (ereg("HTTrack ([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + $this->brand = "HTTrack"; + $this->version = $regs[1]; + $this->bot = true; + } + elseif (ereg("OWR_Crawler ([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + $this->brand = "OWR_Crawler"; + $this->version = $regs[1]; + $this->bot = true; + } + elseif (ereg("search.ch ([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + $this->brand = "search.ch"; + $this->version = $regs[1]; + $this->bot = true; + } + elseif (ereg("WebFilter Robot ([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { + $this->brand = "WebFilter Robot"; + $this->version = $regs[1]; + $this->bot = true; + } + elseif (ereg("^LinkWalker", $this->uastring)) { + $this->brand = "LinkWalker"; + $this->version = ""; + $this->bot = true; + 
} + elseif (ereg("^Internet Explorer 5.5", $this->uastring)) { + $this->brand = "Unknown bot (IE5.5)"; + $this->version = ""; + $this->bot = true; + } + elseif (ereg("^Mozilla[\s ]*$", $this->uastring)) { + $this->brand = "Unknown bot (Mozilla)"; + $this->version = ""; + $this->bot = true; + } + elseif (ereg("^BaiDuSpider", $this->uastring)) { + $this->brand = "BaiDuSpider"; + $this->version = ""; + $this->bot = true; + } + elseif (ereg("^ia_archiver", $this->uastring)) { + $this->brand = "ia_archiver"; + $this->version = ""; + $this->bot = true; + } + elseif (ereg("^PingALink", $this->uastring)) { + $this->brand = "PingALink"; + $this->version = ""; + $this->bot = true; + } + elseif (ereg("^IlTrovatore-Setaccio", $this->uastring)) { + $this->brand = "IlTrovatore-Setaccio"; + $this->version = ""; + $this->bot = true; + } + elseif (ereg("^Nutch", $this->uastring)) { + $this->brand = "Nutch"; + $this->version = ""; + $this->bot = true; + } + elseif (ereg("^dumbBot", $this->uastring)) { + $this->brand = "dumbBot"; + $this->version = ""; + $this->bot = true; + } + elseif (ereg("http://www.almaden.ibm.com/cs/crawler", $this->uastring)) { + $this->brand = "almaden crawler"; + $this->version = ""; + $this->bot = true; + } + elseif (ereg("B-l-i-t-z-B-O-T", $this->uastring) || ereg("B l i t z B O T @ t r i c u s . 
n e t", $this->uastring)) { + $this->brand = "BlitzBOT"; + $this->version = ""; + $this->bot = true; + } + elseif (ereg("sitecheck.internetseer.com", $this->uastring)) { + $this->brand = "internetseer"; + $this->version = ""; + $this->bot = true; + } + elseif (ereg("Girafabot", $this->uastring)) { + $this->brand = "Girafabot"; + $this->version = ""; + $this->bot = true; + } + elseif (ereg("efp@gmx.net", $this->uastring)) { + $this->brand = "efp"; + $this->version = ""; + $this->bot = true; + } + elseif (ereg("HeinrichderMiragoRobot", $this->uastring)) { + $this->brand = "HeinrichderMiragoRobot"; + $this->version = ""; + $this->bot = true; + } elseif (ereg("MSIE ([0-9a-zA-Z\.+]+)", $this->uastring, $regs)) { $this->brand = "Microsoft Internet Explorer"; $this->version = $regs[1]; @@ -117,78 +377,96 @@ class userAgent { $this->version = $regs[1]; if (intval($this->version) == 4) { $this->brand .= " Communicator"; } } + + $botArray = array('Scooter','Spinne','Vagabondo','TurnitinBot','FAST-WebCrawler','Firefly','Googlebot', + 'Scrubby','psbot','NG','URL_Spider_Pro','Pompos','Szukacz','ASPseek','NPBot-1', + 'dloader(NaverRobot)','NetResearchServer','','','','','','',''); + + if (in_array($this->brand, $botArray)) { + $this->bot = true; + } } + function getBrand() { return $this->brand; } + function getVersion() { return $this->version; } + + function getAcceptLanguages() { /* Returns array(language tag => q weight) parsed from the request's Accept-Language header; computed once per object and cached in $this->uadata['accept-languages']. */ + if (!isset($this->uadata['accept-languages'])) { + $rawAccLang = isset($_SERVER['HTTP_ACCEPT_LANGUAGE']) ? $_SERVER['HTTP_ACCEPT_LANGUAGE'] : ''; /* read from $_SERVER like the constructor does: getallheaders() is not available under every SAPI, and a missing header must yield an empty list rather than an undefined index */ + $accLcomp = explode(',', $rawAccLang); + $accLang = array(); + foreach ($accLcomp as $lcomp) { + $ldef = array_map('trim', explode(';', $lcomp)); /* tolerate the optional whitespace in "en-US, en;q=0.5" */ + if (strlen($ldef[0])) { + $accLang[$ldef[0]] = (float)((isset($ldef[1]) && strpos($ldef[1],'q=')===0)?substr($ldef[1],2):1); /* a tag without a q parameter gets weight 1 */ + } + } + $this->uadata['accept-languages'] = $accLang; + } + return $this->uadata['accept-languages']; + } + + function getUAString() { return $this->uastring; } + function isbot() { return $this->bot; } + function isns() { - // set it static so that we don't have to call
it that often - static $is_ns; - if (!isset($is_ns)) { - $is_ns = false; - if (strstr($this->brand, "Netscape")) { - $is_ns = true; + if (!isset($this->uadata['is_ns'])) { + $this->uadata['is_ns'] = false; + if (strstr($this->brand, 'Netscape')) { + $this->uadata['is_ns'] = true; } } - return $is_ns; + return $this->uadata['is_ns']; } function isns4() { - // set it static so that we don't have to call it that often - static $is_ns4; - if (!isset($is_ns4)) { - $is_ns4 = false; - if (strstr($this->brand, "Netscape") && (intval($this->version) == 4)) { - $is_ns4 = true; + if (!isset($this->uadata['is_ns4'])) { + $this->uadata['is_ns4'] = false; + if (strstr($this->brand, 'Netscape') && (intval($this->version) == 4)) { + $this->uadata['is_ns4'] = true; } } - return $is_ns4; + return $this->uadata['is_ns4']; } function isie() { - // set it static so that we don't have to call it that often - static $is_ie; - if (!isset($is_ie)) { + if (!isset($this->uadata['is_ie'])) { - $is_ie = false; + $this->uadata['is_ie'] = false; /* initialise the per-object cache entry itself (not a stray local) so non-IE agents return false instead of an unset index, matching isns()/isns4()/geckobased() */ - if (strstr($this->brand, "Internet Explorer")) { - $is_ie = true; + if (strstr($this->brand, 'Internet Explorer')) { + $this->uadata['is_ie'] = true; } } - return $is_ie; + return $this->uadata['is_ie']; } function geckobased() { - // set it static so that we don't have to call it that often - static $is_gecko; - if (!isset($is_gecko)) { - $is_gecko = false; - if (strstr($this->uastring, "Gecko/")) { - $is_gecko = true; + if (!isset($this->uadata['is_gecko'])) { + $this->uadata['is_gecko'] = false; + if (strstr($this->uastring, 'Gecko/')) { + $this->uadata['is_gecko'] = true; } } - return $is_gecko; + return $this->uadata['is_gecko']; } function geckodate() { - // set it static so that we don't have to call it that often - static $gdate; - if (!isset($gdate)) { - $gdate = 0; - if (ereg("Gecko/([0-9]+)", $this->uastring, $regs)) { - $gdate = $regs[1]; + if (!isset($this->uadata['gdate'])) { + $this->uadata['gdate'] = 0; + if (ereg('Gecko/([0-9]+)', $this->uastring, $regs)) { +
$this->uadata['gdate'] = $regs[1]; /* keep the run of digits that follows "Gecko/" in the UA string */ } } - return $gdate; + return $this->uadata['gdate']; } function khtmlbased() { - // set it static so that we don't have to call it that often - static $is_khtml; - if (!isset($is_khtml)) { - $is_khtml = false; - if (strstr($this->brand, "Konqueror") || strstr($this->brand, "Safari") || strstr($this->brand, "AppleWebKit")) { - $is_khtml = true; + if (!isset($this->uadata['is_khtml'])) { /* computed on first call, then served from this object's $uadata cache */ + $this->uadata['is_khtml'] = false; + if (strstr($this->brand, 'Konqueror') || strstr($this->brand, 'Safari') || strstr($this->brand, 'AppleWebKit')) { /* true when the detected brand contains Konqueror, Safari or AppleWebKit */ + $this->uadata['is_khtml'] = true; } } - return $is_khtml; + return $this->uadata['is_khtml']; } } -?> \ No newline at end of file +?>