Apache access log is one of the best way to analyse if your application is working as expected or not. But there are a few changes those needs to be made to httpd.conf file so that the data can be dumped to MySQL table.
# cat /etc/httpd/conf/httpd.conf | grep LogFormat
#Old Format to be changed to the new format
#LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
LogFormat "\"%h\" \"%l\" \"%u\" \"%{%Y-%m-%d %H:%M:%S}t\" \"%r\" \"%>s\" \"%b\" \"%{Referer}i\" \"%{User-Agent}i\"" combined
use db_Name;
CREATE TABLE `access_log` (
`ip` varchar(100) NOT NULL default '',
`RFC` varchar(100) default NULL,
`user` varchar(100) default NULL,
`mytime` datetime default NULL,
`request` varchar(100) default NULL,
`response` int(11) NOT NULL default '0',
`bytes` bigint(20) NOT NULL default '0',
`referer` varchar(100) default NULL,
`browser` varchar(500) default NULL,
UNIQUE KEY `mygroup` (`ip`,`RFC`,`user`,`mytime`,`request`)
);
mysqlimport --ignore --fields-terminated-by ' ' --fields-enclosed-by '"' db_Name /var/log/httpd/access_log
Pages being accessed:
mysql> select distinct(substring_index(referer, '?', -1)) as link from access_log where (substring_index(referer, '?', -1)) like 'http%' order by link;
IP addresses accessing the application:
mysql> select ip, count(*) as cnt from access_log group by ip order by cnt desc;
_____
Once the access data is imported, you can create an error_code table to compare the response codes from access log and analyse.
CREATE TABLE `mycode` (
`id` int(11) NOT NULL auto_increment,
`response` int(11) default NULL,
`expl` varchar(100) default NULL,
`type` varchar(100) default NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=19 DEFAULT CHARSET=latin1;
INSERT INTO `mycode` VALUES (1,200,'OK','OK'),(2,202,'ACCEPTED','OK'),(3,201,'Created','OK'),(4,203,'Partial','OK'),(5,400,'Bad Request','error'),(6,401,'Unauthorized','error'),(7,402,'Payment Requested','error'),(8,403,'Forbidden','error'),(9,404,'Not Found','error'),(10,500,'Internal Error','error'),(11,501,'Not Implemented','error'),(12,502,'Service temporarily overloaded','error'),(13,503,'Gateway timeout','error'),(14,301,'Moved','OK'),(15,302,'Found','OK'),(16,303,'Modified','OK'),(17,304,'Not Modified','OK'),(18,405,'Method not allowed','error');
Page requests having error code:
mysql> select a.response, b.expl, b.type, count(*) as cnt from access_log as a inner join mycode as b on a.response = b.response where request not like '%favicon.ico%' group by response;
mysql> select a.response, a.request from access_log as a inner join mycode as b on a.response = b.response where b.type = 'error' and request not like '%favicon%' order by a.response;
Analysing access_log
Apache access log is one of the best way to analyse if your application is working as expected or not. But there are a few changes those needs to be made to httpd.conf file so that the data can be dumped to MySQL table.
# cat /etc/httpd/conf/httpd.conf | grep LogFormat
#Old Format to be changed to the new format
#LogFormat "%h %l %u %t \"%r\" %>s %b \"%{Referer}i\" \"%{User-Agent}i\"" combined
LogFormat "\"%h\" \"%l\" \"%u\" \"%{%Y-%m-%d %H:%M:%S}t\" \"%r\" \"%>s\" \"%b\" \"%{Referer}i\" \"%{User-Agent}i\"" combined
use db_Name;
CREATE TABLE `access_log` (
`ip` varchar(100) NOT NULL default '',
`RFC` varchar(100) default NULL,
`user` varchar(100) default NULL,
`mytime` datetime default NULL,
`request` varchar(100) default NULL,
`response` int(11) NOT NULL default '0',
`bytes` bigint(20) NOT NULL default '0',
`referer` varchar(100) default NULL,
`browser` varchar(500) default NULL,
UNIQUE KEY `mygroup` (`ip`,`RFC`,`user`,`mytime`,`request`)
);
mysqlimport --ignore --fields-terminated-by ' ' --fields-enclosed-by '"' db_Name /var/log/httpd/access_log
Pages being accessed:
mysql> select distinct(substring_index(referer, '?', -1)) as link from access_log where (substring_index(referer, '?', -1)) like 'http%' order by link;
IP addresses accessing the application:
mysql> select ip, count(*) as cnt from access_log group by ip order by cnt desc;
_____
Once the access data is imported, you can create an error_code table to compare the response codes from access log and analyse.
CREATE TABLE `mycode` (
`id` int(11) NOT NULL auto_increment,
`response` int(11) default NULL,
`expl` varchar(100) default NULL,
`type` varchar(100) default NULL,
PRIMARY KEY (`id`)
) ENGINE=MyISAM AUTO_INCREMENT=19 DEFAULT CHARSET=latin1;
INSERT INTO `mycode` VALUES (1,200,'OK','OK'),(2,202,'ACCEPTED','OK'),(3,201,'Created','OK'),(4,203,'Partial','OK'),(5,400,'Bad Request','error'),(6,401,'Unauthorized','error'),(7,402,'Payment Requested','error'),(8,403,'Forbidden','error'),(9,404,'Not Found','error'),(10,500,'Internal Error','error'),(11,501,'Not Implemented','error'),(12,502,'Service temporarily overloaded','error'),(13,503,'Gateway timeout','error'),(14,301,'Moved','OK'),(15,302,'Found','OK'),(16,303,'Modified','OK'),(17,304,'Not Modified','OK'),(18,405,'Method not allowed','error');
Page requests having error code:
mysql> select a.response, b.expl, b.type, count(*) as cnt from access_log as a inner join mycode as b on a.response = b.response where request not like '%favicon.ico%' group by response;
mysql> select a.response, a.request from access_log as a inner join mycode as b on a.response = b.response where b.type = 'error' and request not like '%favicon%' order by a.response;
_____
The following shell script can be used to import the data to a central server.
#!/bin/sh
# The following line will change the logformat to include response time
# sed -i 's/^LogFormat.*combined$/LogFormat "\\\"%h\\\" \\\"%l\\\" \\\"%u\\\" \\\"%{%Y-%m-%d %H:%M:%S}t\\\" \\\"%r\\\" \\\"%>s\\\" \\\"%b\\\" \\\"%{Referer}i\\\" \\\"%{User-Agent}i\\\" \\\"%D\\\" \\\"%T\\\" \\\"%q\\\" \\\"%f\\\" \\\"%v\\\" " combined/' /etc/httpd/conf/httpd.conf
# find database name
mydb=$(mysqlshow -uroot -ptrimax | awk '{print $2}' | egrep -v 'Databases|information_schema|mysql|test|lost\+found|freshnew1' | sed '/^$/d')
# add the database name to the end of the log
sed "s/$/\"$mydb\"/" /var/log/httpd/access_log | tail > /home/develop/access_log
# import the log to a central server
mysqlimport -h111.222.333.444 -uroot -pPassWord --local --ignore --fields-terminated-by ' ' --fields-enclosed-by '"' DBName /home/develop/access_log
_____
1) Change the logformat so that fields are terminated by pipe "|"
2) Create a table in mysql and load the data.
3) Upload log file to S3 and then import it to Redshift - data warehouse software.
cp /etc/apache2/apache2.conf /etc/apache2/apache2.conf.back
sed -i 's/^LogFormat.*combined$/LogFormat "%h | %l | %u | %{%Y-%m-%d %H:%M:%S}t | %r | %>s | %O | %{Referer}i | %{User-Agent}i | %B | %D | %T | %q | %f | %v " combined/' /etc/apache2/apache2.conf
/etc/init.d/apache2 restart
_____
use test;
drop table access;
CREATE TABLE `access` (
`ip` varchar(100) NOT NULL default '',
`RFC` varchar(100) default NULL,
`user` varchar(100) default NULL,
`request_time` datetime default NULL,
`request_first_line` varchar(255) default NULL,
`status` int(11) NOT NULL default '0',
`bytes_sent` bigint(20) NOT NULL default '0',
`referer` varchar(100) default NULL,
`user_agent` varchar(500) default NULL,
`bytes_sent_exc_header` bigint(20),
`response_time_microsec` bigint(20),
`response_time_sec` bigint(20),
`query_string` varchar(255),
`request_file` varchar(255),
`server_name` varchar(255),
UNIQUE KEY `mygroup` (`ip`,`RFC`,`user`,`request_time`,`request_first_line`)
);
load data infile 'access.log' into table access fields terminated by '|';
_____
# copy the file to S3 and then delete or move the file to a different location
s3cmd put access.log.2.gz s3://Aug26/access.log.2.aug26_11_19.gz
rm access.log.2.gz
Labels: mysql tips