Login Register Actian.com  

Actian Community Blogs

Rate this Entry

Baseline Results (Part 2)

Submit "Baseline Results (Part 2)" to Digg Submit "Baseline Results (Part 2)" to del.icio.us Submit "Baseline Results (Part 2)" to StumbleUpon Submit "Baseline Results (Part 2)" to Google
Posted 2010-04-03 at 09:05 AM by rhann
Updated 2010-04-03 at 09:21 AM by rhann (Added note re overhead)

Continued from Part 1.

Q_MAXPAGE
Code:
SELECT FIRST 50 ps.page_id, max(ps.page_count) AS mx
FROM pagestat ps JOIN datesinfo di ON di.id=ps.date_id 
WHERE di.calmonth=2 
  AND di.calyear=2010 
  AND di.calday=18 
GROUP BY ps.page_id
ORDER BY mx DESC;
0.1704 secs @ 6.25M rows
0.3197 secs @ 12.5M rows
0.6166 secs @ 25M rows
1.2251 secs @ 50M rows

Q_10000HITS_1
Code:
SELECT FIRST 50 ps.page_id, max(ps.page_count) AS mx
FROM pagestat ps JOIN datesinfo di ON di.id=ps.date_id  
WHERE di.calmonth=3 
  AND di.calyear=2010 
  AND di.calday=3 
GROUP BY ps.page_id
HAVING max(ps.page_count) > 10000;
0.1748 secs @ 6.25M rows
0.3281 secs @ 12.5M rows
0.6367 secs @ 25M rows
1.2785 secs @ 50M rows

Q_10000HITS_2
Code:
SELECT FIRST 50  pg.page, t.mx 
FROM pages pg JOIN (SELECT ps.page_id, max(ps.page_count) AS mx  
                    FROM pagestat ps JOIN datesinfo di ON di.id=ps.date_id 
                    WHERE di.calmonth=3 
                      AND di.calyear=2010 
                      AND di.calday=3   
                    GROUP BY ps.page_id  
                    HAVING max(page_count) > 10000) t ON t.page_id=pg.id;
0.2647 secs @ 6.25M rows
0.5372 secs @ 12.5M rows
1.0556 secs @ 25M rows
2.0024 secs @ 50M rows

Q_NOACCESS_SECOND_DAY
Code:
SELECT pg.page 
FROM pages pg JOIN (SELECT t1.page_id 
                    FROM (SELECT ps1.page_id,sum(ps1.page_count) AS sm 
                          FROM pagestat ps1 JOIN datesinfo di1 ON di1.id=ps1.date_id 
                          WHERE di1.calmonth=2 
                            AND di1.calyear=2010 
                            AND di1.calday=16 
                            AND ps1.project_id=1524 
                          GROUP BY ps1.page_id 
                          HAVING sum(page_count)>1000) t1  -- pages accessed more than 1000 times on Day 1
                    LEFT JOIN (SELECT ps2.page_id 
                               FROM pagestat ps2 JOIN datesinfo di2 ON di2.id=ps2.date_id
                               WHERE di2.calmonth=2 
                                 AND di2.calyear=2010 
                                 AND di2.calday=17 
                                 AND ps2.project_id=1524) t2 -- pages accessed on Day 2
                    ON t2.page_id=t1.page_id  
                    WHERE t2.page_id IS NULL) bq 
ON pg.id=bq.page_id;
0.1018 secs @ 6.25M rows
0.1469 secs @ 12.5M rows
0.2398 secs @ 25M rows
0.4059 secs @ 50M rows

Q_PAGE_MONTHSTATS
Code:
SELECT di.caldate, sum(ps.page_count) 
FROM pagestat ps JOIN datesinfo di ON ps.date_id = di.id
                 JOIN pages pg ON ps.page_id=pg.id
WHERE pg.page='Yugopolis' 
  AND di.calmonth=2 
  AND di.calyear=2010 
GROUP BY caldate 
ORDER BY caldate;
0.2333 secs @ 6.25M rows
0.4281 secs @ 12.5M rows
0.7764 secs @ 25M rows
1.4091 secs @ 50M rows

Q_PROJECT_AVG
Code:
SELECT pj.project, AVG(ps.page_count) 
FROM pagestat ps JOIN datesinfo di ON ps.date_id=di.id
                 JOIN projects pj ON ps.project_id=pj.id
WHERE calmonth=2 
  AND calyear=2010 
GROUP by project;
0.7520 secs @ 6.25M rows
1.6324 secs @ 12.5M rows
2.9139 secs @ 25M rows
5.9650 secs @ 50M rows

Q_PAGES_PROJECT
Code:
SELECT FIRST 100 ps.page_id, count(distinct ps.project_id) 
FROM pagestat ps JOIN datesinfo di ON di.id=ps.date_id 
WHERE di.calmonth=2 
  AND di.calyear=2010 
GROUP BY ps.page_id 
HAVING count(distinct ps.project_id) > 10;
6.0733 secs @ 6.25M rows
13.1317 secs @ 12.5M rows
30.4078 secs @ 25M rows
68.2658 secs @ 50M rows

Q_HOURS_DROP
Code:
SELECT FIRST 100 ps1.page_id, ps1.page_count 
FROM pagestat ps1 
    JOIN datesinfo di1 ON ps1.date_id = di1.id 
    JOIN pagestat ps2 ON ps2.page_id=ps1.page_id 
    JOIN  datesinfo di2 ON ps2.date_id = di2.id 
WHERE di1.caldate='2010-02-16' 
  AND di1.dayhour=14 
  AND di2.caldate='2010-02-16' 
  AND di2.dayhour=15 
  AND ps1.project_id=1524 
  AND ps2.project_id=1524 
  AND ps1.page_count > 2 * ps2.page_count 
  AND ps1.page_count>=1000;
0.0962 secs @ 6.25M rows
0.1753 secs @ 12.5M rows
0.3361 secs @ 25M rows
0.7419 secs @ 50M rows

The next couple of queries are based on an intermediate table called pagestat_daily constructed as follows:

Code:
CREATE TABLE pagestat_daily 
(
   page_id int NOT NULL,
   date_id_from smallint  NOT NULL,
   date_id_to smallint  NOT NULL,
   project_id smallint  NOT NULL,
   page_count int  NOT NULL 
) WITH STRUCTURE=VECTORWISE;

INSERT INTO pagestat_daily (project_id, page_id, date_id_from, date_id_to, page_count ) 
SELECT ps.project_id, ps.page_id, min(ps.date_id), max(ps.date_id), sum(ps.page_count) 
FROM pagestat ps JOIN datesinfo di ON ps.date_id=di.id 
WHERE calmonth=2 
  AND calyear=2010 
GROUP BY ps.project_id, ps.page_id, di.caldate;
Q_DATE_RANGE
Code:
SELECT pj.project, sum(pd.page_count) AS sm 
   FROM pagestat_daily pd 
    JOIN datesinfo dstart ON pd.date_id_from=dstart.id 
    JOIN datesinfo dend ON pd.date_id_to=dend.id 
    JOIN projects pj ON pd.project_id=pj.id
WHERE dstart.caldate>='2010-02-15' 
  AND dend.caldate <= '2010-02-20' 
GROUP BY pj.project 
HAVING sum(pd.page_count) > 1000 
ORDER BY sm DESC;
0.6308 secs @ 6.25M rows
1.2273 secs @ 12.5M rows
2.3717 secs @ 25M rows
4.3561 secs @ 50M rows

Q_PAGE_DATE_RANGE
Code:
SELECT FIRST 100 pg.page, dt.sm FROM pages pg, 
   ( SELECT pd.page_id, sum(pd.page_count) AS sm 
     FROM pagestat_daily pd JOIN datesinfo dstart ON pd.date_id_from=dstart.id 
                            JOIN datesinfo dend ON pd.date_id_to=dend.id 
     WHERE dstart.caldate>='2010-02-15' 
       AND dend.caldate <= '2010-02-20' 
   GROUP BY pd.page_id ) dt 
WHERE pg.id=dt.page_id 
ORDER BY sm DESC;
2.4418 secs @ 6.25M rows
4.9174 secs @ 12.5M rows
9.1677 secs @ 25M rows
17.9668 secs @ 50M rows

Q_HIT_SPIKE
Code:
SELECT FIRST 10 ps.page_id, MAX(ps.page_count/pd.page_count) AS max_daily_spikiness
FROM pagestat ps INNER JOIN pagestat_daily pd ON ps.page_id=pd.page_id
                        AND ps.date_id BETWEEN pd.date_id_from AND pd.date_id_to
GROUP BY ps.page_id
ORDER BY max_daily_spikiness DESC;
7.7353 secs @ 6.25M rows
21.7507 secs @ 12.5M rows
65.8813 secs @ 25M rows
215.3931 secs @ 50M rows

QC_POINT_FACT
Code:
SELECT * 
FROM pagestat ps1 JOIN datesinfo di1 ON ps1.date_id = di1.id 
WHERE page_id=11610 
  AND project_id=1524 
  AND di1.calday=7 
  AND di1.calmonth=2 
  AND di1.calyear=2010 
  AND di1.dayhour=1;
0.1328 secs @ 6.25M rows
0.2115 secs @ 12.5M rows
0.3747 secs @ 25M rows
0.6587 secs @ 50M rows

QC_POINT_PAGE
Code:
SELECT * FROM pages pg WHERE pg.id=2065866;
0.0066 secs @ 6.25M rows
0.0068 secs @ 12.5M rows
0.0070 secs @ 25M rows
0.0069 secs @ 50M rows

Here are all the results again, in a more concise format for future reference. (Remember there is about 0.0017 seconds overhead included in these.):

Code:
+------------------------+---------+---------+---------+---------+
|query                   |6.25m row|12.5m row|25m rows |50m rows |
+------------------------+---------+---------+---------+---------+
|Q_10000HITS_2           |   0.2647|   0.5372|   1.0556|   2.0024|
|Q_1000HITS_1            |   0.1748|   0.3281|   0.6367|   1.2785|
|Q_DATE_RANGE            |   0.6308|   1.2273|   2.3717|   4.3561|
|Q_DAYSTAY               |   0.3245|   0.6406|   1.2530|   2.4830|
|Q_DAYWEEKSTAT           |   0.1681|   0.3212|   0.6162|   1.2079|
|Q_HIT_SPIKE             |   7.7353|  21.7507|  65.8813| 215.3931|
|Q_HOURPROJECTSTAT       |   0.1542|   0.2881|   0.5477|   1.0783|
|Q_HOURS_DROP            |   0.0962|   0.1753|   0.3361|   0.7419|
|Q_MAXPAGE               |   0.1704|   0.3197|   0.6166|   1.2251|
|Q_NOACCESS_SECOND_DAY   |   0.1018|   0.1469|   0.2398|   0.4059|
|Q_NOPROJECTS            |   0.3367|   0.8100|   1.3274|   2.7934|
|Q_NOPROJECTS_1          |   0.1123|   0.2107|   0.3935|   0.7832|
|Q_PAGES_PROJECT         |   6.0733|  13.1317|  30.4078|  68.2658|
|Q_PAGE_DATE_RANGE       |   2.4418|   4.9174|   9.1677|  17.9668|
|Q_PAGE_MONTHSTATS       |   0.2333|   0.4281|   0.7764|   1.4091|
|Q_POINT_FACT            |   0.1328|   0.2115|   0.3747|   0.6587|
|Q_POINT_PAGE            |   0.0066|   0.0068|   0.0070|   0.0069|
|Q_PROJECT_AVG           |   0.7520|   1.6324|   2.9139|   5.9650|
|Q_PROJECT_SUM           |   0.1906|   0.5411|   0.7768|   1.7320|
|Q_TOP50                 |   1.0425|   2.1400|   3.6009|   6.4686|
|Q_TOP_PROJECTS          |   0.7331|   1.5993|   2.8464|   5.8379|
|Q_UNIQ                  |   4.0147|   9.0174|  20.7797|  47.6740|
+------------------------+---------+---------+---------+---------+
Posted in VectorWise
Views 17318 Comments 0 Edit Tags

« Prev     Main     Next »

Total Comments 0

Comments

 
© 2011 Actian Corporation. All Rights Reserved