<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<?xml-stylesheet href="http://www.blogger.com/styles/atom.css" type="text/css"?>

<feed xmlns="http://purl.org/atom/ns#" version="0.3" xml:lang="en-US">
<link href="https://www.blogger.com/atom/12660780" rel="service.post" title="Data Warehouse Performance Tuning &amp; Optimization" type="application/atom+xml"/>
<link href="https://www.blogger.com/atom/12660780" rel="service.feed" title="Data Warehouse Performance Tuning &amp; Optimization" type="application/atom+xml"/>
<title mode="escaped" type="text/html">Data Warehouse Performance Tuning &amp; Optimization</title>
<tagline mode="escaped" type="text/html">Tips for performance tuning and optimization</tagline>
<link href="http://www.atlogic.com/blog/" rel="alternate" title="Data Warehouse Performance Tuning &amp; Optimization" type="text/html"/>
<id>tag:blogger.com,1999:blog-12660780</id>
<modified>2006-02-25T04:05:51Z</modified>
<generator url="http://www.blogger.com/" version="5.15">Blogger</generator>
<info mode="xml" type="text/html">
<div xmlns="http://www.w3.org/1999/xhtml">This is an Atom formatted XML site feed. It is intended to be viewed in a Newsreader or syndicated to another site. Please visit the <a href="http://help.blogger.com/bin/answer.py?answer=697">Blogger Help</a> for more info.</div>
</info>
<convertLineBreaks xmlns="http://www.blogger.com/atom/ns#">false</convertLineBreaks>
<entry xmlns="http://purl.org/atom/ns#">
<link href="https://www.blogger.com/atom/12660780/111768254575932642" rel="service.edit" title="DB2 Configuration Parameters for Data Warehousing" type="application/atom+xml"/>
<author>
<name>Miguel Barrientos</name>
</author>
<issued>2005-06-01T23:22:25-04:00</issued>
<modified>2005-06-02T03:22:25Z</modified>
<created>2005-06-02T03:22:25Z</created>
<link href="http://www.atlogic.com/blog/2005/06/db2-configuration-parameters-for-data.htm" rel="alternate" title="DB2 Configuration Parameters for Data Warehousing" type="text/html"/>
<id>tag:blogger.com,1999:blog-12660780.post-111768254575932642</id>
<title mode="escaped" type="text/html">DB2 Configuration Parameters for Data Warehousing</title>
<content type="application/xhtml+xml" xml:base="http://www.atlogic.com/blog/" xml:space="preserve">
<div xmlns="http://www.w3.org/1999/xhtml">An essential part of the performance tuning process is verifying that your database is configured properly. Scott Hayes and Philip Gunning provide detailed tips for setting DB2 configuration parameters in <a href="http://www.db2mag.com/db_area/archives/2002/q3/hayes.shtml">Tuning Up for OLTP and Data Warehousing</a>.</div>
</content>
<draft xmlns="http://purl.org/atom-blog/ns#">false</draft>
</entry>
<entry xmlns="http://purl.org/atom/ns#">
<link href="https://www.blogger.com/atom/12660780/111699131174797241" rel="service.edit" title="Measure, Improve, Repeat" type="application/atom+xml"/>
<author>
<name>Miguel Barrientos</name>
</author>
<issued>2005-05-24T23:21:51-04:00</issued>
<modified>2005-05-25T03:21:51Z</modified>
<created>2005-05-25T03:21:51Z</created>
<link href="http://www.atlogic.com/blog/2005/05/measure-improve-repeat.htm" rel="alternate" title="Measure, Improve, Repeat" type="text/html"/>
<id>tag:blogger.com,1999:blog-12660780.post-111699131174797241</id>
<title mode="escaped" type="text/html">Measure, Improve, Repeat</title>
<content type="application/xhtml+xml" xml:base="http://www.atlogic.com/blog/" xml:space="preserve">
<div xmlns="http://www.w3.org/1999/xhtml">Scott Hayes wrote <a href="http://www.db2mag.com/story/showArticle.jhtml;?articleID=161601940">Measure, Improve, Repeat</a>, an article that covers some practical advice on tuning DB2. The article does not focus on data warehouse applications, but some of the tips are still valid. I found his tip about getting all performance data with just one command particularly useful:

$ db2 "get snapshot for all on DBNAME" &gt; allsnap.txt</div>
</content>
<draft xmlns="http://purl.org/atom-blog/ns#">false</draft>
</entry>
<entry xmlns="http://purl.org/atom/ns#">
<link href="https://www.blogger.com/atom/12660780/111699023205675533" rel="service.edit" title="Teradata data compression" type="application/atom+xml"/>
<author>
<name>Miguel Barrientos</name>
</author>
<issued>2005-05-24T23:03:00-04:00</issued>
<modified>2005-05-25T03:03:52Z</modified>
<created>2005-05-25T03:03:52Z</created>
<link href="http://www.atlogic.com/blog/2005/05/teradata-data-compression.htm" rel="alternate" title="Teradata data compression" type="text/html"/>
<id>tag:blogger.com,1999:blog-12660780.post-111699023205675533</id>
<title mode="escaped" type="text/html">Teradata data compression</title>
<content type="application/xhtml+xml" xml:base="http://www.atlogic.com/blog/" xml:space="preserve">
<div xmlns="http://www.w3.org/1999/xhtml">One of the interesting ways performance can be improved in Teradata is through <a href="http://www.teradataforum.com/l020829a.htm">data compression</a>. The linked article mentions some of the details behind data compression.</div>
</content>
<draft xmlns="http://purl.org/atom-blog/ns#">false</draft>
</entry>
<entry xmlns="http://purl.org/atom/ns#">
<link href="https://www.blogger.com/atom/12660780/111661208339239678" rel="service.edit" title="Aggregate Tables" type="application/atom+xml"/>
<author>
<name>Miguel Barrientos</name>
</author>
<issued>2005-05-20T14:01:00-04:00</issued>
<modified>2006-02-25T04:05:51Z</modified>
<created>2005-05-20T18:01:23Z</created>
<link href="http://www.atlogic.com/blog/2005/05/aggregate-tables.html" rel="alternate" title="Aggregate Tables" type="text/html"/>
<id>tag:blogger.com,1999:blog-12660780.post-111661208339239678</id>
<title mode="escaped" type="text/html">Aggregate Tables</title>
<content type="application/xhtml+xml" xml:base="http://www.atlogic.com/blog/" xml:space="preserve">
<div xmlns="http://www.w3.org/1999/xhtml">I'll be gathering all my notes on aggregate tables in this post.<br/>
<br/>
<strong>What are aggregate tables?</strong>
<br/>
<br/>Aggregate tables, also known as summary tables, are fact tables which contain data that has been summarized up to a different level of detail. For example, let's say that your data warehouse contains a transaction table with the following characteristics (I'll use a banking example):<br/>
<br/>Table dimensionality: account id, transaction type, day id, transaction amount<br/>Average number of transactions per day: 30 million<br/>Number of days stored in the transaction table: 30<br/>Approximate number of rows: 900 million rows<br/>
<br/>Let's pretend that half of the daily transactions are deposits, so there are approximately 450 million rows that represent deposit transactions. The other half are withdrawals.<br/>
<br/>Suppose a DW user wants to know how much money was deposited into the bank during the past month. The user, through the reporting software, will issue a query similar to:<br/>
<br/>select sum(transaction_amount)<br/>from transaction_fact<br/>where transaction_type='deposit'<br/>
<br/>Pretend that your DW platform can scan 10 million rows per second; therefore, the approximate time to complete the query will be:<br/>
<br/>query time = number of rows / scan rate<br/>
<br/>which in our example translates into:<br/>
<br/>query time = 900 million rows / 10 million rows/second<br/>
<br/>query time = 90 seconds<br/>
<br/>Waiting 90 seconds for such a simple query is simply unacceptable, so here is where an aggregate table can help you. To answer our hypothetical question, we will build an aggregate table which summarizes the transaction table by day and transaction type. The aggregate may be defined as follows:<br/>
<br/>create table fact_transaction_aggregate as<br/>select day_id, transaction_type, sum(transaction_amount) as transaction_amount<br/>from transaction_fact<br/>group by day_id, transaction_type<br/>
<br/>We said before that there are only two transaction types and thirty days of data. Using the simplifying assumption that half of the daily transactions are deposits and half are withdrawals, the size of the new table will be only 60 rows! (table size = 30 days * 2 transaction types)<br/>
<br/>The SQL needed to get the answer is:<br/>
<br/>select sum(transaction_amount)<br/>from fact_transaction_aggregate<br/>where transaction_type='deposit'<br/>
<br/>And the answer will come back in 0.000006 seconds (60 rows / 10 million rows/second). The result: happy users!</div>
</content>
<draft xmlns="http://purl.org/atom-blog/ns#">false</draft>
</entry>
<entry xmlns="http://purl.org/atom/ns#">
<link href="https://www.blogger.com/atom/12660780/111633782221881847" rel="service.edit" title="Up and Running with DB2 UDB ESE: Partitioning for Performance" type="application/atom+xml"/>
<author>
<name>Miguel Barrientos</name>
</author>
<issued>2005-05-17T09:48:00-04:00</issued>
<modified>2005-05-17T13:50:22Z</modified>
<created>2005-05-17T13:50:22Z</created>
<link href="http://www.atlogic.com/blog/2005/05/up-and-running-with-db2-udb-ese.htm" rel="alternate" title="Up and Running with DB2 UDB ESE: Partitioning for Performance" type="text/html"/>
<id>tag:blogger.com,1999:blog-12660780.post-111633782221881847</id>
<title mode="escaped" type="text/html">Up and Running with DB2 UDB ESE: Partitioning for Performance</title>
<content type="application/xhtml+xml" xml:base="http://www.atlogic.com/blog/" xml:space="preserve">
<div xmlns="http://www.w3.org/1999/xhtml">This IBM e-book is a must-read if you're using DB2 UDB 8.1. <a href="http://www.redbooks.ibm.com/abstracts/sg246917.html?Open">IBM Redbooks | Up and Running with DB2 UDB ESE: Partitioning for Performance in an e-Business Intelligence World</a>

It discusses:
<ul>   <li>Guidelines on building the large database and determining the number of partitions </li>   <li>Bulk load using the new multipartition load </li>   <li>Performance enhancements using MultiDimensional Clustering and Materialized Query Tables.</li>   <li>Availability through the new online utilities</li>   <li>Self Managing And Resource Tuning features</li>   <li>Migration scenarios</li> </ul>
</div>
</content>
<draft xmlns="http://purl.org/atom-blog/ns#">false</draft>
</entry>
<entry xmlns="http://purl.org/atom/ns#">
<link href="https://www.blogger.com/atom/12660780/111590742264223807" rel="service.edit" title="BizGres" type="application/atom+xml"/>
<author>
<name>Miguel Barrientos</name>
</author>
<issued>2005-05-12T10:17:00-04:00</issued>
<modified>2006-02-25T03:59:40Z</modified>
<created>2005-05-12T14:17:02Z</created>
<link href="http://www.atlogic.com/blog/2005/05/bizgres.html" rel="alternate" title="BizGres" type="text/html"/>
<id>tag:blogger.com,1999:blog-12660780.post-111590742264223807</id>
<title mode="escaped" type="text/html">BizGres</title>
<content type="application/xhtml+xml" xml:base="http://www.atlogic.com/blog/" xml:space="preserve">
<div xmlns="http://www.w3.org/1999/xhtml">Some hints about the hardware configuration used for BizGres/Greenplum: <a href="http://www.bizgres.org/blog/?p=4">CPUs and the cheap disk revolution</a>
</div>
</content>
<draft xmlns="http://purl.org/atom-blog/ns#">false</draft>
</entry>
<entry xmlns="http://purl.org/atom/ns#">
<link href="https://www.blogger.com/atom/12660780/111590719279886312" rel="service.edit" title="Greenplum - Products - DeepGreen Clustering Edition" type="application/atom+xml"/>
<author>
<name>Miguel Barrientos</name>
</author>
<issued>2005-05-12T10:13:12-04:00</issued>
<modified>2005-05-12T14:13:12Z</modified>
<created>2005-05-12T14:13:12Z</created>
<link href="http://www.atlogic.com/blog/2005/05/greenplum-products-deepgreen.htm" rel="alternate" title="Greenplum - Products - DeepGreen Clustering Edition" type="text/html"/>
<id>tag:blogger.com,1999:blog-12660780.post-111590719279886312</id>
<title mode="escaped" type="text/html">Greenplum - Products - DeepGreen Clustering Edition</title>
<content type="application/xhtml+xml" xml:base="http://www.atlogic.com/blog/" xml:space="preserve">
<div xmlns="http://www.w3.org/1999/xhtml">Open source data warehousing platform using an approach that seems very close to Netezza's: <a href="http://www.greenplum.com/prod_deepgreen_cluster.html">Greenplum - Products - DeepGreen Clustering Edition</a>
</div>
</content>
<draft xmlns="http://purl.org/atom-blog/ns#">false</draft>
</entry>
<entry xmlns="http://purl.org/atom/ns#">
<link href="https://www.blogger.com/atom/12660780/111590709416593305" rel="service.edit" title="BizGres" type="application/atom+xml"/>
<author>
<name>Miguel Barrientos</name>
</author>
<issued>2005-05-12T10:11:34-04:00</issued>
<modified>2005-05-12T14:11:34Z</modified>
<created>2005-05-12T14:11:34Z</created>
<link href="http://www.atlogic.com/blog/2005/05/bizgres.htm" rel="alternate" title="BizGres" type="text/html"/>
<id>tag:blogger.com,1999:blog-12660780.post-111590709416593305</id>
<title mode="escaped" type="text/html">BizGres</title>
<content type="application/xhtml+xml" xml:base="http://www.atlogic.com/blog/" xml:space="preserve">
<div xmlns="http://www.w3.org/1999/xhtml">The arrival of open source business intelligence? <a href="http://www.bizgres.org/">BizGres</a>
</div>
</content>
<draft xmlns="http://purl.org/atom-blog/ns#">false</draft>
</entry>
<entry xmlns="http://purl.org/atom/ns#">
<link href="https://www.blogger.com/atom/12660780/111584204443752360" rel="service.edit" title="Hardware Tuning Methodology" type="application/atom+xml"/>
<author>
<name>Miguel Barrientos</name>
</author>
<issued>2005-05-11T18:55:00-04:00</issued>
<modified>2005-05-11T20:16:22Z</modified>
<created>2005-05-11T20:07:24Z</created>
<link href="http://www.atlogic.com/blog/2005/05/hardware-tuning-methodology.htm" rel="alternate" title="Hardware Tuning Methodology" type="text/html"/>
<id>tag:blogger.com,1999:blog-12660780.post-111584204443752360</id>
<title mode="escaped" type="text/html">Hardware Tuning Methodology</title>
<content type="application/xhtml+xml" xml:base="http://www.atlogic.com/blog/" xml:space="preserve">
<div xmlns="http://www.w3.org/1999/xhtml">This post is where I'll be collecting my thoughts about a performance tuning methodology for hardware.

The purpose of the methodology is to improve query response in the most time- and cost-efficient manner. The steps that need to be followed are:

<ol>   <li>Identify the bottleneck: using OS and RDBMS monitoring tools, figure out if query response is limited by I/O, memory, or CPU speed.</li>   <li>If I/O is the problem, check if there is any way to tune the OS to improve performance. If not, check if installing faster hard drives or more controllers could help.</li>   <li>If memory is the bottleneck, first figure out if adding memory would improve performance. Then check if tuning OS or RDBMS memory settings would help. I'm working under the assumption that adding RAM is cheap, whereas tuning parameters takes time/money because of all the testing and specialized knowledge required.</li>   <li>If CPU speed is the bottleneck, check if adding more CPUs is an option. How well does your hardware/OS/RDBMS scale with more CPUs? Can you upgrade to faster CPUs?</li> </ol>
</div>
</content>
<draft xmlns="http://purl.org/atom-blog/ns#">false</draft>
</entry>
<entry xmlns="http://purl.org/atom/ns#">
<link href="https://www.blogger.com/atom/12660780/111583895195943561" rel="service.edit" title="NewsForge | Comparing MySQL performance" type="application/atom+xml"/>
<author>
<name>Miguel Barrientos</name>
</author>
<issued>2005-05-11T15:15:51-04:00</issued>
<modified>2005-05-11T19:15:51Z</modified>
<created>2005-05-11T19:15:51Z</created>
<link href="http://www.atlogic.com/blog/2005/05/newsforge-comparing-mysql-performance.htm" rel="alternate" title="NewsForge | Comparing MySQL performance" type="text/html"/>
<id>tag:blogger.com,1999:blog-12660780.post-111583895195943561</id>
<title mode="escaped" type="text/html">NewsForge | Comparing MySQL performance</title>
<content type="application/xhtml+xml" xml:base="http://www.atlogic.com/blog/" xml:space="preserve">
<div xmlns="http://www.w3.org/1999/xhtml">Not directly related to data warehousing, but a nice comparison of MySQL performance on multiple operating systems: <a href="http://www.newsforge.com/article.pl?sid=04/12/27/1243207">Comparing MySQL performance</a>

Also check the methodology used to get the results: <a href="http://software.newsforge.com/software/04/12/27/1238216.shtml?tid=72&amp;tid=29">Using MySQL to benchmark OS performance</a>
</div>
</content>
<draft xmlns="http://purl.org/atom-blog/ns#">false</draft>
</entry>
<entry xmlns="http://purl.org/atom/ns#">
<link href="https://www.blogger.com/atom/12660780/111575520763530197" rel="service.edit" title="Amazon.com: Books: High Performance Oracle Data Warehousing: All You Need to Master Professional Database Development Using Oracle" type="application/atom+xml"/>
<author>
<name>Miguel Barrientos</name>
</author>
<issued>2005-05-10T16:00:07-04:00</issued>
<modified>2005-05-10T20:00:07Z</modified>
<created>2005-05-10T20:00:07Z</created>
<link href="http://www.atlogic.com/blog/2005/05/amazoncom-books-high-performance.htm" rel="alternate" title="Amazon.com: Books: High Performance Oracle Data Warehousing: All You Need to Master Professional Database Development Using Oracle" type="text/html"/>
<id>tag:blogger.com,1999:blog-12660780.post-111575520763530197</id>
<title mode="escaped" type="text/html">Amazon.com: Books: High Performance Oracle Data Warehousing: All You Need to Master Professional Database Development Using Oracle</title>
<content type="application/xhtml+xml" xml:base="http://www.atlogic.com/blog/" xml:space="preserve">
<div xmlns="http://www.w3.org/1999/xhtml">Reading list: <a href="http://www.amazon.com/exec/obidos/tg/detail/-/1576101541/104-8082863-4210367?v=glance">Amazon.com: Books: High Performance Oracle Data Warehousing: All You Need to Master Professional Database Development Using Oracle</a>
</div>
</content>
<draft xmlns="http://purl.org/atom-blog/ns#">false</draft>
</entry>
<entry xmlns="http://purl.org/atom/ns#">
<link href="https://www.blogger.com/atom/12660780/111575511137079317" rel="service.edit" title="SQL Server Performance Guide for Data Warehousing" type="application/atom+xml"/>
<author>
<name>Miguel Barrientos</name>
</author>
<issued>2005-05-10T15:58:31-04:00</issued>
<modified>2005-05-10T19:58:31Z</modified>
<created>2005-05-10T19:58:31Z</created>
<link href="http://www.atlogic.com/blog/2005/05/sql-server-performance-guide-for-data.htm" rel="alternate" title="SQL Server Performance Guide for Data Warehousing" type="text/html"/>
<id>tag:blogger.com,1999:blog-12660780.post-111575511137079317</id>
<title mode="escaped" type="text/html">SQL Server Performance Guide for Data Warehousing</title>
<content type="application/xhtml+xml" xml:base="http://www.atlogic.com/blog/" xml:space="preserve">
<div xmlns="http://www.w3.org/1999/xhtml">Reading list: <a href="http://www.microsoft.com/resources/documentation/sql/2000/all/reskit/en-us/part5/c2061.mspx">SQL Server Performance Guide for Data Warehousing</a>
</div>
</content>
<draft xmlns="http://purl.org/atom-blog/ns#">false</draft>
</entry>
</feed>

