Perl Scripts to Extract Rows and Columns from Many Excel Files

I had to aggregate rows and columns from about 100 MS Excel spreadsheet files, pulling from specific sheets in each workbook. They were reports filed by a hundred different offices, all in the same format. Each extract was written as tab-delimited text to throw into another spreadsheet. The first script combines rows from many Excel files into one file; the second combines columns from many Excel files into one file. Why is this a common problem? MS Excel is a cheap and common report and analysis tool: everyone knows Excel, so everyone uses it. Having a bunch of people fill out a spreadsheet is a common solution, and needing to combine those spreadsheets into one file to analyze them is just as common.

The code is quick, dirty and ugly to boot, but it worked like a charm. In a couple hours of scripting I saved some people many, many hours of screwing around with the 100 spreadsheets, cutting, pasting and making errors.

# Get rows from many Excel spreadsheets in a directory
###################################
#! /usr/local/bin/perl -w
    use strict;
    use Spreadsheet::ParseExcel::Simple;

my $excel_directory = 'Budget';
my $out_directory = 'xxxout';
opendir(EXCELDIR, $excel_directory) || die ("no excel directory");
my @excelfiles = readdir(EXCELDIR);
closedir(EXCELDIR);

chdir($excel_directory);
       my $LPHname;    # String to hold Local Public Health Name.
       my @sheetarray; # Array to hold the row.
       my $sheetcount; # Array element in the row.
       my $sheetname; # Name of the Excel spreadsheet.
       my $sheettemp;  # Temporary string to hold row for join.
       my $cellnumber;  # Cell number in the row.
       my $cellwanted;  # Cell number wanted.
       my $rowwanted;  # Row number wanted.
       my $county_namecell;  # Cell for county name.
       my $county_namerow;  # Row for county name.
foreach my $exxfilename (@excelfiles){
   if ($exxfilename =~ /^\.+.*/) { next; }
   my $xls = Spreadsheet::ParseExcel::Simple->read($exxfilename);
   foreach my $sheet ($xls->sheets) {
       $sheetname= $sheet->{sheet}->{Name}; # Sheet Name
       if ($sheetname !~ '2007 Budget') { next; }
       $sheetcount=0;
       $county_namecell=11;
       $county_namerow=1;
#      $cellwanted=4;
       $rowwanted=11;

       while ($sheet->has_data) {
            my @data = $sheet->next_row;
            $sheetcount++;
         if ($sheetcount==$county_namerow){
            $cellnumber=0;
            foreach my $ttcell (@data) {
                $cellnumber++;
                if ($cellnumber != $county_namecell ){next;};
                 $sheettemp=$sheetarray[$sheetcount];
#                 $sheetarray[$sheetcount]=join("\t",$sheettemp,$ttcell);
                 $LPHname=$ttcell;
            }
         } 

#        if (($sheetcount < ($rowwanted-1)) || 
#               ($sheetcount > ($rowwanted+7))){next;}
         if ($sheetcount != $rowwanted){next;};
         $cellnumber=0;
         $sheetarray[$sheetcount]=join("\t",$sheettemp,$LPHname);
         foreach my $ttcell (@data) {
            $cellnumber++;
#            if ($cellnumber != $cellwanted ){next;};
            $sheettemp=$sheetarray[$sheetcount];
            $sheetarray[$sheetcount]=join("\t",$sheettemp,$ttcell);
         }
       }
    }
    foreach my $sheetline (@sheetarray){
        print $sheetline,"\n";
    }
}
exit


###############################################################
# Column extract.
# Get columns from many Excel spreadsheets in a directory
###############################################################

#! /usr/local/bin/perl -w
    use strict;
    use Spreadsheet::ParseExcel::Simple;


my $excel_directory = 'TEST';
opendir(EXCELDIR, $excel_directory) || die ("no excel directory");
my @excelfiles = readdir(EXCELDIR);
closedir(EXCELDIR);

chdir($excel_directory);
       my @sheetarray; # Array to hold the row.
       my $sheetcount; # Array element in the row.
       my $sheetname; # Name of the Excel spreadsheet.
       my $sheettemp;  # Temporary string to hold row for join.
       my $cellnumber;  # cell number in the row.
       my $cellwanted;  # cell number wanted.
       my $rowwanted;  # row number wanted.
       my $county_namecell;  # cell for county name.
       my $county_namerow;  # row for county name.
foreach my $exxfilename (@excelfiles){
    if ($exxfilename =~ /^\.+.*/) { next; }
    my $xls = Spreadsheet::ParseExcel::Simple->read($exxfilename);
    foreach my $sheet ($xls->sheets) {
       $sheetname= $sheet->{sheet}->{Name};


# Name the sheet to take stuff out of.
       if ($sheetname !~ '2007 Budget') { next; }
       $sheetcount=0;
       $county_namecell=11;
       $county_namerow=1;
       $cellwanted=2;
       $rowwanted=5;

       while ($sheet->has_data) {
            my @data = $sheet->next_row;
            $sheetcount++;
          if ($sheetcount==$county_namerow){
             $cellnumber=0;
             foreach my $ttcell (@data) {
                $cellnumber++;
                if ($cellnumber != $county_namecell ){next;};
                $sheettemp=$sheetarray[$sheetcount];
                $sheetarray[$sheetcount]=join("\t",$sheettemp,$ttcell);
             }
          }
          if (($sheetcount < $rowwanted) || ($sheetcount > ($rowwanted+5)))
             {next;}
# Column boundary: starting from rowwanted, take the cellwanted column.
#         if ($sheetcount != $rowwanted){next;};
          $cellnumber=0;
          foreach my $ttcell (@data) {
             $cellnumber++;
             if ($cellnumber != $cellwanted ){next;};
             $sheettemp=$sheetarray[$sheetcount];
             $sheetarray[$sheetcount]=join("\t",$sheettemp,$ttcell);
          }
       }
    }
}
foreach my $sheetline (@sheetarray){
    print $sheetline,"\n";
}
exit

Script to Find Referenced Files in HTML, ColdFusion, and Other Languages

The Problem: Missing References in ColdFusion Code

Some rip-off contractors dumped a pile of trash-filled ColdFusion code on some totally naive (or crooked) project managers. Not a process ran without errors, including many 404 errors in the web server logs. The project manager told me to make it work. The code came as a big wad of multiple directory trees and many thousands of files of many types, with no installation package and no build tools.

I decided to run a bunch of lint-type searches on the ColdFusion files for each type of error (like missing references) to document the bugs. I found thousands of errors: missing file references, unused files, empty files, empty directories, as well as many coding errors that indicated the contractor never tested the code.

A project that uses common software engineering processes (specifications, testing to specifications, bug tracking, build tools, packaging code for release, regression testing and release testing) would not turn up much with a lint script. Usually lint-type programs give out a bunch of false positives, but this code had so many real errors that I would have been happy to see a false positive.

Here is an example of a bash shell script to find missing referenced files from img tags in HTML. It is easy to modify for other file-reference tags: "link" tags for CSS, "script" tags for JavaScript, "form" tags, "a" tags, ColdFusion "cfinclude" tags, etc. I did just that and found hundreds of missing references for every type of tag I checked. It is a quick and dirty script, but it was useful for the situation.

Missing File Reference Script for Tag Based Languages

#######################
# Note: use the HTML source of this page to copy the script!!
#       The browser hides some of the script when it interprets the HTML!!

# find files that do a call to  image file
# Uses (dirname, basename) to get paths

# Root directory of code files.
LOCATION=docs

find_img () {

cd ${LOCATION}
pwd

CHECK_FILES=$(find . -name "*.htm*" -exec grep -ci '\<img ' {} \; -print | grep '^\.')

for FILE in ${CHECK_FILES}
do
     REFERENCES=$(nawk '/\<\!\-\-/ , /\-\-\>/    {next;} 
     /\<[Ii][Mm][Gg] /, /\>/ { 
                              sub(/^.*[Ss][Rr][Cc]=/,"");
                              $0=$1;
                              gsub(/"/,"");
                              print;}' ${FILE} | 
                                 egrep -i 'gif|jpg|jpeg|png' | sort -u  )

     for REF_FILE in ${REFERENCES}
     do
         DIRECTORYROOT=$(dirname ${FILE})
         ls ${DIRECTORYROOT}/${REF_FILE} >/dev/null 2>&1
         [ $? != 0 ] && echo "file:${FILE}     missing:${REF_FILE}";
     done
done  
}

find_img

# Here are some other type of file references that can use
# a modified form of this script:
#cfinclude template = "../cfdocs/dochome.htm"
#link rel="prefetch" href="/images/big.jpeg"
#link rel="StyleSheet" href="CSS/default.css" type="text/css" 
#form method="get" action="/some/form_script/form_stuff.php"
#script src="missing_javascript_file.js" type="text/javascript"
#a href="missing_link" ...

ColdFusion SQL String In CFQUERY Tag Single Quote Problem

Escaped single quotes mess up SQL used in the cfquery tag as described in ColdFusion single quote string screwup.

The behavior described in the page above also affects ColdFusion MX 7: when a string variable is put into a cfquery tag, any single quotes are changed to TWO single quotes (note: not a double quote). This makes building SQL strings difficult because they do not work inside the cfquery tag. For example, a typical SQL string should have single quotes around a string value, but inside the cfquery tag it becomes:

select * from mytable where first_name=''Joe Bob''

NOTE: that is two single quotes, not a "double quote" character, and it does not work as SQL. Worse yet, it looks like a double-quote problem, so it is a masked error that is harder to figure out.

This error is one of the most annoying bugs I have seen in ColdFusion; it was in ColdFusion MX 6.1 years ago and it still plagues every release. What is wrong with Adobe Systems that they let a fundamental task like building SQL strings from string variables, something used in almost every ColdFusion application, be such a burden to developers for years and years? I always wondered why constructing a simple string for SQL was such a convoluted pile of worm crap in all the Adobe docs for ColdFusion. The docs hide the fact that it does not pass a single quote properly; no wonder PHP cleaned their clock and took the web scripting market years ago. So what if CF limps along as a JVM-based language, it is a crappy JVM-based language. Switch to JRuby if you want a JVM-based language.

ColdFusion SQL String Single Quote Hack

My method: use the string function REReplace to change the two single quotes ('') back to one single quote ('). This must happen inside the cfquery tag after the string is evaluated, not outside the cfquery tag, where the two single quotes are not yet present.

<cfset string_variable="select * from mytable where first_name='Joe Bob'">

<cfquery name="string_with_single_quote" datasource="schema_user" result="my_result">
            #REReplace(string_variable,"''","'","ALL")#
</cfquery>

I prefer the regular expression replace to the PreserveSingleQuotes function in ColdFusion because at least I have some idea of what the regular expression is doing instead of trusting a black box. Big whoop, now you can make a string to pass SQL to an RDBMS with ColdFusion. Just do not forget this lame crutch or your application will be impossible to create. The convoluted ColdFusion documentation just does not cover this basic problem.

Oracle Database Tuning, MySql, Postgresql, Informix: Quick and Dirty Database Performance Tuning the RDBMS

Quick results are the point of this article, not optimum effect: these are quick and dirty database performance tuning methods. Oracle database tuning, Informix tuning, DB2 tuning, Postgres or MySql tuning? It does not matter much which RDBMS you use these days, because database performance tuning is pretty much the same everywhere, or at least this method of finding and fixing database performance problems is. I would use the same approach for Sybase tuning and SQL Server tuning, though I have never administered either product. RDBMS tuning is pretty much all the same because database software is pretty much the same these days: any feature that proves useful in one RDBMS is quickly copied by the others. All RDBMS (Relational Database Management Systems) have an engine, an optimizer, SQL, an operating system to run on, and an application interface to the RDBMS, and most have stored procedures, so they are more similar than different.

What Is Addressed In This Article.

Database configuration, disk layout and the operating system are usually under the configuration management control of the database administrator and the system administrator, and those are what this article addresses. Many applications are controlled by third-party vendors and subject to processes outside the administrator's control, so applications are not addressed in detail here.

Here are the heuristics listed in order of effort expended:

Analyze the Whole System, Not Just the Database.

Switch your point of view to examine the whole system, not just the database. This may be the easiest and most effective tuning technique.

You may not have to tune the database at all. At one customer site, shipping labels were not generated fast enough; the multiple CPU Unix system was maxed for hours using all the I/O and CPU resources. “The database is too slow,” the customer told me. “Go tune it.” I looked at the database statistics and also the operating system statistics. That’s when I noticed many processes spawning per minute from the Unix “at” command (a basic batch scheduling utility).

It was clear that the database was not the cause of this problem. The application scheduled a batch job for each shipping label with the “at” command, which had the Unix OS thrashing when it tried to generate and schedule a hundred thousand batch jobs to make labels. After changing a few lines in a shell script to generate the labels without the “at” utility the label generation took a tenth of the time. The database was not changed.

Look at network traffic, OS stats, database stats and I/O stats. The system is a whole, so compare the stats together to get a view of what the system is doing over an interval of time. The statistics often reflect the boundaries between software layers: I/O stats may show database activity as seen by the OS, and CPU context switches may show how other processes affect database processes. It is the whole system that must perform well, not just the database, so build a well-tuned system by taking a wide look at what is going on.
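
A minimal sketch of collecting those statistics over the same window, assuming the usual Unix utilities (vmstat, iostat, sar) are installed; the interval and sample count are arbitrary example values.

# Sample OS, I/O and CPU statistics over the same interval so they can be
# compared side by side with the database monitoring statistics.
INTERVAL=10    # seconds between samples (example value)
COUNT=60       # number of samples, 10 minutes total (example value)

vmstat ${INTERVAL} ${COUNT}    > vmstat.out  &   # run queue, paging, CPU
iostat -x ${INTERVAL} ${COUNT} > iostat.out  &   # per-device I/O load
sar -u ${INTERVAL} ${COUNT}    > sar_cpu.out &   # CPU utilization history
wait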

The Database is Only One Layer in the System, Check the Layer Interfaces.

Keep in mind that the application is separate from the database, the database engine is separate from the OS it runs on, and that OS connects to a network. This layered architecture is how I think about the performance problems that tuning will fix. Many problems live where the layers interface, or come down to how well the system is integrated.

Track All the Problems that Affect Database Performance.

Make a list of down times, batch job failures, backup failures, slow performance and so on in the network, OS, applications and the database. Look for patterns and relationships in the statistics and the failures. A system is a house of cards that needs a solid foundation: lack of backups, lack of OS resources, network problems, lack of security and poor configuration management may mask or compound any database performance problem. Tuning just the database is futile if the system crashes every hour. From the problem tracking list, find the easiest things to fix with the most payoff. Usually there are a couple of quick and effective changes that yield real performance improvements.

Many times the quick and effective fixes are not database problems at all but a problem, or a mix of problems, elsewhere in the system or in configuration management. Examples I have seen: OS file systems that are too full, shared memory misconfiguration, and software integration problems between the OS, database and third-party applications such as missing software patches or other dependencies; these can all be seen as software layer interface problems. Lack of configuration management of software going into production systems is a problem many overlook: adding software to production systems without controls can degrade performance or stop systems altogether.

Check Configuration Files for Obvious Problems.

Again, check the OS, network, application and user configurations, not just the database configuration. Look up parameters that you do not understand. I have seen old configuration files carried over from previous software releases that used out-of-date parameters or lacked the new parameters that make the software perform well. One system had new fast NIC cards set to run at an old low network speed; no one had added the new fast settings to the configuration file.

Read the release notes that come with the database software for the OS configuration parameters on the hardware used, and note the OS and application patches needed for the particular configuration. Then make sure the database and OS match the release notes of the applications run on the system. This chain of dependencies can cripple performance if every link is not in place.

Run the Database Statistics Jobs to Help RDBMS Optimizer Performance.

One basic performance improvement in databases such as PostgreSQL, Oracle, DB2 and Informix comes from the cost-based optimizers, which generate query plans by looking at statistics about the data, such as the number of rows in a table and the distribution of values. But the statistics the optimizers use are usually not generated by default; many times I have seen a database installation with no statistics jobs running at all, and poor performance is the result.

Check that the database optimizer statistics are run as recommended. The Oracle "ANALYZE" (release 7) and "DBMS_STATS" (8 and later), Informix "UPDATE STATISTICS" and PostgreSQL "VACUUM ANALYZE" commands create the statistics the SQL optimizers use to choose fast access paths to the data. Statistics only have to be regenerated when the data changes enough to make a difference, and statistics jobs are resource intensive, so run them only when needed; the object is to improve database performance, not cripple it with excess statistics processes.
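
As a reference, here are hedged examples of the commands named above; the table and schema names are made up, and the exact options vary by release, so check your own documentation.

-- Oracle release 7 style:
ANALYZE TABLE mytable COMPUTE STATISTICS;

-- Oracle 8 and later, using the DBMS_STATS package from sqlplus:
EXEC DBMS_STATS.GATHER_SCHEMA_STATS(ownname => 'MYSCHEMA');

-- Informix:
UPDATE STATISTICS MEDIUM FOR TABLE mytable;

-- PostgreSQL, refresh planner statistics (and reclaim dead rows):
VACUUM ANALYZE mytable;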

SQL Can Affect Performance by Magnitudes of 10.

Most RDBMS collect statistics from the RDBMS process memory to monitor the database while it is running. These statistics are exposed through functions and views and can be queried with SQL. In PostgreSQL, the monitoring statistics views have information about the SQL statements run, the number of times a table or index is scanned, and more; Oracle, DB2, Informix and the rest have their equivalents. Use these monitoring statistics to find the SQL statements that are the real system problems. Typically plenty of SQL statements perform badly, but only a few are the real resource problems; find the few that cause the most trouble and put your effort into fixing those.
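
For example, a minimal PostgreSQL sketch using the standard pg_stat_user_tables view; the other products' monitoring views can answer the same kind of question.

-- Tables with the most sequential scans; a huge seq_scan count on a large,
-- busy table is a good place to start looking for problem SQL.
SELECT relname, seq_scan, idx_scan, n_tup_ins, n_tup_upd, n_tup_del
  FROM pg_stat_user_tables
 ORDER BY seq_scan DESC
 LIMIT 10;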

Start by identifying what SQL is run and why the SQL is run. Sometimes reports are run at the wrong times on an OLTP application system or a batch SQL job can be easily rescheduled to not conflict with another application that is using lots of resources on a server.

Identify the SQL statements that are slow and schedule them to run at times that do not crush the rest of the system. Run 'set explain on' (Informix) or 'set autotrace on; set timing on' (Oracle) to see what the optimizer is doing with the SQL you are checking.
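
A quick sketch of both, with a made-up statement standing in for the slow SQL you are checking.

-- Oracle (sqlplus):
SET AUTOTRACE ON
SET TIMING ON
SELECT * FROM mytable WHERE last_name = 'Smith';   -- example statement

-- Informix (dbaccess); the query plan is written to the sqexplain.out file:
SET EXPLAIN ON;
SELECT * FROM mytable WHERE last_name = 'Smith';   -- example statement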

Fix Indexes That Affect the Bad SQL Code

Sometimes indexes are controlled by applications and cannot be administered easily, but usually there is no problem manipulating the indexing. First check that indexes are set up for the slow SQL statements; many times a problem SQL statement uses no index because the application or the data distribution has changed but the indexing on the affected tables has not. I have seen performance improve by magnitudes of 10 with the addition of a good index or an attribute added to an existing index.

There are lots of quick fixes to check for with indexes. Does the index contain an attribute that has nulls in its domain? Then the index will not be as effective in comparisons; maybe a default value can be used to get rid of the nulls. Attributes with a small domain, an uneven distribution of values, or many nulls are usually poor choices for index attributes.

An Oracle script to check the values of index attributes helps evaluate this; a minimal sketch of the idea follows below. Adding an attribute with a larger domain of values, dropping attributes that are not useful, or even dropping an index to reduce overhead are all options. Sometimes indexes need rebuilding if their tables have changed a lot over an interval of time.
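
A hedged sketch of such a check (table and column names are examples): list the indexed columns from the Oracle data dictionary, then compare distinct values and null counts to the row count.

-- Which columns are in which indexes:
SELECT index_name, column_name, column_position
  FROM user_ind_columns
 WHERE table_name = 'MYTABLE'
 ORDER BY index_name, column_position;

-- Cardinality and null count for one indexed column; few distinct values
-- or many nulls suggest a weak index key.
SELECT COUNT(*)                    AS total_rows,
       COUNT(DISTINCT last_name)   AS distinct_values,
       COUNT(*) - COUNT(last_name) AS null_values
  FROM mytable;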

There are different types of indexes and index organizations that can address performance problems. Bitmap indexes can be used for attributes with low cardinality, especially if there is an uneven distribution across the attribute domain. Because updates to them are costly, they are mostly used in tables that are read-mostly rather than updated, such as in a data warehouse application. I have had problems with the Oracle 7.3.4 optimizer not recognizing a bitmap index without a hint, which makes it almost worthless without changing code in an application that may not be under the database administrator's control.
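
A hedged Oracle example (table and column names are made up, and bitmap indexes are only available in the editions that support them):

-- Bitmap index on a low-cardinality column in a read-mostly reporting table.
CREATE BITMAP INDEX orders_status_bix ON orders (status);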

Parallelism is Usually Default “Off” in a Database. Turn It On?

If you have multiple CPUs and many disks, some degree of parallelism can be configured easily, though the details for each database are too much to cover here. Check your manuals, environment variables and configuration files for both sort and SQL parallelism. I have seen many database installations where easy-to-use parallelism features noticeably improved performance after a couple-line configuration file change and a restart. Some parallelism features can create performance problems, though; evaluate them carefully and check whether a particular parallel feature can be turned on for a particular time or set of transactions and turned off at other times, as in the sketch below.
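
A sketch of that kind of selective use in Oracle; the table name and degree are examples, and other products have their own switches.

-- Turn parallel query on for this session only, e.g. for a big batch job:
ALTER SESSION ENABLE PARALLEL QUERY;
SELECT /*+ PARALLEL(big_table, 4) */ COUNT(*) FROM big_table;

-- Or set a default degree on one table, and turn it back off afterwards:
ALTER TABLE big_table PARALLEL (DEGREE 4);
ALTER TABLE big_table NOPARALLEL;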

Table Partitioning can Make a Large I/O Problem Small.

Table partitioning is a method of storing a table in separate parts on disk to reduce the amount of data that has to be accessed. Analysis of the SQL that hits large tables may reveal a table or index partitioning scheme that removes a bottleneck by shrinking access to a small part of the table; the rest is partitioned into rarely accessed pieces that are basically dormant. An example is partitioning a table on a date attribute for queries that access data by date: if the table is partitioned by year or month and the usual query looks for recent dates, then the rest of the table is never searched (see the sketch below). I have seen response speed up by magnitudes of 10 for specific SQL statements using this technique. The more developed RDBMS products have several table and index partitioning methods. There is a cost to administering more partitions, so evaluate this technique carefully.
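
A minimal Oracle sketch of range partitioning on a date attribute; the table, columns and partition bounds are made-up examples.

CREATE TABLE shipments (
    shipment_id  NUMBER,
    ship_date    DATE,
    label_data   VARCHAR2(2000)
)
PARTITION BY RANGE (ship_date) (
    PARTITION ship_2006 VALUES LESS THAN (TO_DATE('2007-01-01','YYYY-MM-DD')),
    PARTITION ship_2007 VALUES LESS THAN (TO_DATE('2008-01-01','YYYY-MM-DD')),
    PARTITION ship_max  VALUES LESS THAN (MAXVALUE)
);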

Spread the Database Over Many Disks and I/O Channels for Parallel I/O.

– If you have a huge Hitachi or EMC disk array, get a lot of cache on the array. Make sure you do not contend with other applications pounding the I/O on the array. Spread everything evenly over the array to spread the I/O and use the array's full I/O capacity.

– If you have a smaller set of disks, do the traditional database disk layout. Separate the system catalogs from the various types of logs in different database partitions (Informix: logical and physical logs; Oracle: rollback, redo and archive logs; the other RDBMS products have similar schemes). Separate very active tables from each other. Separate indexes from tables. Table and index partitioning over multiple disks helps parallelism and I/O performance; however, disk reorganization is a lot of work. Separate disk space for the database logs probably has the most payoff, followed by the most active tables, but research the move carefully to make sure the return is worth the investment.
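
For example, a hedged Oracle sketch of moving indexes into their own tablespace on a separate volume; the names, path and size follow the style of the instance create script later on this page and are only examples.

-- Index tablespace on its own volume, away from the table data and the logs.
CREATE TABLESPACE indx01
    DATAFILE '/vol03/oradata/umnp/indx01_01.dbf' SIZE 200M;

CREATE INDEX orders_cust_idx ON orders (customer_id)
    TABLESPACE indx01;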

Prevention is Easier Than Fixing Problems.

Help the developers stop future problems by getting involved in design and code reviews. Look for things like many connections opened and closed to the database over the network when one connection, or a connection pooling scheme, would do; lack of database error trapping; and huge joins with gobs of I/O shipped over the network to fat clients for additional processing. Show the developers how to analyze SQL for performance and how to avoid long transactions.

Your Perfectly Tuned System Just Changed

Systems change. Data changes, software is upgraded, new hardware and applications are added and old parts of the system are removed. A whole system approach to RDBMS tuning and administration helps you be aware of the changes, calculate the risks and anticipate problems.

An older version of this article: Quick and Dirty RDBMS Tuning is on O’Reilly’s Onlamp.com site.

Export Oracle Data Definition Language (DDL) Scripts and Extract Oracle DDL

Before release 9i, Oracle was very anal retentive about letting usable and complete data definition language (DDL) out of its database servers, and it still provides very limited schema utilities. This script was written to extract the DDL for database development projects and configuration management; the end result is a runnable DDL script to create a database or a user schema.

How to do it:
Create an Oracle exp (export) file without the data.
Use the Oracle imp (import) utility with the "show" option to dump the DDL from the export file into a text file.
Then run the shell-awk filter below on the text file to create a DDL script that can be used by sqlplus or another SQL interpreter.

#!/bin/ksh
# Make DDL usable for Oracle svrmgrl or sqlplus from the Oracle 
# export/import utilities.
# More stuff at http://www.sofbot.com and http://www.tc.umn.edu/~hause011/

# How to do it: Use Oracle imp (import) utility to get the DDL from  
# an Oracle exp (export) file.  Then filter textfile with this awk. 
# The resulting DDL can be used by sqlplus or used to convert 
# to a different database product.
 
# Example:
# exp name/password full=y rows=n ...
# imp name/password show=y full=y file=xxxnamexx.dmp > textfile
# ugly_DDL.ksh textfile > bunch_of_SQL.sql

# Note: to remove storage declarations uncomment the line:
# '# remove_tablespace' near the bottom of this script.

# NOTE: Configure which awk to use right after "# Main" below. 

# Known Bugs: 
# DATE: 1/3/1999 
# PROBLEM: Line too long for awk to process
# FIX: Use gawk instead of awk or nawk. Or convert to perl.

# DATE: 1/4/1999 
# PROBLEM: arnold.schommer@io-computer.de has shown that for 
#     PL/SQL code blocks, especially Package declarations,
#     if there are comments starting with -- , in some - not all - cases, 
#     the script joins the following row.
# FIX: None at this time.  A work-around would be to edit the output of the
#     PL/SQL code before running it in an interpreter.
#


# Find lines with beginning 'space doublequote' and length < 78.
# Remove beginning 'space doublequote' and end doublequote, add space at end
# as they end at whole tokens.
# Find lines with beginning 'space doublequote' and length = 78.
# Remove beginning 'space doublequote' and end doublequote
# Break at the beginning of DDL statements. 
get_statements () {
    $AWK '
      /^ "/ { if (length($0)==78) {gsub("\"$","",$0);} 
                 else  {gsub("\"$"," ",$0);};  
              gsub("^ \"","",$0); 
              sub("^CREATE ","\nCREATE ",$0);
              sub("^ALTER ","\nALTER ",$0);
              sub("^COMMENT ","\nCOMMENT ",$0);
#              sub("--","\n--",$0);
#              sub("PROCEDURE ","\nPROCEDURE ",$0);
#              sub("FUNCTION ","\nFUNCTION ",$0);
              sub("^GRANT ","\nGRANT ",$0);
              print }
    ' 
}

# Grab whole blank line delimited records, put a semicolon at the
# end of a record then join the broken tokens by removing \n in the records.
join_tokens () {
    $AWK '
          BEGIN { RS="" } 
          { gsub("\n","",$0); }
          { print $0";\n" }
    ' 
}

# Remove tablespace and extent declarations.
remove_tablespace () {
    $AWK '
         { sub(" PCTFREE.*;",";",$0); }
         { print }
    ' 
}

# Fix line lengths; sqlplus and vi have problems with long lines.
semicolon_break () {
    $AWK '
         /;.*/ && /CREATE TRIGGER/ { gsub(";",";\n",$0); }
         /;.*/ && /CREATE PACKAGE/ { gsub(";",";\n",$0); }
         /;.*/ && /CREATE FUNCTION/ { gsub(";",";\n",$0); }
         { print }
    ' 
}

# Fix line lengths; sqlplus and vi have problems with long lines.
comma_break () {
    $AWK '
         length($0)> 1024  { gsub(",",",\n",$0); }
         { print }
    ' 
}

# Usage message.
usage () {
    echo;
    echo 'Usage: '$0' textfile'
    echo;
    exit 
}


###################################
# Main
#
AWK=/usr/bin/awk; export AWK

test $# -eq 1 || usage

cat $1|
get_statements |
join_tokens |
#remove_tablespace |
semicolon_break |
comma_break

#############################################

Create a Database Instance in Oracle

Before 8i there was no easy way to get the scripts to create databases in Oracle. Like it was a state secret or something. Larry, you are a moron.

Oracle seems to lack basic utilities, like schema extraction. Some of this DDL can be seen in a "full" database export, but gee whiz, Oracle seems to revel in making it difficult to use their expensively licensed product.

#################### cut here ##########################
REM * Database instance create script used with Oracle 8.0.5
REM * Start the instance (ORACLE_SID here must be set to umnp).
REM * Use svrmgrl so admin procedures get run correctly.
-- Shutdown instance, remove all datafiles, control files, lockfiles,
-- logfiles to rerun and recreate the same instance

set termout on
set echo on
spool create.log

connect internal
startup nomount pfile=/apps/oracle/product/8.0.5/dbs/initumnp.ora

create database "umnp"
    maxinstances 8
    maxlogfiles  32
    maxdatafiles 400
    character set "US7ASCII"
    datafile
	'/vol01/oradata/umnp/system01.dbf'	size  100M
    logfile 
	('/vol05/oradata/umnp/redo01.01',
	'/vol06/oradata/umnp/redo01.02') size 20M,
        group 2
        ('/vol05/oradata/umnp/redo02.01',
        '/vol06/oradata/umnp/redo02.02') size 20M,
        group 3
        ('/vol05/oradata/umnp/redo03.01',
        '/vol06/oradata/umnp/redo03.02') size 20M,
        group 4
        ('/vol05/oradata/umnp/redo04.01',
        '/vol06/oradata/umnp/redo04.02') size 20M,
        group 5
        ('/vol05/oradata/umnp/redo05.01',
        '/vol06/oradata/umnp/redo05.02') size 20M ;


@/apps/oracle/product/8.0.5/rdbms/admin/catalog.sql
@/apps/oracle/product/8.0.5/rdbms/admin/catproc.sql
@/apps/oracle/product/8.0.5/rdbms/admin/catparr.sql


create rollback segment temprbs
   tablespace system
   storage (initial 25K next 25K minextents 2 maxextents 99);

alter rollback segment temprbs online;

drop tablespace rollback01;
create tablespace rollback01 
 datafile '/vol07/oradata/umnp/rollback01_01.dbf' size 128M reuse
  default storage (initial 1m next 1m maxextents unlimited pctincrease 0);

drop tablespace temp01 ;
create tablespace temp01 
 datafile '/vol01/oradata/umnp/temp01_01.dbf' size 128M reuse
  default storage (initial 2m next 2m maxextents unlimited pctincrease 0) 
  temporary; 

drop tablespace user01 ;
create tablespace user01 
 datafile '/vol02/oradata/umnp/user01_01.dbf' size 100M reuse
  default storage (initial 100k next 100k maxextents unlimited pctincrease 0); 

create rollback segment R01
 tablespace rollback01
 storage (initial 1M next 1M optimal 10M
          minextents 20 maxextents 250);

create rollback segment R02
 tablespace rollback01
 storage (initial 1M next 1M optimal 10M
          minextents 20 maxextents 250);

create rollback segment R03
 tablespace rollback01
 storage (initial 1M next 1M optimal 10M
          minextents 20 maxextents 250);

create rollback segment R04
 tablespace rollback01
 storage (initial 1M next 1M optimal 10M
          minextents 20 maxextents 250);

create rollback segment R05
 tablespace rollback01
 storage (initial 1M next 1M optimal 10M
          minextents 20 maxextents 250);


alter rollback segment r01 online;
alter rollback segment r02 online;
alter rollback segment r03 online;
alter rollback segment r04 online;
alter rollback segment r05 online;

alter rollback segment temprbs offline;

REM * Alter SYS and SYSTEM users, because Oracle will make SYSTEM
REM * the default and temporary tablespace by default, and we don't
REM * want that.
REM *
alter user sys temporary tablespace temp01;
alter user system default tablespace user01 temporary tablespace temp01;

REM * Now run the Oracle-supplied scripts we need for this DB
REM *
@/apps/oracle/product/8.0.5/rdbms/admin/catblock.sql 
@/apps/oracle/product/8.0.5/rdbms/admin/catio.sql 
@/apps/oracle/product/8.0.5/rdbms/admin/dbmspool.sql

REM * All done, so close the log file and exit.
REM *
spool off
exit

################## cut here ###################################

Oracle Export and Import Compress Scripts

Here are two scripts that export and import databases and make compressed files for Oracle. Tested on Oracle 8.0.5 and Solaris 2.7. If the resulting compressed file is too big for the file system, you can run the file through the Unix utility "split" to make smaller files, as sketched below.
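
A minimal sketch of the split idea (the piece size and file names are examples, matching the named pipe and SID used in the scripts below): pipe the gzip output through split on export, then cat the pieces back together for import.

# Export side: cap each compressed piece at 1 GB.
cat export_pipe.dmp | /usr/local/bin/gzip | split -b 1024m - XXX1-01.dmp.gz.

# Import side: join the pieces and feed them back through the named pipe.
cat XXX1-01.dmp.gz.* | /usr/local/bin/zcat > export_pipe.dmp &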

#!/bin/ksh
# Export with flags (full, consistent) to a compressed file that 
# is named by SID-month-day using Oracle exp
##############################################
# Main

# Configure environment
. oracle_environment.ksh   # Oracle environment.
export ORACLE_SID=XXX1

DIRECTORY=/apps/home/oracle/local; export DIRECTORY
FILENAME=${DIRECTORY}/Exports/${ORACLE_SID}-`date +%d`.dmp.gz; export FILENAME
LOG=${DIRECTORY}/Exports/${ORACLE_SID}-`date +%d`.log; export LOG
PIPE=${DIRECTORY}/export_pipe.dmp; export PIPE

test -p ${PIPE} || mknod ${PIPE} p
chmod +rw ${PIPE}

cat $PIPE | /usr/local/bin/gzip > ${FILENAME} &
${ORACLE_HOME}/bin/exp log=${LOG} buffer=10485760 file=${PIPE} consistent=yes full=yes << EOF
system/password 
EOF
#############################################################



#!/bin/ksh
# Oracle imp import for gzipped Oracle exp file.
##############################################
# Main

# Path of exp file.
FILENAME=$1; export FILENAME

PIPE=export_pipe.dmp; export PIPE
test -p ${PIPE} || mknod export_pipe.dmp p
chmod +rw ${PIPE}

/usr/local/bin/zcat ${FILENAME} > ${PIPE} &
${ORACLE_HOME}/bin/imp buffer=10485760 file=${PIPE} full=yes log=impzip.log << EOF
system/password
EOF
#########################################################