Search This Blog

2010-04-01

how-to split file into file chunks based on the number of lines per file



namespace Gaf.IO
{
using System.IO;
 
/// <summary>
/// Summary for the Files class
/// <source>Subsonic 2.1 final</source>
/// </summary>
public static class FileUtility
{
/// <summary>
/// Read a text file and obtain it's contents.
/// </summary>
/// <param name="absolutePath">The complete file path to write to.</param>
/// <returns>String containing the content of the file.</returns>
public static string GetFileText(string absolutePath)
{
using (StreamReader sr = new StreamReader(absolutePath))
return sr.ReadToEnd();
}
 
/// <summary>
/// Creates or opens a file for writing and writes text to it.
/// </summary>
/// <param name="absolutePath">The complete file path to write to.</param>
/// <param name="fileText">A String containing text to be written to the file.</param>
public static void CreateToFile(string absolutePath, string fileText)
{
using (StreamWriter sw = File.CreateText(absolutePath))
sw.Write(fileText);
}
 
/// <summary>
/// Update text within a file by replacing a substring within the file.
/// </summary>
/// <param name="absolutePath">The complete file path to write to.</param>
/// <param name="lookFor">A String to be replaced.</param>
/// <param name="replaceWith">A String to replace all occurrences of lookFor.</param>
public static void UpdateFileText(string absolutePath, string lookFor, string replaceWith)
{
string newText = GetFileText(absolutePath).Replace(lookFor, replaceWith);
WriteToFile(absolutePath, newText);
}
 
/// <summary>
/// Writes out a string to a file.
/// </summary>
/// <param name="absolutePath">The complete file path to write to.</param>
/// <para0m name="fileText">A String containing text to be written to the file.</param>
public static void WriteToFile(string absolutePath, string fileText)
{
using (StreamWriter sw = new StreamWriter(absolutePath, false))
sw.Write(fileText);
}
 
} //eof clas 
} //eof namespace 
 
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
 
namespace FileSplitter
{
using System.IO;
 
class FileCounter
{
static string bigFile = @"D:\mylibs\dictionary_dump\dic-0294\dbo.Msg.TableInsert.sql";  
static void Main()
{
Console.WriteLine(" The number of lines is " + CountLinesInFile(bigFile).ToString());
Console.Read();
}
 
/// <summary>
/// Count the number of lines in the file specified.
/// </summary>
/// <param name="f">The filename to count lines in.</param>
/// <returns>The number of lines in the file.</returns>
static long CountLinesInFile(string f)
{
long count = 0;
using (StreamReader r = new StreamReader(f))
{
StringBuilder objStringBuilder = new StringBuilder();
 
 
string line;
while ((line = r.ReadLine()) != null)
{
//not append !!! we need \n
objStringBuilder.AppendLine(line);
count++;
 
int nuberOfLinesPerSmallFile = 1000; 
 
if (count%nuberOfLinesPerSmallFile == 0)
{
string smallFileName = (count - nuberOfLinesPerSmallFile).ToString() + "-" +  count.ToString() + "_"  + System.IO.Path.GetFileName(bigFile ) ;
smallFileName = System.IO.Path.GetDirectoryName(bigFile) + "\\" + smallFileName;
Console.WriteLine(" Writing to " + smallFileName); 
Gaf.IO.FileUtility.WriteToFile(smallFileName, objStringBuilder.ToString());
objStringBuilder = new StringBuilder();
} //eof if                
} //eof while
} //eof using
return count;
} //eof method 
 
} //eof class 
} //eof namespace 

No comments:

Post a Comment

- the first minus - Comments have to be moderated because of the spammers
- the second minus - I am very lazy at moderating comments ... hardly find time ...
- the third minus - Short links are no good for security ...
- The REAL PLUS : Any critic and positive feedback is better than none, so your comments will be published sooner or later !!!!

Labels

perl (41) Cheat Sheet (25) how-to (24) windows (14) sql server 2008 (13) linux (12) oracle (12) sql (12) Unix (11) cmd windows batch (10) mssql (10) cmd (9) script (9) textpad (9) netezza (8) sql server 2005 (8) cygwin (7) meta data mssql (7) metadata (7) bash (6) code generation (6) Informatica (5) cheatsheet (5) energy (5) tsql (5) utilities (5) excel (4) future (4) generic (4) git cheat sheet (4) html (4) perl modules (4) programs (4) settings (4) sh (4) shortcuts (4) поуки (4) принципи (4) Focus Fusion (3) Solaris (3) cool programs (3) development (3) economy (3) example (3) freeware (3) fusion (3) logging (3) morphus (3) mssql 2005 (3) nuclear (3) nz (3) parse (3) python (3) sftp (3) sofware development (3) source (3) sqlplus (3) table (3) vim (3) .Net (2) C# (2) China (2) GUI (2) Google (2) GoogleCL (2) Solaris Unix (2) architecture (2) ascii (2) awk (2) batch (2) cas (2) chrome extensions (2) code2html (2) columns (2) configuration (2) conversion (2) duplicates (2) excel shortcuts (2) export (2) file (2) free programs (2) informatica sql repository (2) linux cheat sheet (2) mssql 2008 (2) mysql (2) next big future (2) nsis (2) nz netezza cheat sheet (2) nzsql (2) ora (2) prediction (2) publish (2) release management (2) report (2) security (2) single-click (2) sqlserver 2005 (2) sqlserver 2008 (2) src (2) ssh (2) template (2) tools (2) vba (2) video (2) xlt (2) xml (2) youtube videos (2) *nix (1) .vimrc (1) .virmrc vim settings configs (1) BSD license (1) Bulgaria (1) Dallas (1) Database role (1) Dense plasma focus (1) Deployment (1) ERP (1) ExcelToHtml (1) GD (1) GDP (1) HP-UX (1) Hosting (1) IDEA (1) INC (1) IT general (1) ITIL management bullshit-management (1) IZarc (1) Java Web Start (1) JavaScript anchor html jquery (1) Khan Academy (1) LINUX UNIX BASH AND CYGWIN TIPS AND TRICKS (1) Linux Unix rpm cpio build install configure (1) Linux git source build .configure make (1) ListBox (1) MIT HYDROGEN VIRUS (1) OO (1) Obama (1) PowerShell (1) Run-time (1) SDL (1) SIWA (1) SOX (1) Scala (1) Services (1) Stacks (1) SubSonic (1) TED (1) abstractions (1) ansible hosts linux bash (1) ansible linux deployment how-to (1) ansible yum pip python (1) apache (1) apache 2.2 (1) application life cycle (1) architecture input output (1) archive (1) arguments (1) avatar (1) aws cheat sheet cli (1) aws cli (1) aws cli amazon cheat sheet (1) aws elb (1) backup (1) bash Linux open-ssh ssh ssh_server ssh_client public-private key authentication (1) bash perl search and replace (1) bash stub (1) bin (1) biofuels (1) biology (1) books (1) browser (1) bubblesort (1) bugs (1) build (1) byte (1) cas_sql_dev (1) chennai (1) chrome (1) class (1) claut (1) cmdow (1) code generation sqlserver (1) command (1) command line (1) conf (1) confluence (1) console (1) convert (1) cool programs windows free freeware (1) copy paste (1) copy-paste (1) csv (1) ctags (1) current local time (1) cygwin X11 port-forwarding mintty xclock Linux Unix X (1) cygwin bash how-to tips_n_tricks (1) cygwin conf how-to (1) data (1) data types (1) db2 cheat sheet (1) db2 starter ibm bash Linux (1) debt (1) diagram (1) dictionaries (1) digital (1) disk (1) disk space (1) documentation (1) dos (1) dubai (1) e-cars (1) electric cars (1) electricity (1) emulate (1) errors (1) exponents (1) export workflow (1) extract (1) fast export (1) fexp (1) file extension (1) file permissions (1) findtag (1) firewall (1) for loop (1) freaky (1) functions (1) fusion research (1) german (1) git gitlab issues handling system (1) google cli (1) google code (1) google command line interface (1) gpg (1) ha (1) head (1) helsinki (1) history (1) hop or flop (1) host-independant (1) how-to Windows cmd time date datetime (1) ibm db2 cognos installation example db deployment provisioning (1) ideas (1) image (1) informatica oracle sql (1) informatica repo sql workflows sessions file source dir (1) informatica source files etl (1) install (1) isg-pub issue-tracker architecture (1) it management best practices (1) java (1) jump to (1) keyboard shortcuts (1) ksh (1) level (1) linkedin (1) linux bash ansible hosts (1) linux bash commands (1) linux bash how-to shell expansion (1) linux bash shell grep xargs (1) linux bash tips and t ricks (1) linux bash unix cygwin cheatsheet (1) linux bash user accounts password (1) linux bash xargs space (1) linux cheat-sheet (1) linux cheatsheet cheat-sheet revised how-to (1) linux how-to non-root vim (1) linux ssh hosts parallel subshell bash oneliner (1) london (1) make (1) me (1) metacolumn (1) metadata functions (1) metaphonre (1) method (1) model (1) movie (1) multithreaded (1) mysql cheat sheet (1) mysql how-to table datatypes (1) n900 (1) nano (1) neteza (1) netezza bash linux nps (1) netezza nps (1) netezza nps nzsql (1) netezza nz Linux bash (1) netezza nz bash linux (1) netezza nz nzsql sql (1) netezza nzsql database db sizes (1) non-password (1) nord pol (1) nps backup nzsql schema (1) number formatting (1) nz db size (1) nz table count rows (1) nzsql date timestamp compare bigint to_date to_char now (1) on-lier (1) one-liners (1) one-to-many (1) oneliners (1) open (1) open source (1) openrowset (1) openssl (1) oracle PL/SQL (1) oracle Perl perl (1) oracle installation usability (1) oracle number formatting format-model ora-sql oracle (1) oracle templates create table (1) oracle trigger generic autoincrement (1) oracle vbox virtual box cheat sheet (1) oracle virtual box cheat sheet (1) outlook (1) parser (1) password (1) paths (1) perl @INC compile-time run-time (1) perl disk usage administration Linux Unix (1) perl modules configuration management (1) permissions (1) php (1) picasa (1) platform (1) postgreSQL how-to (1) powerShell cmd cygwin mintty.exe terminal (1) ppm (1) predictions (1) prices (1) principles (1) productivity (1) project (1) prompt (1) proxy account (1) public private key (1) publishing (1) putty (1) qt (1) read file (1) registry (1) relationship (1) repository (1) rm (1) scala ScalaFmt (1) scp (1) scripts (1) scsi (1) search and replace (1) sed (1) sendEmail (1) sh stub (1) shortcuts Windows sql developer Oracle (1) sidebar (1) silicon (1) smells (1) smtp (1) software development (1) software procurement (1) sofware (1) sort (1) sql script (1) sql_dev (1) sqlcmd (1) sqlite (1) sqlite3 (1) sshd (1) sshd cygwin (1) stackoverflow (1) stored procedure (1) stub (1) stupidity (1) subroutines (1) svn (1) sysinternals (1) system design (1) tail (1) tar (1) temp table (1) templates (1) teradata (1) terminal (1) test (1) testing (1) theory (1) thorium (1) time (1) tip (1) title (1) tmux .tmux.conf configuration (1) tmux efficiency bash (1) tool (1) ui code prototyping tips and tricks (1) umask Linux Unix bash file permissions chmod (1) url (1) urls (1) user (1) utility (1) utils (1) vb (1) vbox virtual box cheat sheet (1) vim perl regex bash search for string (1) vim recursively hacks (1) vim starter (1) vim-cheat-sheet vim cheat-sheet (1) vimeo (1) visual stuio (1) warsaw (1) wiki (1) wikipedia (1) window (1) windows 7 (1) windows 8 (1) windows programs (1) windows reinstall (1) windows utility batch perl space Windows::Clipboard (1) wisdoms (1) workflow (1) worth-reading (1) wrapper (1) xp_cmdshell (1) xslt (1) youtube (1)

Blog Archive

Translate with Google Translate

My Blog List