/* This program uses getline() to read lines of text, and prints each line preceded by its line number. */
#include <iostream>
#include <cstdio>
using namespace std;
int main()
{
  string text;
  int line_number;

  /* getline() stops succeeding once standard input is exhausted, which ends
     the loop.  The counter is bumped before printing, so numbering starts at 1. */
  for (line_number = 0; getline(cin, text); ) {
    line_number++;
    printf("Line %2d - %s\n", line_number, text.c_str());
  }

  return 0;
}
|
Running it, we see that it works as promised:
UNIX> cat data/input.txt
Give me a weapon of power, which no one else may hold,
Defend the Gods with honor, To lead the BRAVE and BOLD
LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
Bequeathed to me by Odin, Molded by the Dwarfs
MINE! This shimmering mallet, The Symbol of the Norse
LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
UNIX> bin/getline < data/input.txt
Line 1 - Give me a weapon of power, which no one else may hold,
Line 2 - Defend the Gods with honor, To lead the BRAVE and BOLD
Line 3 -
Line 4 - LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
Line 5 - LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
Line 6 -
Line 7 - Bequeathed to me by Odin, Molded by the Dwarfs
Line 8 - MINE! This shimmering mallet, The Symbol of the Norse
Line 9 -
Line 10 - LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
Line 11 - LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
UNIX>
UNIX> echo Jim Plank # Without quotes, the spaces are ignored and only the words matter. Jim Plank UNIX> echo " Jim Plank" # With quotes, the spaces are retained. Jim Plank UNIX> echo ' Jim Plank' # Single quotes work too. Jim Plank UNIX>
They are very natural, so you can often write programs with them without thinking about it too much. I'm assuming that all of the functionalities that I talk about here are review for you, but I want to make sure that we're all on the same page.
/* This program illustrates assigning a string variable from a literal */
#include <iostream>
using namespace std;
int main()
{
  string greeting;

  /* The assignment statement: the literal's characters are copied into the string. */
  greeting = "Hello World!";

  cout << "S: ";
  cout << greeting << endl;

  return 0;
}
|
When we run it, it prints "Hello World!": UNIX> bin/string_assignment S: Hello World! UNIX>
/* This program reads a string from standard input, and then reports its size using the size() method. */
#include <iostream>
using namespace std;
int main()
{
  string word;

  /* If no word can be read from standard input, exit without printing anything. */
  if (!(cin >> word)) return 0;

  cout << "The string's size is " << word.size() << "." << endl;
  return 0;
}
|
Here are straightforward examples:
UNIX> echo hi | bin/string_size The string's size is 2. UNIX> echo "Weeeeeeee" | bin/string_size The string's size is 9. UNIX> echo "" | bin/string_size # You'll note that here, cin failed, so the program exited. UNIX>
/* This program reads a string from standard input, and then changes its characters by
switching upper and lower-case letters. */
#include <iostream>
using namespace std;
int main()
{
  string word;
  size_t pos;
  char c;

  /* Nothing to do if no word can be read. */
  if (!(cin >> word)) return 0;

  /* Walk the word one character at a time.  Letters have their case flipped by
     adding or subtracting the distance between 'a' and 'A'; every other
     character is left alone. */
  for (pos = 0; pos < word.size(); pos++) {
    c = word[pos];
    if (c >= 'a' && c <= 'z') {            // Lower-case becomes upper-case.
      word[pos] = c - ('a' - 'A');
    } else if (c >= 'A' && c <= 'Z') {     // Upper-case becomes lower-case.
      word[pos] = c + ('a' - 'A');
    }
  }

  cout << "Swapped upper and lower-case: " << word << "." << endl;
  return 0;
}
|
Below we demonstrate it running correctly. Note that since the hyphen is neither an upper-case nor a lower-case letter, it remains unchanged in the string:
UNIX> echo Jim-Plank | bin/string_modify Swapped upper and lower-case: jIM-pLANK. UNIX>
/* This program reads two lines from standard input, and then tests them for equality. */
#include <iostream>
using namespace std;
int main()
{
  string first, second;

  /* Both lines have to be read successfully.  The second read is only
     attempted when the first one worked.  Otherwise, print nothing. */
  if (!getline(cin, first) || !getline(cin, second)) return 0;

  /* operator== compares the two strings character by character. */
  cout << (first == second ? "Equal" : "Not equal") << endl;
  return 0;
}
|
When we run it, sometimes things are simple and straightforward, and sometimes they're not:
UNIX> ( echo "hi" ; echo "there" ) | bin/string_equality # These are clearly not equal. Not equal UNIX> ( echo "hi" ; echo "hi" ) | bin/string_equality # While these are clearly equal. Equal UNIX> ( echo "hi" ; echo " hi" ) | bin/string_equality # Spaces matter, so these strings are not equal. Not equal UNIX> ( echo "1" ; echo "One" ) | bin/string_equality # They are strings, not anything special. Not equal UNIX> ( echo "hi" ; echo "HI" ) | bin/string_equality # Case matters. Not equal UNIX>
/* This program reads two lines from standard input, and then compares them. */
#include <iostream>
using namespace std;
int main()
{
  string first, second;
  const char *verdict;

  /* Both lines have to be read successfully.  Otherwise, print nothing. */
  if (!getline(cin, first) || !getline(cin, second)) return 0;

  /* Exactly one of ==, < and > holds for any two strings, so checking
     equality first and "less than" second leaves "greater than" as the rest. */
  if (first == second) {
    verdict = "Equal";
  } else if (first < second) {
    verdict = "Less than";
  } else {
    verdict = "Greater than";
  }

  cout << verdict << endl;
  return 0;
}
|
When we run it, we see lexicographic comparison in action.
UNIX> ( echo a ; echo b ) | bin/string_comparison Less than UNIX> ( echo b ; echo a ) | bin/string_comparison Greater than UNIX> ( echo a ; echo a ) | bin/string_comparison Equal UNIX> ( echo Jim ; echo Plank ) | bin/string_comparison # This is because 'J' is less than 'P' Less than UNIX> ( echo aa ; echo a ) | bin/string_comparison # If one string is a proper prefix of the other, then the longer string is greater. Greater than UNIX> ( echo A ; echo a ) | bin/string_comparison # Upper case is "smaller" than lower case. Less than UNIX> ( echo 1 ; echo A ) | bin/string_comparison # Digits are "smaller" than upper case. Less than UNIX> ( echo " 123" ; echo "123" ) | bin/string_comparison # Beware of spaces. Less than UNIX>
/* This program reads words from standard input, and prints their concatenation (without spaces). */
#include <iostream>
using namespace std;
int main()
{
  string word;
  string joined;      // A default-constructed string is already empty.

  /* Reading with >> skips all whitespace (spaces and newlines alike), so each
     word is appended directly onto the end of the previous one. */
  while (cin >> word) {
    joined += word;
  }

  cout << joined << endl;
  return 0;
}
|
When we run it, you see that the words from echo are concatenated without spaces:
UNIX> echo a b c d e f g h i j k lmnop | bin/string_concatenation abcdefghijklmnop UNIX> ( echo a b c ; echo d e f ) | bin/string_concatenation # Reading like this works on words, and ignores lines. abcdef UNIX>
vector <TYPE> variable-name(size) // The size is optional |
The TYPE can be a basic type like int or double, or it can be more complex, like a C++ class or even another vector. You can declare a vector as starting with a certain number of elements, or you can declare it to be empty. In either case, you can dynamically modify the vector's size with the resize() method. You can use the size() method to get the vector's current size.
A very simple example program is in src/vec1.cpp:
/* A simple program to show the basics of vectors:
- Declaring them.
- Checking their size
- Resizing
- Setting values;
*/
#include <cstdio>
#include <vector>
#include <iostream>
using namespace std;
int main()
{
  vector <int> v1;           /* No size is given, so v1 starts out empty. */
  vector <double> v2(10);    /* v2 starts out with ten elements. */
  size_t i;

  /* Show the initial sizes.  size() returns a size_t -- an unsigned type,
     treated here as an unsigned long -- so printf() is handed "%lu" rather
     than "%d". */
  printf("V1's size: %lu. V2's size: %lu\n", v1.size(), v2.size());

  /* Grow v1 to five elements, shrink v2 to eight, and show the sizes again. */
  v1.resize(5);
  v2.resize(8);
  printf("V1's size: %lu. V2's size: %lu\n", v1.size(), v2.size());

  /* Give every element a value that depends on its index. */
  for (i = 0; i < v1.size(); i++) {
    v1[i] = 10 + i;
  }
  for (i = 0; i < v2.size(); i++) {
    v2[i] = 20.3 + i;
  }

  /* Print each vector on its own line. */
  printf("V1:");
  for (i = 0; i < v1.size(); i++) {
    printf(" %d", v1[i]);
  }
  printf("\n");

  printf("V2:");
  for (i = 0; i < v2.size(); i++) {
    printf(" %.1lf", v2[i]);
  }
  printf("\n");

  return 0;
}
|
This program declares an empty integer vector v1 and a ten-element double vector v2, and prints their sizes. It then resizes the vectors to five and eight and prints their sizes again. It then initializes the elements of v1 and v2 in two for loops and prints out the two vectors. Straightforward stuff:
UNIX> bin/vec1 V1's size: 0. V2's size: 10 V1's size: 5. V2's size: 8 V1: 10 11 12 13 14 V2: 20.3 21.3 22.3 23.3 24.3 25.3 26.3 27.3 UNIX>One note -- when you print a size() with printf(), you need to specify "%lu" or "%ld" instead of "%d". This is because sizes are 64-bit, unsigned quantities. If you don't do it, you will get a compiler warning. Similarly, you'll want to declare i to be a size_t, which again is an unsigned long. Otherwise, you'll get compiler warnings when you compare i to v1.size().
When you create vector elements, default values are placed in them. For example, string vectors start with default empty strings. Numerical values start at zero: new vector elements are value-initialized, so, unlike ordinary uninitialized local variables, you can rely on that. You can specify what the default values should be as a second parameter to the resize() method.
Take a careful look at src/vec2.cpp:
/* This program shows some subtleties of resizing vectors. */
#include <cstdio>
#include <cstdlib>
#include <vector>
#include <iostream>
using namespace std;
/* Print label, then each element of v preceded by a space, then a newline. */
static void print_vector(const char *label, const vector <int> &v)
{
  size_t i;

  printf("%s", label);
  for (i = 0; i < v.size(); i++) printf(" %d", v[i]);
  printf("\n");
}

/* Resize v1 several times, printing the whole vector after each step so that
   you can see which elements each resize() call touches. */
int main()
{
  vector <int> v1;

  /* Start with two v1.resizes, setting new elements to 22 and 33. */
  v1.resize(5, 22);
  v1.resize(8, 33);
  print_vector("Initial V1: ", v1);

  /* Chop it down to six elements. */
  v1.resize(6);
  print_vector("v1.resize(6): ", v1);

  /* Now resize to 10 elements, setting the new ones to 44. */
  v1.resize(10, 44);
  print_vector("v1.resize(10, 44): ", v1);

  /* Does this add 5 new random elements, or 5 copies of one random element?
     (rand() is declared in <cstdlib>.) */
  v1.resize(15, rand());
  print_vector("v1.resize(15, rand()): ", v1);

  return 0;
}
|
We first resize v1 to hold five elements that are 22, and then we resize v1 to hold eight elements with a default of 33. This raises a question -- will only the new elements be 33, or will all of them? Look at the output:
UNIX> bin/vec2 Initial V1: 22 22 22 22 22 33 33 33 v1.resize(6): 22 22 22 22 22 33 v1.resize(10, 44): 22 22 22 22 22 33 44 44 44 44 v1.resize(15, rand()): 22 22 22 22 22 33 44 44 44 44 16807 16807 16807 16807 16807 UNIX>Only the new ones are given values. The third resize() removes two elements, and then the fourth increases the size from 6 to 10, putting the value of 44 into the new elements.
The last resize() adds 5 new elements and gives them a default of rand(), which is a random integer. You may wonder, will that create five different random numbers, or five copies of one random number? It is the latter, because the resize() command simply calls rand() once, and its return value is passed to the resize() command. (BTW, I will discuss random numbers later in class).
Here's src/reverse.cpp:
/* This program uses a vector to print standard input in reverse order. */
#include <vector>
#include <iostream>
using namespace std;
/* Read all of standard input, then print its lines from last to first. */
int main()
{
vector <string> lines;
/* i is deliberately an int rather than a size_t: the printing loop below only
   stops once i has dropped to -1, and an unsigned index can never be negative. */
int i;
string s;
/* Read every line of standard input into a vector called lines. */
while (getline(cin, s)) lines.push_back(s);
/* Now print lines in reverse order.  Start at the last index and count down
   through index 0.  NOTE(review): with empty input, lines.size()-1 wraps around
   as an unsigned value and is then converted to int; on the usual platforms that
   yields -1, so the loop body never runs -- confirm if portability matters. */
for (i = lines.size()-1; i >= 0; i--) cout << lines[i] << endl;
return 0;
}
|
It makes use of the vector method push_back(), which appends an element to a vector. push_back() is guaranteed to run quickly, and it is much more convenient than resizing the array.
To show reverse running, I first call cat -n on data/input.txt. That prints data/input.txt to the screen with line numbers.
UNIX> cat -n data/input.txt
1 Give me a weapon of power, which no one else may hold,
2 Defend the Gods with honor, To lead the BRAVE and BOLD
3
4 LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
5 LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
6
7 Bequeathed to me by Odin, Molded by the Dwarfs
8 MINE! This shimmering mallet, The Symbol of the Norse
9
10 LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
11 LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
UNIX> cat -n data/input.txt | bin/reverse
11 LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
10 LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
9
8 MINE! This shimmering mallet, The Symbol of the Norse
7 Bequeathed to me by Odin, Molded by the Dwarfs
6
5 LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
4 LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
3
2 Defend the Gods with honor, To lead the BRAVE and BOLD
1 Give me a weapon of power, which no one else may hold,
UNIX>
You may have noticed that I didn't use a size_t for i, but instead used
an int. That's because size_t's are unsigned quantities, and therefore cannot
assume negative values. If you try to use a size_t for i, you'll get a
compiler warning:
reverse.cpp:19:30: warning: comparison of unsigned expression >= 0 is always true
[-Wtautological-compare]
for (i = lines.size()-1; i >= 0; i--) cout << lines[i] << endl;
Do you think it's a good thing that when i is a size_t, it can't be
set to -1? I personally don't, but no one asked me. Be aware of it, and pay attention to
the warnings emitted by the compiler.
The second program performs the same functionality as the tail command -- it prints out the last ten lines of standard input. We can write a simple version of tail that is like reverse.cpp. It reads all of the lines into a vector and then prints out just the last ten lines. It's in src/mytail1.cpp:
/* This program prints out the last ten lines of a file, (or the whole file if it
has fewer than ten lines). It reads all of the lines into a vector, and then
prints out the last ten entries. */
#include <vector>
#include <iostream>
using namespace std;
int main()
{
  vector <string> lines;
  string s;
  size_t i;

  /* Store every line of standard input. */
  while (getline(cin, s)) {
    lines.push_back(s);
  }

  /* Work out the index of the first line to print.  Because i is unsigned,
     the subtraction is only performed when there are at least ten lines. */
  i = 0;
  if (lines.size() >= 10) i = lines.size() - 10;

  /* Print from that index through the end of the vector. */
  while (i < lines.size()) {
    cout << lines[i] << endl;
    i++;
  }

  return 0;
}
|
We need the if statement to handle files that are smaller than 10 lines. If we didn't have the if statement, then setting i to lines.size()-10 would not give us a negative number, because size_t's can't have negative values; instead, the subtraction wraps around to an enormous positive value. The test i < lines.size() would then be false right away, so the program would silently print nothing -- a bug with no segmentation violation to alert you. Since we've put in that if statement, there is no bug:
UNIX> cat -n data/input.txt | bin/mytail1
2 Defend the Gods with honor, To lead the BRAVE and BOLD
3
4 LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
5 LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
6
7 Bequeathed to me by Odin, Molded by the Dwarfs
8 MINE! This shimmering mallet, The Symbol of the Norse
9
10 LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
11 LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
UNIX> head -n 4 data/input.txt | bin/mytail1
Give me a weapon of power, which no one else may hold,
Defend the Gods with honor, To lead the BRAVE and BOLD
LIGHTNING STRIKES LIGHTNING STRIKES AGAIN
UNIX>
I'll contend, though, that src/mytail1.cpp is not as good of a program
as it could be. Why? Consider
what happens if you call it on a file with 1,000,000 lines. You are storing all
1,000,000 lines, but you are only printing the last ten. That's a big
waste of memory!
This problem is fixed in src/mytail2.cpp:
/* This program also prints out the last ten lines of standard input, however unlike
mytail1.cpp, it only stores ten lines, rather than the entire file. You keep track
of the total number of lines in the variable "ln", and you simply keep overwriting
the strings in the "lines" vector, until you get to the end of the file. */
#include <cstdio>
#include <vector>
#include <iostream>
using namespace std;
/* Print the last ten lines of standard input (or all of them if there are
   fewer than ten), while never storing more than ten lines at a time. */
int main()
{
  const size_t keep = 10;    // How many trailing lines are stored and printed.
  vector <string> lines;     // Holds at most "keep" lines; line x lives in element x % keep.
  size_t i, ln;              // Unsigned, as in mytail1.cpp.  An int line counter would overflow
                             // (undefined behavior) on inputs with more than INT_MAX lines.
  string s;

  /* Read the lines into elements 0 through keep-1 of the vector "lines."
     Once the vector is full, each new line overwrites the oldest one. */
  ln = 0;
  while (getline(cin, s)) {
    if (ln < keep) {
      lines.push_back(s);
    } else {
      lines[ln % keep] = s;
    }
    ln++;
  }

  /* Set i to be (ln-keep), or 0 if we haven't read that many lines.  The test
     comes before the subtraction because an unsigned value cannot go negative. */
  i = (ln < keep) ? 0 : ln - keep;

  /* Now print out the last lines, oldest first. */
  for ( ; i < ln; i++) cout << lines[i % keep] << endl;
  return 0;
}
|
Once lines becomes ten elements long, we no longer call push_back(), but instead replace the oldest element with s. When reading is done, we have the last ten lines, but not always starting at element 0. To print them out, we need to find the array element for each of the last ten lines. Consider line x. If it is in the array, it will be in element x%10. Thus, if the file has ln total lines and ln > 10, then we want to print out lines ln-10 to ln-1. The for loop that ends the program does just that.