[LC++] Weird behavior

Peter Poulsen peter_poulsen at stofanet.dk
Wed Feb 25 15:01:02 UTC 2004


I made this little tokenizer, which gives me an unexpected result. Does
anybody know why? (I compiled it with 2.95, so it may require minor
modifications to run with 3.2. Please bear with me.)

---------- tokenizer.hh ------------------
#ifndef TOKENIZER_HH
#define TOKENIZER_HH

#include <stdexcept>
#include <string>
#include <stack>

#include <algorithm>

namespace std
{
        class Tokenizer {
        public:
                Tokenizer(string whitespace) : _whitespace(whitespace) {}
                virtual ~Tokenizer() {}
                void tokenize(string const& line);
                string next_token() throw (underflow_error);
                void delete_tokens();
                bool is_empty() const { return _tokens.empty(); }
                
        private:
                stack<string> _tokens;
                string _whitespace;
        };
}
#endif // TOKENIZER_HH


------------ tokenizer.cc -----------------
#include <tokenizer.hh>

namespace std
{
        void Tokenizer::tokenize(string const& line) {
                // Scan the line from the end, so tokens are pushed on the
                // stack in reverse order and pop back off in forward order.
                string::size_type start_point = line.length(), end_point = line.length();
                string token_str;
                delete_tokens();

                while(start_point < string::npos && end_point < string::npos) {
                        end_point = line.find_last_not_of(_whitespace, start_point);
                        start_point = line.find_last_of(_whitespace, end_point);
                        
                        token_str = line.substr(start_point + 1, end_point - start_point);
                        if(token_str.length() > 0 && end_point != string::npos && start_point+1 != string::npos) {
                                _tokens.push(token_str);
                        }
                }
        }

        string Tokenizer::next_token() throw (underflow_error) {
                string token;
                if(_tokens.empty())
                        throw underflow_error("The tokenizer ran out of tokens.");
                
                token = _tokens.top();
                _tokens.pop();

                return token;
        }

        void Tokenizer::delete_tokens() {
                while(!_tokens.empty())
                        _tokens.pop();
        }
}
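
For what it's worth, here is a minimal standalone sketch (independent of
the class above; the file and variable names are just illustrative) of how
the two find_last_* calls walk the string from the end, so "two" is found
and pushed before "one":

---------- find_last_demo.cc (sketch) ----------
#include <iostream>
#include <string>

int main()
{
        std::string line = "one two";

        // First pass: scan backwards from the end of the string.
        std::string::size_type end = line.find_last_not_of(" \n");     // 6, the 'o' in "two"
        std::string::size_type start = line.find_last_of(" \n", end);  // 3, the space
        std::cout << line.substr(start + 1, end - start) << '\n';      // prints "two"

        // Second pass: keep scanning to the left of that space.
        end = line.find_last_not_of(" \n", start);                     // 2, the 'e' in "one"
        start = line.find_last_of(" \n", end);                         // npos, no whitespace left
        // start + 1 wraps around to 0, just like in tokenize() above.
        std::cout << line.substr(start + 1, end - start) << '\n';      // prints "one"
        return 0;
}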

----------------- main.cc --------------------------
#include <tokenizer.hh>
#include <string>
#include <iostream>

int main()
{
        std::Tokenizer t(" \n");
        std::string line = "one two";
        t.tokenize(line);
        // I expect to get first "one", then "two". It seems to work.
        std::cout << t.next_token() << std::endl;
        std::cout << t.next_token() << std::endl;

        t.tokenize(line);
        // I expect to get "onetwo", but I get "twoone"! Why?
        // (See also the sketch after this listing.)
        std::cout << t.next_token() << t.next_token() << std::endl;
        return 0;
}
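
As a point of comparison, here is a small variation of main.cc (just a
sketch; it assumes the difference comes from the order in which the
compiler evaluates the two next_token() calls inside the chained <<
expression, which the language does not fix):

---------- main_split.cc (sketch) ----------
#include <tokenizer.hh>
#include <string>
#include <iostream>

int main()
{
        std::Tokenizer t(" \n");
        std::string line = "one two";
        t.tokenize(line);

        // Pulling each call into its own statement pins down the order
        // in which next_token() runs, independent of the << chain.
        std::string first = t.next_token();
        std::string second = t.next_token();
        std::cout << first << second << std::endl;   // expecting "onetwo"
        return 0;
}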

---------------- makefile ----------------------------
all:
        g++ -Wall -I. main.cc tokenizer.cc -o t


-- 
Yours 
Peter Poulsen



