第8章 字符和字符串 字符类型 基本字符类型 类型 大小(字节) 范围 char 1 -128 到 127 或 0 到 255(取决于实现) signed char 1 -128 到 127 unsigned char 1 0 到 255 wchar_t 2 或 4 取决于实现 char16_t 2 0 到 65535(C++11+) char32_t 4 0 到 4294967295(C++11+) char8_t 1 0 到 255(C++20+,用于UTF-8编码)
字符常量 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 char c1 = 'A' ;char c2 = '\n' ; char c3 = '\\' ; char c4 = '\'' ; char c5 = '"' ; char c6 = '\0' ; wchar_t wc1 = L'A' ;wchar_t wc2 = L'中' ;char16_t c16 = u'A' ;char32_t c32 = U'A' ;
转义序列 转义序列 描述 \n 换行符 \t 制表符 \r 回车符 \\ 反斜杠 \' 单引号 \" 双引号 \0 空字符 \a 响铃符 \b 退格符 \f 换页符 \v 垂直制表符 \xhh 十六进制转义序列 \ooo 八进制转义序列
C风格字符串 字符串字面量 1 2 3 4 5 6 7 8 9 10 11 const char * str1 = "Hello, world!" ;const char str2[] = "Hello, world!" ;const char * str3 = "Line 1\n" "Line 2\n" "Line 3" ; const char * str4 = R"(Raw string with \n and ")" ;
字符串操作函数 C标准库提供了一系列字符串操作函数,声明在<cstring>头文件中:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 #include <cstring> const char * str = "Hello" ;size_t length = strlen (str); char dest[20 ];strcpy (dest, "Hello" ); strncpy (dest, "Hello" , sizeof (dest) - 1 );dest[sizeof (dest) - 1 ] = '\0' ; strcat (dest, " world" ); strncat (dest, " world" , sizeof (dest) - strlen (dest) - 1 );int result = strcmp ("Hello" , "World" ); result = strcmp ("Hello" , "Hello" ); result = strcmp ("World" , "Hello" ); result = strncmp ("Hello" , "World" , 3 ); const char * found = strchr ("Hello" , 'l' ); found = strrchr ("Hello" , 'l' ); found = strstr ("Hello world" , "world" );
字符串输入和输出 1 2 3 4 5 6 7 8 9 10 11 12 13 14 char name[50 ];std::cout << "Enter your name: " ; std::cin >> name; std::cout << "Your name is: " << name << std::endl; std::cout << "Enter a line: " ; std::cin.ignore (); std::cin.getline (name, sizeof (name)); std::cout << "You entered: " << name << std::endl; std::cout << "Hello, " << name << "!" << std::endl;
string 类(C++标准库) 基本用法 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 #include <string> std::string s1; std::string s2 = "Hello" ; std::string s3 ("Hello" ) ;std::string s4 (5 , 'a' ) ; std::string s5 (s2) ; std::string s6 (s2, 1 , 3 ) ; std::string s7 ({ 'H' , 'e' , 'l' , 'l' , 'o' }) ; s1 = "World" ; s1 = s2; s1. assign ("Hello" ); s1. assign ("Hello" , 2 , 3 ); s1. assign (5 , 'x' ); s1 = "Hello" ; s1 += " world" ; s1. append ("!" ); s1. append ("abc" , 2 ); s1. append (3 , '?' ); char c = s1[0 ]; c = s1. at (0 ); size_t len = s1.length ();len = s1. size (); bool empty = s1. empty ();int result = s1. compare (s2); result = s1. compare (0 , 2 , s2, 0 , 2 ); size_t pos = s1.find ('l' ); pos = s1.find ("world" ); pos = s1.find ('l' , 2 ); pos = s1. rfind ('l' ); pos = s1.find_first_of ("aeiou" ); pos = s1.find_last_of ("aeiou" ); pos = s1.find_first_not_of ("abc" ); std::string sub = s1. substr (1 , 3 ); s1. insert (5 , " " ); s1. erase (5 , 1 ); s1. replace (0 , 5 , "Hi" ); int i = std::stoi ("123" ); long l = std::stol ("123456" ); double d = std::stod ("3.14" ); std::string s = std::to_string (123 ); std::string a = "Hello" , b = "World" ; a.swap (b);
string 类的输入和输出 1 2 3 4 5 6 7 8 9 10 11 12 13 14 std::string name; std::cout << "Enter your name: " ; std::cin >> name; std::cout << "Your name is: " << name << std::endl; std::cout << "Enter a line: " ; std::cin.ignore (); std::getline (std::cin, name); std::cout << "You entered: " << name << std::endl; std::cout << "Hello, " << name << "!" << std::endl;
字符串流 字符串输入流(istringstream) 1 2 3 4 5 6 7 8 9 10 11 #include <sstream> std::string data = "123 45.67 Hello" ; std::istringstream iss (data) ;int i;double d;std::string s; iss >> i >> d >> s; std::cout << "i: " << i << ", d: " << d << ", s: " << s << std::endl;
字符串输出流(ostringstream) 1 2 3 4 5 6 7 8 9 10 #include <sstream> std::ostringstream oss; int i = 123 ;double d = 45.67 ;std::string s = "Hello" ; oss << "i: " << i << ", d: " << d << ", s: " << s; std::string result = oss.str (); std::cout << result << std::endl;
字符串流的应用 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 int number = 123 ;std::ostringstream oss; oss << number; std::string numberStr = oss.str (); std::string numberStr = "123" ; std::istringstream iss (numberStr) ;int number;iss >> number; std::ostringstream oss; oss << std::fixed << std::setprecision (2 ); oss << "Pi is approximately " << 3.14159 ; std::string message = oss.str ();
宽字符串 宽字符和宽字符串 1 2 3 4 5 6 7 8 9 10 11 wchar_t wc = L'A' ;const wchar_t * wstr = L"Hello, world!" ;std::wcout << L"Enter your name: " ; std::wstring wname; std::wcin >> wname; std::wcout << L"Hello, " << wname << L"!" << std::endl;
wstring 类 1 2 3 4 5 6 7 8 std::wstring ws1 = L"Hello" ; std::wstring ws2 (5 , L'a' ) ;ws1 += L" world" ; size_t len = ws1.length ();std::wcout << ws1 << std::endl;
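Unicode 字符串 UTF-8 字符串 1 2 3 4 5 6 7 8 const char * utf8Str = u8"Hello, 世界!" ;std::string utf8String = u8"Hello, 世界!" ; std::cout << utf8String << std::endl;
注意:以上写法适用于 C++11 至 C++17。自 C++20 起,u8"..." 字面量的类型是 const char8_t[],不能再用来初始化 const char* 或 std::string,应改用 const char8_t* 和 std::u8string(标准库也没有为 std::u8string 提供 std::cout 输出)。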
UTF-16 字符串 1 2 3 4 5 const char16_t * utf16Str = u"Hello, 世界!" ;std::u16string utf16String = u"Hello, 世界!" ;
UTF-32 字符串 1 2 3 4 5 const char32_t * utf32Str = U"Hello, 世界!" ;std::u32string utf32String = U"Hello, 世界!" ;
字符串的最佳实践 1. 优先使用 std::string
安全性 :std::string 自动管理内存,避免缓冲区溢出
便捷性 :std::string 提供了丰富的成员函数
可读性 :std::string 的代码更易读、易维护
兼容性 :std::string 可以与 C 风格字符串互操作
2. 避免缓冲区溢出 1 2 3 4 5 6 7 char buffer[10 ];std::cin >> buffer; std::string buffer; std::cin >> buffer;
3. 字符串连接 1 2 3 4 5 6 7 8 9 10 11 12 13 14 std::string result; result = "Hello" ; result += " " ; result += "world" ; result += "!" ; std::string result = "Hello" + std::string (" " ) + "world" + "!" ; std::ostringstream oss; oss << "Hello" << " " << "world" << "!" ; std::string result = oss.str ();
4. 字符串比较 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 const char * str1 = "Hello" ;const char * str2 = "Hello" ;if (str1 == str2) { } if (strcmp (str1, str2) == 0 ) { } std::string s1 = "Hello" ; std::string s2 = "Hello" ; if (s1 == s2) { }
5. 字符串转换 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 int number = 123 ;std::string str = std::to_string (number); std::ostringstream oss; oss << number; std::string str = oss.str (); std::string str = "123" ; int number = std::stoi (str);std::istringstream iss (str) ;int number;iss >> number;
C++11+字符串处理新特性 字符串视图(std::string_view,C++17+) std::string_view是C++17引入的一个非所有权字符串视图,用于提供对字符串的高效访问,避免不必要的字符串复制:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 #include <string_view> #include <string> std::string s = "Hello, world!" ; std::string_view sv (s) ;std::cout << sv << std::endl; const char * cstr = "Hello" ;std::string_view sv2 (cstr) ;std::string_view sv3 (s.data (), 5 ) ; std::cout << "Length: " << sv.length () << std::endl; std::cout << "Empty: " << sv.empty () << std::endl; std::cout << "Substring: " << sv.substr (7 , 5 ) << std::endl; size_t pos = sv.find ("world" );if (pos != std::string_view::npos) { std::cout << "Found 'world' at position: " << pos << std::endl; } if (sv.starts_with ("Hello" )) { std::cout << "Starts with 'Hello'" << std::endl; } if (sv.ends_with ("!" )) { std::cout << "Ends with '!'" << std::endl; }
注意:std::string_view 没有 (字符串, 位置, 长度) 形式的构造函数,需要子串视图时可用 (指针, 长度) 构造,或对已有视图调用 substr;另外 starts_with 和 ends_with 是 C++20 才加入的成员函数,C++17 中不可用。
std::string的新方法(C++11+) C++11新方法 1 2 3 4 5 6 7 8 std::string s1 = "Hello" ; std::string s2 = std::move (s1); std::string s; s.push_back ('H' ); s.append ("ello" );
C++14新方法 1 2 3 4 5 6 using namespace std::string_literals;std::string s = "Hello"s; std::string raw = R"(Raw string with "quotes" and \backslashes)"s;
C++20新方法 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 std::string s = "Hello, world!" ; if (s.starts_with ("Hello" )) { std::cout << "Starts with 'Hello'" << std::endl; } if (s.ends_with ("!" )) { std::cout << "Ends with '!'" << std::endl; } if (s.starts_with (std::string_view ("He" ))) { std::cout << "Starts with 'He'" << std::endl; } std::vector<char > chars = {'H' , 'e' , 'l' , 'l' , 'o' }; std::string s2 (chars.begin(), chars.end()) ;
C++23新方法 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 std::string s = "Hello, world!" ; if (s.contains ("world" )) { std::cout << "Contains 'world'" << std::endl; } if (s.contains ('o' )) { std::cout << "Contains 'o'" << std::endl; } std::string s; s.resize_and_overwrite (10 , [](char * buffer, size_t size) -> size_t { std::memcpy (buffer, "Hello" , 5 ); return 5 ; }); std::cout << s << std::endl;
正则表达式(C++11+) C++11引入了std::regex库,用于字符串的模式匹配和替换:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 #include <regex> #include <string> std::string s = "Hello, world!" ; std::regex pattern ("world" ) ;if (std::regex_search (s, pattern)) { std::cout << "Found 'world'" << std::endl; } std::string date = "2023-12-25" ; std::regex datePattern (R"((\d{4})-(\d{2})-(\d{2}))" ) ;std::smatch matches; if (std::regex_search (date, matches, datePattern)) { std::cout << "Year: " << matches[1 ] << std::endl; std::cout << "Month: " << matches[2 ] << std::endl; std::cout << "Day: " << matches[3 ] << std::endl; } std::string text = "Hello, world! Hello, C++!" ; std::regex replacePattern ("Hello" ) ;std::string result = std::regex_replace (text, replacePattern, "Hi" ); std::cout << result << std::endl; std::regex caseInsensitivePattern ("hello" , std::regex::icase) ;if (std::regex_search (s, caseInsensitivePattern)) { std::cout << "Found 'hello' (case insensitive)" << std::endl; }
C++20引入了std::format库,提供了一种类型安全、灵活的字符串格式化方法:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 #include <format> #include <string> std::string message = std::format("Hello, {}!" , "world" ); std::cout << message << std::endl; std::string info = std::format("Name: {}, Age: {}" , "Alice" , 30 ); std::cout << info << std::endl; std::string number = std::format("Pi is approximately {:.2f}" , 3.14159 ); std::cout << number << std::endl; std::string aligned = std::format("{:<10} {:>10}" , "Left" , "Right" ); std::cout << aligned << std::endl; std::string hex = std::format("Decimal: {}, Hex: {:x}, Octal: {:o}" , 42 , 42 , 42 ); std::cout << hex << std::endl;
类型安全 :相比printf,std::format是类型安全的
灵活性 :支持自动编号和显式位置索引的参数(如 {0}、{1}),但不支持命名参数
可读性 :格式化字符串更清晰易读
性能 :性能与printf相当或更好
扩展性 :支持自定义类型的格式化
C++23新特性:print库 C++23引入了std::print和std::println函数,提供了一种更方便的字符串输出方法:
1 2 3 4 5 6 7 8 9 10 11 12 13 #include <print> std::print ("Hello, {}!\n" , "world" ); std::println ("Hello, {}!" , "world" ); std::println ("Name: {}, Age: {}" , "Alice" , 30 ); std::println ("Pi is approximately {:.2f}" , 3.14159 );
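Unicode字符串处理进阶 Unicode码点和代码单元 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 #include <cuchar> #include <string> void printUtf8CodePoints (const std::string& utf8Str) { const char * p = utf8Str.data (); const char * end = p + utf8Str.size (); while (p < end) { char32_t codePoint; size_t len = std::mbrtoc32 (&codePoint, p, end - p, nullptr ); if (len == static_cast <size_t >(-1 ) || len == static_cast <size_t >(-2 )) { break ; } if (len == 0 ) { len = 1 ; } std::cout << "Code point: U+" << std::hex << static_cast <unsigned long >(codePoint) << std::endl; p += len; } } std::string utf8Str = u8"Hello, 世界!" ; printUtf8CodePoints (utf8Str);
注意:mbrtoc32 按当前 C 区域设置(locale)的多字节编码进行转换,默认的 "C" 区域设置不保证是 UTF-8,调用前需要先用 std::setlocale 切换到 UTF-8 区域设置(例如 std::setlocale(LC_ALL, "en_US.UTF-8"))。当转换出的字符是空字符时 mbrtoc32 返回 0,此时必须手动前进 1 个字节,否则循环不会结束。自 C++20 起不能直接把 char32_t 输出到 std::cout,因此先转换为整数类型再输出。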
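Unicode字符串的转换 1 2 3 4 5 6 7 8 9 10 11 12 13 #include <codecvt> #include <locale> std::wstring_convert<std::codecvt_utf8_utf16<wchar_t >> converter; std::string utf8Str = u8"Hello, 世界!" ; std::wstring utf16Str = converter.from_bytes (utf8Str); std::string utf8Str2 = converter.to_bytes (utf16Str);
注意:std::wstring_convert 以及 <codecvt> 中的 std::codecvt_utf8_utf16 自 C++17 起已被弃用,新代码应考虑使用平台 API 或第三方库(如 ICU)完成编码转换。此外,只有在 wchar_t 为 16 位的平台(如 Windows)上,std::wstring 中保存的才是 UTF-16 代码单元。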
常见错误和陷阱 1. 空指针解引用 1 2 3 4 5 6 7 8 const char * str = nullptr ;size_t len = strlen (str); if (str != nullptr ) { size_t len = strlen (str); }
2. 缓冲区溢出 1 2 3 4 5 6 7 8 9 10 char buffer[10 ];strcpy (buffer, "This string is too long" ); strncpy (buffer, "This string is too long" , sizeof (buffer) - 1 );buffer[sizeof (buffer) - 1 ] = '\0' ; std::string buffer = "This string is too long" ;
3. 忘记 null 终止符 1 2 3 4 5 6 7 8 9 10 11 12 13 14 char buffer[10 ];for (int i = 0 ; i < 10 ; i++) { buffer[i] = 'a' ; } std::cout << buffer << std::endl; char buffer[11 ]; for (int i = 0 ; i < 10 ; i++) { buffer[i] = 'a' ; } buffer[10 ] = '\0' ; std::cout << buffer << std::endl;
4. 字符串字面量的修改 1 2 3 4 5 6 7 char * str = "Hello" ;str[0 ] = 'h' ; char str[] = "Hello" ;str[0 ] = 'h' ;
5. 混合使用 C 风格字符串和 std::string 1 2 3 4 5 6 7 8 9 std::string s = "Hello" ; const char * cstr = s.c_str ();std::string s = "Hello" ; std::string copy = s; const char * cstr = copy.c_str ();
小结 本章介绍了C++中的字符和字符串处理,包括:
字符类型 :char、wchar_t、char16_t、char32_t、char8_t
C风格字符串 :字符数组、字符串字面量、字符串操作函数
std::string 类 :C++标准库提供的字符串类,具有丰富的成员函数
字符串流 :istringstream 和 ostringstream,用于字符串的输入输出
宽字符串 :wchar_t 和 std::wstring
Unicode 字符串 :UTF-8、UTF-16、UTF-32 字符串
字符串的最佳实践 :优先使用 std::string,避免缓冲区溢出等
常见错误和陷阱 :空指针解引用、缓冲区溢出、忘记 null 终止符等
字符串是C++程序中最常用的数据类型之一,掌握好字符串的处理方法对于编写高效、可靠的程序至关重要。在实际编程中,应优先使用 std::string 类,它提供了更安全、更便捷的字符串操作方式。同时,也要了解 C 风格字符串的基本概念和操作函数,因为在一些遗留代码或与 C 库交互的场景中仍然会用到。
在后续章节中,我们将学习更高级的C++特性,如内存模型、面向对象编程、模板等,这些特性将与字符串处理结合使用,帮助我们构建更复杂、更强大的程序。