00001
00002
00003 #include "Python.h"
00004
00005 #include <ctype.h>
00006
#ifdef COUNT_ALLOCS
/* Statistics counters: bumped each time the cached empty string
   (null_strings) or a cached one-character string (one_strings) is
   handed out instead of allocating a new object. */
int null_strings, one_strings;
#endif

/* Cache of one-character string objects, indexed by byte value
   (*str & UCHAR_MAX).  Entries are filled in lazily by the constructors. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* Cached empty string object; NULL until the first empty string is built. */
static PyStringObject *nullstring;
00013
00014
00015
00016
00017
00018
00019
00020
00021
/* Dictionary of interned strings.  string_dealloc() deletes a mortal
   interned string from it when that string is destroyed. */
static PyObject *interned;
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051 PyObject *
00052 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
00053 {
00054 register PyStringObject *op;
00055 assert(size >= 0);
00056 if (size == 0 && (op = nullstring) != NULL) {
00057 #ifdef COUNT_ALLOCS
00058 null_strings++;
00059 #endif
00060 Py_INCREF(op);
00061 return (PyObject *)op;
00062 }
00063 if (size == 1 && str != NULL &&
00064 (op = characters[*str & UCHAR_MAX]) != NULL)
00065 {
00066 #ifdef COUNT_ALLOCS
00067 one_strings++;
00068 #endif
00069 Py_INCREF(op);
00070 return (PyObject *)op;
00071 }
00072
00073
00074 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
00075 if (op == NULL)
00076 return PyErr_NoMemory();
00077 PyObject_INIT_VAR(op, &PyString_Type, size);
00078 op->ob_shash = -1;
00079 op->ob_sstate = SSTATE_NOT_INTERNED;
00080 if (str != NULL)
00081 memcpy(op->ob_sval, str, size);
00082 op->ob_sval[size] = '\0';
00083
00084 if (size == 0) {
00085 PyObject *t = (PyObject *)op;
00086 PyString_InternInPlace(&t);
00087 op = (PyStringObject *)t;
00088 nullstring = op;
00089 Py_INCREF(op);
00090 } else if (size == 1 && str != NULL) {
00091 PyObject *t = (PyObject *)op;
00092 PyString_InternInPlace(&t);
00093 op = (PyStringObject *)t;
00094 characters[*str & UCHAR_MAX] = op;
00095 Py_INCREF(op);
00096 }
00097 return (PyObject *) op;
00098 }
00099
00100 PyObject *
00101 PyString_FromString(const char *str)
00102 {
00103 register size_t size;
00104 register PyStringObject *op;
00105
00106 assert(str != NULL);
00107 size = strlen(str);
00108 if (size > INT_MAX) {
00109 PyErr_SetString(PyExc_OverflowError,
00110 "string is too long for a Python string");
00111 return NULL;
00112 }
00113 if (size == 0 && (op = nullstring) != NULL) {
00114 #ifdef COUNT_ALLOCS
00115 null_strings++;
00116 #endif
00117 Py_INCREF(op);
00118 return (PyObject *)op;
00119 }
00120 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
00121 #ifdef COUNT_ALLOCS
00122 one_strings++;
00123 #endif
00124 Py_INCREF(op);
00125 return (PyObject *)op;
00126 }
00127
00128
00129 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
00130 if (op == NULL)
00131 return PyErr_NoMemory();
00132 PyObject_INIT_VAR(op, &PyString_Type, size);
00133 op->ob_shash = -1;
00134 op->ob_sstate = SSTATE_NOT_INTERNED;
00135 memcpy(op->ob_sval, str, size+1);
00136
00137 if (size == 0) {
00138 PyObject *t = (PyObject *)op;
00139 PyString_InternInPlace(&t);
00140 op = (PyStringObject *)t;
00141 nullstring = op;
00142 Py_INCREF(op);
00143 } else if (size == 1) {
00144 PyObject *t = (PyObject *)op;
00145 PyString_InternInPlace(&t);
00146 op = (PyStringObject *)t;
00147 characters[*str & UCHAR_MAX] = op;
00148 Py_INCREF(op);
00149 }
00150 return (PyObject *) op;
00151 }
00152
00153 PyObject *
00154 PyString_FromFormatV(const char *format, va_list vargs)
00155 {
00156 va_list count;
00157 Py_ssize_t n = 0;
00158 const char* f;
00159 char *s;
00160 PyObject* string;
00161
00162 #ifdef VA_LIST_IS_ARRAY
00163 memcpy(count, vargs, sizeof(va_list));
00164 #else
00165 #ifdef __va_copy
00166 __va_copy(count, vargs);
00167 #else
00168 count = vargs;
00169 #endif
00170 #endif
00171
00172 for (f = format; *f; f++) {
00173 if (*f == '%') {
00174 const char* p = f;
00175 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
00176 ;
00177
00178
00179
00180
00181
00182 if (*f == 'l' && *(f+1) == 'd')
00183 ++f;
00184
00185 if (*f == 'z' && *(f+1) == 'd')
00186 ++f;
00187
00188 switch (*f) {
00189 case 'c':
00190 (void)va_arg(count, int);
00191
00192 case '%':
00193 n++;
00194 break;
00195 case 'd': case 'i': case 'x':
00196 (void) va_arg(count, int);
00197
00198
00199
00200 n += 20;
00201 break;
00202 case 's':
00203 s = va_arg(count, char*);
00204 n += strlen(s);
00205 break;
00206 case 'p':
00207 (void) va_arg(count, int);
00208
00209
00210
00211
00212
00213 n += 19;
00214 break;
00215 default:
00216
00217
00218
00219
00220
00221
00222 n += strlen(p);
00223 goto expand;
00224 }
00225 } else
00226 n++;
00227 }
00228 expand:
00229
00230
00231
00232 string = PyString_FromStringAndSize(NULL, n);
00233 if (!string)
00234 return NULL;
00235
00236 s = PyString_AsString(string);
00237
00238 for (f = format; *f; f++) {
00239 if (*f == '%') {
00240 const char* p = f++;
00241 Py_ssize_t i;
00242 int longflag = 0;
00243 int size_tflag = 0;
00244
00245
00246 n = 0;
00247 while (isdigit(Py_CHARMASK(*f)))
00248 n = (n*10) + *f++ - '0';
00249 if (*f == '.') {
00250 f++;
00251 n = 0;
00252 while (isdigit(Py_CHARMASK(*f)))
00253 n = (n*10) + *f++ - '0';
00254 }
00255 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
00256 f++;
00257
00258
00259 if (*f == 'l' && *(f+1) == 'd') {
00260 longflag = 1;
00261 ++f;
00262 }
00263
00264 if (*f == 'z' && *(f+1) == 'd') {
00265 size_tflag = 1;
00266 ++f;
00267 }
00268
00269 switch (*f) {
00270 case 'c':
00271 *s++ = va_arg(vargs, int);
00272 break;
00273 case 'd':
00274 if (longflag)
00275 sprintf(s, "%ld", va_arg(vargs, long));
00276 else if (size_tflag) {
00277
00278
00279
00280 #if SIZEOF_SIZE_T == SIZEOF_LONG
00281 sprintf(s, "%ld", va_arg(vargs, long));
00282 #elif defined(MS_WINDOWS)
00283 sprintf(s, "%Id", va_arg(vargs, size_t));
00284 #else
00285 #error Cannot print size_t values
00286 #endif
00287 }
00288 else
00289 sprintf(s, "%d", va_arg(vargs, int));
00290 s += strlen(s);
00291 break;
00292 case 'i':
00293 sprintf(s, "%i", va_arg(vargs, int));
00294 s += strlen(s);
00295 break;
00296 case 'x':
00297 sprintf(s, "%x", va_arg(vargs, int));
00298 s += strlen(s);
00299 break;
00300 case 's':
00301 p = va_arg(vargs, char*);
00302 i = strlen(p);
00303 if (n > 0 && i > n)
00304 i = n;
00305 memcpy(s, p, i);
00306 s += i;
00307 break;
00308 case 'p':
00309 sprintf(s, "%p", va_arg(vargs, void*));
00310
00311 if (s[1] == 'X')
00312 s[1] = 'x';
00313 else if (s[1] != 'x') {
00314 memmove(s+2, s, strlen(s)+1);
00315 s[0] = '0';
00316 s[1] = 'x';
00317 }
00318 s += strlen(s);
00319 break;
00320 case '%':
00321 *s++ = '%';
00322 break;
00323 default:
00324 strcpy(s, p);
00325 s += strlen(s);
00326 goto end;
00327 }
00328 } else
00329 *s++ = *f;
00330 }
00331
00332 end:
00333 _PyString_Resize(&string, s - PyString_AS_STRING(string));
00334 return string;
00335 }
00336
00337 PyObject *
00338 PyString_FromFormat(const char *format, ...)
00339 {
00340 PyObject* ret;
00341 va_list vargs;
00342
00343 #ifdef HAVE_STDARG_PROTOTYPES
00344 va_start(vargs, format);
00345 #else
00346 va_start(vargs);
00347 #endif
00348 ret = PyString_FromFormatV(format, vargs);
00349 va_end(vargs);
00350 return ret;
00351 }
00352
00353
00354 PyObject *PyString_Decode(const char *s,
00355 Py_ssize_t size,
00356 const char *encoding,
00357 const char *errors)
00358 {
00359 PyObject *v, *str;
00360
00361 str = PyString_FromStringAndSize(s, size);
00362 if (str == NULL)
00363 return NULL;
00364 v = PyString_AsDecodedString(str, encoding, errors);
00365 Py_DECREF(str);
00366 return v;
00367 }
00368
00369 PyObject *PyString_AsDecodedObject(PyObject *str,
00370 const char *encoding,
00371 const char *errors)
00372 {
00373 PyObject *v;
00374
00375 if (!PyString_Check(str)) {
00376 PyErr_BadArgument();
00377 goto onError;
00378 }
00379
00380 if (encoding == NULL) {
00381 #ifdef Py_USING_UNICODE
00382 encoding = PyUnicode_GetDefaultEncoding();
00383 #else
00384 PyErr_SetString(PyExc_ValueError, "no encoding specified");
00385 goto onError;
00386 #endif
00387 }
00388
00389
00390 v = PyCodec_Decode(str, encoding, errors);
00391 if (v == NULL)
00392 goto onError;
00393
00394 return v;
00395
00396 onError:
00397 return NULL;
00398 }
00399
00400 PyObject *PyString_AsDecodedString(PyObject *str,
00401 const char *encoding,
00402 const char *errors)
00403 {
00404 PyObject *v;
00405
00406 v = PyString_AsDecodedObject(str, encoding, errors);
00407 if (v == NULL)
00408 goto onError;
00409
00410 #ifdef Py_USING_UNICODE
00411
00412 if (PyUnicode_Check(v)) {
00413 PyObject *temp = v;
00414 v = PyUnicode_AsEncodedString(v, NULL, NULL);
00415 Py_DECREF(temp);
00416 if (v == NULL)
00417 goto onError;
00418 }
00419 #endif
00420 if (!PyString_Check(v)) {
00421 PyErr_Format(PyExc_TypeError,
00422 "decoder did not return a string object (type=%.400s)",
00423 v->ob_type->tp_name);
00424 Py_DECREF(v);
00425 goto onError;
00426 }
00427
00428 return v;
00429
00430 onError:
00431 return NULL;
00432 }
00433
00434 PyObject *PyString_Encode(const char *s,
00435 Py_ssize_t size,
00436 const char *encoding,
00437 const char *errors)
00438 {
00439 PyObject *v, *str;
00440
00441 str = PyString_FromStringAndSize(s, size);
00442 if (str == NULL)
00443 return NULL;
00444 v = PyString_AsEncodedString(str, encoding, errors);
00445 Py_DECREF(str);
00446 return v;
00447 }
00448
00449 PyObject *PyString_AsEncodedObject(PyObject *str,
00450 const char *encoding,
00451 const char *errors)
00452 {
00453 PyObject *v;
00454
00455 if (!PyString_Check(str)) {
00456 PyErr_BadArgument();
00457 goto onError;
00458 }
00459
00460 if (encoding == NULL) {
00461 #ifdef Py_USING_UNICODE
00462 encoding = PyUnicode_GetDefaultEncoding();
00463 #else
00464 PyErr_SetString(PyExc_ValueError, "no encoding specified");
00465 goto onError;
00466 #endif
00467 }
00468
00469
00470 v = PyCodec_Encode(str, encoding, errors);
00471 if (v == NULL)
00472 goto onError;
00473
00474 return v;
00475
00476 onError:
00477 return NULL;
00478 }
00479
00480 PyObject *PyString_AsEncodedString(PyObject *str,
00481 const char *encoding,
00482 const char *errors)
00483 {
00484 PyObject *v;
00485
00486 v = PyString_AsEncodedObject(str, encoding, errors);
00487 if (v == NULL)
00488 goto onError;
00489
00490 #ifdef Py_USING_UNICODE
00491
00492 if (PyUnicode_Check(v)) {
00493 PyObject *temp = v;
00494 v = PyUnicode_AsEncodedString(v, NULL, NULL);
00495 Py_DECREF(temp);
00496 if (v == NULL)
00497 goto onError;
00498 }
00499 #endif
00500 if (!PyString_Check(v)) {
00501 PyErr_Format(PyExc_TypeError,
00502 "encoder did not return a string object (type=%.400s)",
00503 v->ob_type->tp_name);
00504 Py_DECREF(v);
00505 goto onError;
00506 }
00507
00508 return v;
00509
00510 onError:
00511 return NULL;
00512 }
00513
00514 static void
00515 string_dealloc(PyObject *op)
00516 {
00517 switch (PyString_CHECK_INTERNED(op)) {
00518 case SSTATE_NOT_INTERNED:
00519 break;
00520
00521 case SSTATE_INTERNED_MORTAL:
00522
00523 op->ob_refcnt = 3;
00524 if (PyDict_DelItem(interned, op) != 0)
00525 Py_FatalError(
00526 "deletion of interned string failed");
00527 break;
00528
00529 case SSTATE_INTERNED_IMMORTAL:
00530 Py_FatalError("Immortal interned string died.");
00531
00532 default:
00533 Py_FatalError("Inconsistent interned string state.");
00534 }
00535 op->ob_type->tp_free(op);
00536 }
00537
00538
00539
00540
00541
00542
00543 PyObject *PyString_DecodeEscape(const char *s,
00544 Py_ssize_t len,
00545 const char *errors,
00546 Py_ssize_t unicode,
00547 const char *recode_encoding)
00548 {
00549 int c;
00550 char *p, *buf;
00551 const char *end;
00552 PyObject *v;
00553 Py_ssize_t newlen = recode_encoding ? 4*len:len;
00554 v = PyString_FromStringAndSize((char *)NULL, newlen);
00555 if (v == NULL)
00556 return NULL;
00557 p = buf = PyString_AsString(v);
00558 end = s + len;
00559 while (s < end) {
00560 if (*s != '\\') {
00561 non_esc:
00562 #ifdef Py_USING_UNICODE
00563 if (recode_encoding && (*s & 0x80)) {
00564 PyObject *u, *w;
00565 char *r;
00566 const char* t;
00567 Py_ssize_t rn;
00568 t = s;
00569
00570 while (t < end && (*t & 0x80)) t++;
00571 u = PyUnicode_DecodeUTF8(s, t - s, errors);
00572 if(!u) goto failed;
00573
00574
00575 w = PyUnicode_AsEncodedString(
00576 u, recode_encoding, errors);
00577 Py_DECREF(u);
00578 if (!w) goto failed;
00579
00580
00581 r = PyString_AsString(w);
00582 rn = PyString_Size(w);
00583 memcpy(p, r, rn);
00584 p += rn;
00585 Py_DECREF(w);
00586 s = t;
00587 } else {
00588 *p++ = *s++;
00589 }
00590 #else
00591 *p++ = *s++;
00592 #endif
00593 continue;
00594 }
00595 s++;
00596 if (s==end) {
00597 PyErr_SetString(PyExc_ValueError,
00598 "Trailing \\ in string");
00599 goto failed;
00600 }
00601 switch (*s++) {
00602
00603 case '\n': break;
00604 case '\\': *p++ = '\\'; break;
00605 case '\'': *p++ = '\''; break;
00606 case '\"': *p++ = '\"'; break;
00607 case 'b': *p++ = '\b'; break;
00608 case 'f': *p++ = '\014'; break;
00609 case 't': *p++ = '\t'; break;
00610 case 'n': *p++ = '\n'; break;
00611 case 'r': *p++ = '\r'; break;
00612 case 'v': *p++ = '\013'; break;
00613 case 'a': *p++ = '\007'; break;
00614 case '0': case '1': case '2': case '3':
00615 case '4': case '5': case '6': case '7':
00616 c = s[-1] - '0';
00617 if ('0' <= *s && *s <= '7') {
00618 c = (c<<3) + *s++ - '0';
00619 if ('0' <= *s && *s <= '7')
00620 c = (c<<3) + *s++ - '0';
00621 }
00622 *p++ = c;
00623 break;
00624 case 'x':
00625 if (isxdigit(Py_CHARMASK(s[0]))
00626 && isxdigit(Py_CHARMASK(s[1]))) {
00627 unsigned int x = 0;
00628 c = Py_CHARMASK(*s);
00629 s++;
00630 if (isdigit(c))
00631 x = c - '0';
00632 else if (islower(c))
00633 x = 10 + c - 'a';
00634 else
00635 x = 10 + c - 'A';
00636 x = x << 4;
00637 c = Py_CHARMASK(*s);
00638 s++;
00639 if (isdigit(c))
00640 x += c - '0';
00641 else if (islower(c))
00642 x += 10 + c - 'a';
00643 else
00644 x += 10 + c - 'A';
00645 *p++ = x;
00646 break;
00647 }
00648 if (!errors || strcmp(errors, "strict") == 0) {
00649 PyErr_SetString(PyExc_ValueError,
00650 "invalid \\x escape");
00651 goto failed;
00652 }
00653 if (strcmp(errors, "replace") == 0) {
00654 *p++ = '?';
00655 } else if (strcmp(errors, "ignore") == 0)
00656 ;
00657 else {
00658 PyErr_Format(PyExc_ValueError,
00659 "decoding error; "
00660 "unknown error handling code: %.400s",
00661 errors);
00662 goto failed;
00663 }
00664 #ifndef Py_USING_UNICODE
00665 case 'u':
00666 case 'U':
00667 case 'N':
00668 if (unicode) {
00669 PyErr_SetString(PyExc_ValueError,
00670 "Unicode escapes not legal "
00671 "when Unicode disabled");
00672 goto failed;
00673 }
00674 #endif
00675 default:
00676 *p++ = '\\';
00677 s--;
00678 goto non_esc;
00679
00680 }
00681 }
00682 if (p-buf < newlen)
00683 _PyString_Resize(&v, p - buf);
00684 return v;
00685 failed:
00686 Py_DECREF(v);
00687 return NULL;
00688 }
00689
00690 static Py_ssize_t
00691 string_getsize(register PyObject *op)
00692 {
00693 char *s;
00694 Py_ssize_t len;
00695 if (PyString_AsStringAndSize(op, &s, &len))
00696 return -1;
00697 return len;
00698 }
00699
00700 static char *
00701 string_getbuffer(register PyObject *op)
00702 {
00703 char *s;
00704 Py_ssize_t len;
00705 if (PyString_AsStringAndSize(op, &s, &len))
00706 return NULL;
00707 return s;
00708 }
00709
00710 Py_ssize_t
00711 PyString_Size(register PyObject *op)
00712 {
00713 if (!PyString_Check(op))
00714 return string_getsize(op);
00715 return ((PyStringObject *)op) -> ob_size;
00716 }
00717
00718 char *
00719 PyString_AsString(register PyObject *op)
00720 {
00721 if (!PyString_Check(op))
00722 return string_getbuffer(op);
00723 return ((PyStringObject *)op) -> ob_sval;
00724 }
00725
00726 int
00727 PyString_AsStringAndSize(register PyObject *obj,
00728 register char **s,
00729 register Py_ssize_t *len)
00730 {
00731 if (s == NULL) {
00732 PyErr_BadInternalCall();
00733 return -1;
00734 }
00735
00736 if (!PyString_Check(obj)) {
00737 #ifdef Py_USING_UNICODE
00738 if (PyUnicode_Check(obj)) {
00739 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
00740 if (obj == NULL)
00741 return -1;
00742 }
00743 else
00744 #endif
00745 {
00746 PyErr_Format(PyExc_TypeError,
00747 "expected string or Unicode object, "
00748 "%.200s found", obj->ob_type->tp_name);
00749 return -1;
00750 }
00751 }
00752
00753 *s = PyString_AS_STRING(obj);
00754 if (len != NULL)
00755 *len = PyString_GET_SIZE(obj);
00756 else if (strlen(*s) != PyString_GET_SIZE(obj)) {
00757 PyErr_SetString(PyExc_TypeError,
00758 "expected string without null bytes");
00759 return -1;
00760 }
00761 return 0;
00762 }
00763
00764
00765
00766 static int
00767 string_print(PyStringObject *op, FILE *fp, int flags)
00768 {
00769 Py_ssize_t i;
00770 char c;
00771 int quote;
00772
00773
00774 if (! PyString_CheckExact(op)) {
00775 int ret;
00776
00777 op = (PyStringObject *) PyObject_Str((PyObject *)op);
00778 if (op == NULL)
00779 return -1;
00780 ret = string_print(op, fp, flags);
00781 Py_DECREF(op);
00782 return ret;
00783 }
00784 if (flags & Py_PRINT_RAW) {
00785 #ifdef __VMS
00786 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
00787 #else
00788 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
00789 #endif
00790 return 0;
00791 }
00792
00793
00794 quote = '\'';
00795 if (memchr(op->ob_sval, '\'', op->ob_size) &&
00796 !memchr(op->ob_sval, '"', op->ob_size))
00797 quote = '"';
00798
00799 fputc(quote, fp);
00800 for (i = 0; i < op->ob_size; i++) {
00801 c = op->ob_sval[i];
00802 if (c == quote || c == '\\')
00803 fprintf(fp, "\\%c", c);
00804 else if (c == '\t')
00805 fprintf(fp, "\\t");
00806 else if (c == '\n')
00807 fprintf(fp, "\\n");
00808 else if (c == '\r')
00809 fprintf(fp, "\\r");
00810 else if (c < ' ' || c >= 0x7f)
00811 fprintf(fp, "\\x%02x", c & 0xff);
00812 else
00813 fputc(c, fp);
00814 }
00815 fputc(quote, fp);
00816 return 0;
00817 }
00818
00819 PyObject *
00820 PyString_Repr(PyObject *obj, int smartquotes)
00821 {
00822 register PyStringObject* op = (PyStringObject*) obj;
00823 size_t newsize = 2 + 4 * op->ob_size;
00824 PyObject *v;
00825 if (newsize > INT_MAX) {
00826 PyErr_SetString(PyExc_OverflowError,
00827 "string is too large to make repr");
00828 }
00829 v = PyString_FromStringAndSize((char *)NULL, newsize);
00830 if (v == NULL) {
00831 return NULL;
00832 }
00833 else {
00834 register Py_ssize_t i;
00835 register char c;
00836 register char *p;
00837 int quote;
00838
00839
00840 quote = '\'';
00841 if (smartquotes &&
00842 memchr(op->ob_sval, '\'', op->ob_size) &&
00843 !memchr(op->ob_sval, '"', op->ob_size))
00844 quote = '"';
00845
00846 p = PyString_AS_STRING(v);
00847 *p++ = quote;
00848 for (i = 0; i < op->ob_size; i++) {
00849
00850
00851 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
00852 c = op->ob_sval[i];
00853 if (c == quote || c == '\\')
00854 *p++ = '\\', *p++ = c;
00855 else if (c == '\t')
00856 *p++ = '\\', *p++ = 't';
00857 else if (c == '\n')
00858 *p++ = '\\', *p++ = 'n';
00859 else if (c == '\r')
00860 *p++ = '\\', *p++ = 'r';
00861 else if (c < ' ' || c >= 0x7f) {
00862
00863
00864
00865 sprintf(p, "\\x%02x", c & 0xff);
00866 p += 4;
00867 }
00868 else
00869 *p++ = c;
00870 }
00871 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
00872 *p++ = quote;
00873 *p = '\0';
00874 _PyString_Resize(
00875 &v, (int) (p - PyString_AS_STRING(v)));
00876 return v;
00877 }
00878 }
00879
00880 static PyObject *
00881 string_repr(PyObject *op)
00882 {
00883 return PyString_Repr(op, 1);
00884 }
00885
00886 static PyObject *
00887 string_str(PyObject *s)
00888 {
00889 assert(PyString_Check(s));
00890 if (PyString_CheckExact(s)) {
00891 Py_INCREF(s);
00892 return s;
00893 }
00894 else {
00895
00896 PyStringObject *t = (PyStringObject *) s;
00897 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
00898 }
00899 }
00900
00901 static Py_ssize_t
00902 string_length(PyStringObject *a)
00903 {
00904 return a->ob_size;
00905 }
00906
00907 static PyObject *
00908 string_concat(register PyStringObject *a, register PyObject *bb)
00909 {
00910 register size_t size;
00911 register PyStringObject *op;
00912 if (!PyString_Check(bb)) {
00913 #ifdef Py_USING_UNICODE
00914 if (PyUnicode_Check(bb))
00915 return PyUnicode_Concat((PyObject *)a, bb);
00916 #endif
00917 PyErr_Format(PyExc_TypeError,
00918 "cannot concatenate 'str' and '%.200s' objects",
00919 bb->ob_type->tp_name);
00920 return NULL;
00921 }
00922 #define b ((PyStringObject *)bb)
00923
00924 if ((a->ob_size == 0 || b->ob_size == 0) &&
00925 PyString_CheckExact(a) && PyString_CheckExact(b)) {
00926 if (a->ob_size == 0) {
00927 Py_INCREF(bb);
00928 return bb;
00929 }
00930 Py_INCREF(a);
00931 return (PyObject *)a;
00932 }
00933 size = a->ob_size + b->ob_size;
00934
00935
00936 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
00937 if (op == NULL)
00938 return PyErr_NoMemory();
00939 PyObject_INIT_VAR(op, &PyString_Type, size);
00940 op->ob_shash = -1;
00941 op->ob_sstate = SSTATE_NOT_INTERNED;
00942 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
00943 memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
00944 op->ob_sval[size] = '\0';
00945 return (PyObject *) op;
00946 #undef b
00947 }
00948
00949 static PyObject *
00950 string_repeat(register PyStringObject *a, register Py_ssize_t n)
00951 {
00952 register Py_ssize_t i;
00953 register Py_ssize_t j;
00954 register Py_ssize_t size;
00955 register PyStringObject *op;
00956 size_t nbytes;
00957 if (n < 0)
00958 n = 0;
00959
00960
00961
00962 size = a->ob_size * n;
00963 if (n && size / n != a->ob_size) {
00964 PyErr_SetString(PyExc_OverflowError,
00965 "repeated string is too long");
00966 return NULL;
00967 }
00968 if (size == a->ob_size && PyString_CheckExact(a)) {
00969 Py_INCREF(a);
00970 return (PyObject *)a;
00971 }
00972 nbytes = (size_t)size;
00973 if (nbytes + sizeof(PyStringObject) <= nbytes) {
00974 PyErr_SetString(PyExc_OverflowError,
00975 "repeated string is too long");
00976 return NULL;
00977 }
00978 op = (PyStringObject *)
00979 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
00980 if (op == NULL)
00981 return PyErr_NoMemory();
00982 PyObject_INIT_VAR(op, &PyString_Type, size);
00983 op->ob_shash = -1;
00984 op->ob_sstate = SSTATE_NOT_INTERNED;
00985 op->ob_sval[size] = '\0';
00986 if (a->ob_size == 1 && n > 0) {
00987 memset(op->ob_sval, a->ob_sval[0] , n);
00988 return (PyObject *) op;
00989 }
00990 i = 0;
00991 if (i < size) {
00992 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
00993 i = a->ob_size;
00994 }
00995 while (i < size) {
00996 j = (i <= size-i) ? i : size-i;
00997 memcpy(op->ob_sval+i, op->ob_sval, j);
00998 i += j;
00999 }
01000 return (PyObject *) op;
01001 }
01002
01003
01004
01005 static PyObject *
01006 string_slice(register PyStringObject *a, register Py_ssize_t i,
01007 register Py_ssize_t j)
01008
01009 {
01010 if (i < 0)
01011 i = 0;
01012 if (j < 0)
01013 j = 0;
01014 if (j > a->ob_size)
01015 j = a->ob_size;
01016 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
01017
01018 Py_INCREF(a);
01019 return (PyObject *)a;
01020 }
01021 if (j < i)
01022 j = i;
01023 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
01024 }
01025
01026 static int
01027 string_contains(PyObject *a, PyObject *el)
01028 {
01029 char *s = PyString_AS_STRING(a);
01030 const char *sub = PyString_AS_STRING(el);
01031 char *last;
01032 Py_ssize_t len_sub = PyString_GET_SIZE(el);
01033 Py_ssize_t shortsub;
01034 char firstchar, lastchar;
01035
01036 if (!PyString_CheckExact(el)) {
01037 #ifdef Py_USING_UNICODE
01038 if (PyUnicode_Check(el))
01039 return PyUnicode_Contains(a, el);
01040 #endif
01041 if (!PyString_Check(el)) {
01042 PyErr_SetString(PyExc_TypeError,
01043 "'in <string>' requires string as left operand");
01044 return -1;
01045 }
01046 }
01047
01048 if (len_sub == 0)
01049 return 1;
01050
01051
01052
01053
01054
01055
01056
01057 firstchar = sub[0];
01058 shortsub = len_sub - 1;
01059 lastchar = sub[shortsub];
01060 last = s + PyString_GET_SIZE(a) - len_sub + 1;
01061 while (s < last) {
01062 s = memchr(s, firstchar, last-s);
01063 if (s == NULL)
01064 return 0;
01065 assert(s < last);
01066 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
01067 return 1;
01068 s++;
01069 }
01070 return 0;
01071 }
01072
01073 static PyObject *
01074 string_item(PyStringObject *a, register Py_ssize_t i)
01075 {
01076 PyObject *v;
01077 char *pchar;
01078 if (i < 0 || i >= a->ob_size) {
01079 PyErr_SetString(PyExc_IndexError, "string index out of range");
01080 return NULL;
01081 }
01082 pchar = a->ob_sval + i;
01083 v = (PyObject *)characters[*pchar & UCHAR_MAX];
01084 if (v == NULL)
01085 v = PyString_FromStringAndSize(pchar, 1);
01086 else {
01087 #ifdef COUNT_ALLOCS
01088 one_strings++;
01089 #endif
01090 Py_INCREF(v);
01091 }
01092 return v;
01093 }
01094
01095 static PyObject*
01096 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
01097 {
01098 int c;
01099 Py_ssize_t len_a, len_b;
01100 Py_ssize_t min_len;
01101 PyObject *result;
01102
01103
01104 if (!(PyString_Check(a) && PyString_Check(b))) {
01105 result = Py_NotImplemented;
01106 goto out;
01107 }
01108 if (a == b) {
01109 switch (op) {
01110 case Py_EQ:case Py_LE:case Py_GE:
01111 result = Py_True;
01112 goto out;
01113 case Py_NE:case Py_LT:case Py_GT:
01114 result = Py_False;
01115 goto out;
01116 }
01117 }
01118 if (op == Py_EQ) {
01119
01120
01121 if (a->ob_size == b->ob_size
01122 && (a->ob_sval[0] == b->ob_sval[0]
01123 && memcmp(a->ob_sval, b->ob_sval,
01124 a->ob_size) == 0)) {
01125 result = Py_True;
01126 } else {
01127 result = Py_False;
01128 }
01129 goto out;
01130 }
01131 len_a = a->ob_size; len_b = b->ob_size;
01132 min_len = (len_a < len_b) ? len_a : len_b;
01133 if (min_len > 0) {
01134 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
01135 if (c==0)
01136 c = memcmp(a->ob_sval, b->ob_sval, min_len);
01137 }else
01138 c = 0;
01139 if (c == 0)
01140 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
01141 switch (op) {
01142 case Py_LT: c = c < 0; break;
01143 case Py_LE: c = c <= 0; break;
01144 case Py_EQ: assert(0); break;
01145 case Py_NE: c = c != 0; break;
01146 case Py_GT: c = c > 0; break;
01147 case Py_GE: c = c >= 0; break;
01148 default:
01149 result = Py_NotImplemented;
01150 goto out;
01151 }
01152 result = c ? Py_True : Py_False;
01153 out:
01154 Py_INCREF(result);
01155 return result;
01156 }
01157
01158 int
01159 _PyString_Eq(PyObject *o1, PyObject *o2)
01160 {
01161 PyStringObject *a, *b;
01162 a = (PyStringObject*)o1;
01163 b = (PyStringObject*)o2;
01164 return a->ob_size == b->ob_size
01165 && *a->ob_sval == *b->ob_sval
01166 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
01167 }
01168
01169 static long
01170 string_hash(PyStringObject *a)
01171 {
01172 register Py_ssize_t len;
01173 register unsigned char *p;
01174 register long x;
01175
01176 if (a->ob_shash != -1)
01177 return a->ob_shash;
01178 len = a->ob_size;
01179 p = (unsigned char *) a->ob_sval;
01180 x = *p << 7;
01181 while (--len >= 0)
01182 x = (1000003*x) ^ *p++;
01183 x ^= a->ob_size;
01184 if (x == -1)
01185 x = -2;
01186 a->ob_shash = x;
01187 return x;
01188 }
01189
01190 static PyObject*
01191 string_subscript(PyStringObject* self, PyObject* item)
01192 {
01193 if (PyInt_Check(item) || PyLong_Check(item)) {
01194 Py_ssize_t i = PyInt_AsSsize_t(item);
01195 if (i == -1 && PyErr_Occurred())
01196 return NULL;
01197 if (i < 0)
01198 i += PyString_GET_SIZE(self);
01199 return string_item(self,i);
01200 }
01201 else if (PySlice_Check(item)) {
01202 Py_ssize_t start, stop, step, slicelength, cur, i;
01203 char* source_buf;
01204 char* result_buf;
01205 PyObject* result;
01206
01207 if (PySlice_GetIndicesEx((PySliceObject*)item,
01208 PyString_GET_SIZE(self),
01209 &start, &stop, &step, &slicelength) < 0) {
01210 return NULL;
01211 }
01212
01213 if (slicelength <= 0) {
01214 return PyString_FromStringAndSize("", 0);
01215 }
01216 else {
01217 source_buf = PyString_AsString((PyObject*)self);
01218 result_buf = PyMem_Malloc(slicelength);
01219 if (result_buf == NULL)
01220 return PyErr_NoMemory();
01221
01222 for (cur = start, i = 0; i < slicelength;
01223 cur += step, i++) {
01224 result_buf[i] = source_buf[cur];
01225 }
01226
01227 result = PyString_FromStringAndSize(result_buf,
01228 slicelength);
01229 PyMem_Free(result_buf);
01230 return result;
01231 }
01232 }
01233 else {
01234 PyErr_SetString(PyExc_TypeError,
01235 "string indices must be integers");
01236 return NULL;
01237 }
01238 }
01239
01240 static Py_ssize_t
01241 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
01242 {
01243 if ( index != 0 ) {
01244 PyErr_SetString(PyExc_SystemError,
01245 "accessing non-existent string segment");
01246 return -1;
01247 }
01248 *ptr = (void *)self->ob_sval;
01249 return self->ob_size;
01250 }
01251
01252 static Py_ssize_t
01253 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
01254 {
01255 PyErr_SetString(PyExc_TypeError,
01256 "Cannot use string as modifiable buffer");
01257 return -1;
01258 }
01259
01260 static Py_ssize_t
01261 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
01262 {
01263 if ( lenp )
01264 *lenp = self->ob_size;
01265 return 1;
01266 }
01267
01268 static Py_ssize_t
01269 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
01270 {
01271 if ( index != 0 ) {
01272 PyErr_SetString(PyExc_SystemError,
01273 "accessing non-existent string segment");
01274 return -1;
01275 }
01276 *ptr = self->ob_sval;
01277 return self->ob_size;
01278 }
01279
/* Sequence protocol table for strings.  The two zero entries leave item
   and slice assignment unimplemented.  Slot names below follow the
   PySequenceMethods layout -- NOTE(review): confirm against object.h. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length,             /* sq_length */
    (binaryfunc)string_concat,          /* sq_concat */
    (ssizeargfunc)string_repeat,        /* sq_repeat */
    (ssizeargfunc)string_item,          /* sq_item */
    (ssizessizeargfunc)string_slice,    /* sq_slice */
    0,                                  /* sq_ass_item */
    0,                                  /* sq_ass_slice */
    (objobjproc)string_contains         /* sq_contains */
};
01290
/* Mapping protocol table for strings: length and subscripting only; the
   zero entry leaves subscript assignment unimplemented.  Slot names
   follow the PyMappingMethods layout -- NOTE(review): confirm against
   object.h. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length,             /* mp_length */
    (binaryfunc)string_subscript,       /* mp_subscript */
    0,                                  /* mp_ass_subscript */
};
01296
/* Buffer protocol table for strings.  The write-buffer entry exists but
   always raises TypeError (see string_buffer_getwritebuf). */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf,
    (writebufferproc)string_buffer_getwritebuf,
    (segcountproc)string_buffer_getsegcount,
    (charbufferproc)string_buffer_getcharbuf,
};
01303
01304
01305
/* Selectors for the strip variants.  Their values match the positions of
   the corresponding entries in stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument format strings for lstrip / rstrip / strip, in selector order
   (presumably passed to PyArg_ParseTuple by code outside this view). */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for selector i: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
01314
/* Append the substring data[left:right] to `list`.
   Relies on the enclosing function providing a `PyObject *str` scratch
   variable, a `PyObject *list` and an `onError` label, to which it jumps
   on failure.  Expands to several statements ending in an `else` branch,
   so use it as a statement of its own inside braces. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Same as SPLIT_APPEND, but inserts the substring at the front of `list`
   (index 0) instead of appending it. */
#define SPLIT_INSERT(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Insert(list, 0, str)) {                          \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);
01338
01339 static PyObject *
01340 split_whitespace(const char *s, Py_ssize_t len, int maxsplit)
01341 {
01342 Py_ssize_t i, j;
01343 PyObject *str;
01344 PyObject *list = PyList_New(0);
01345
01346 if (list == NULL)
01347 return NULL;
01348
01349 for (i = j = 0; i < len; ) {
01350 while (i < len && isspace(Py_CHARMASK(s[i])))
01351 i++;
01352 j = i;
01353 while (i < len && !isspace(Py_CHARMASK(s[i])))
01354 i++;
01355 if (j < i) {
01356 if (maxsplit-- <= 0)
01357 break;
01358 SPLIT_APPEND(s, j, i);
01359 while (i < len && isspace(Py_CHARMASK(s[i])))
01360 i++;
01361 j = i;
01362 }
01363 }
01364 if (j < len) {
01365 SPLIT_APPEND(s, j, len);
01366 }
01367 return list;
01368 onError:
01369 Py_DECREF(list);
01370 return NULL;
01371 }
01372
01373 static PyObject *
01374 split_char(const char *s, Py_ssize_t len, char ch, int maxcount)
01375 {
01376 register Py_ssize_t i, j;
01377 PyObject *str;
01378 PyObject *list = PyList_New(0);
01379
01380 if (list == NULL)
01381 return NULL;
01382
01383 for (i = j = 0; i < len; ) {
01384 if (s[i] == ch) {
01385 if (maxcount-- <= 0)
01386 break;
01387 SPLIT_APPEND(s, j, i);
01388 i = j = i + 1;
01389 } else
01390 i++;
01391 }
01392 if (j <= len) {
01393 SPLIT_APPEND(s, j, len);
01394 }
01395 return list;
01396
01397 onError:
01398 Py_DECREF(list);
01399 return NULL;
01400 }
01401
01402 PyDoc_STRVAR(split__doc__,
01403 "S.split([sep [,maxsplit]]) -> list of strings\n\
01404 \n\
01405 Return a list of the words in the string S, using sep as the\n\
01406 delimiter string. If maxsplit is given, at most maxsplit\n\
01407 splits are done. If sep is not specified or is None, any\n\
01408 whitespace string is a separator.");
01409
01410 static PyObject *
01411 string_split(PyStringObject *self, PyObject *args)
01412 {
01413 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
01414 int err;
01415 int maxsplit = -1;
01416 const char *s = PyString_AS_STRING(self), *sub;
01417 PyObject *list, *item, *subobj = Py_None;
01418
01419 if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
01420 return NULL;
01421 if (maxsplit < 0)
01422 maxsplit = INT_MAX;
01423 if (subobj == Py_None)
01424 return split_whitespace(s, len, maxsplit);
01425 if (PyString_Check(subobj)) {
01426 sub = PyString_AS_STRING(subobj);
01427 n = PyString_GET_SIZE(subobj);
01428 }
01429 #ifdef Py_USING_UNICODE
01430 else if (PyUnicode_Check(subobj))
01431 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
01432 #endif
01433 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
01434 return NULL;
01435
01436 if (n == 0) {
01437 PyErr_SetString(PyExc_ValueError, "empty separator");
01438 return NULL;
01439 }
01440 else if (n == 1)
01441 return split_char(s, len, sub[0], maxsplit);
01442
01443 list = PyList_New(0);
01444 if (list == NULL)
01445 return NULL;
01446
01447 i = j = 0;
01448 while (i+n <= len) {
01449 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
01450 if (maxsplit-- <= 0)
01451 break;
01452 item = PyString_FromStringAndSize(s+j, i-j);
01453 if (item == NULL)
01454 goto fail;
01455 err = PyList_Append(list, item);
01456 Py_DECREF(item);
01457 if (err < 0)
01458 goto fail;
01459 i = j = i + n;
01460 }
01461 else
01462 i++;
01463 }
01464 item = PyString_FromStringAndSize(s+j, len-j);
01465 if (item == NULL)
01466 goto fail;
01467 err = PyList_Append(list, item);
01468 Py_DECREF(item);
01469 if (err < 0)
01470 goto fail;
01471
01472 return list;
01473
01474 fail:
01475 Py_DECREF(list);
01476 return NULL;
01477 }
01478
01479 static PyObject *
01480 rsplit_whitespace(const char *s, Py_ssize_t len, int maxsplit)
01481 {
01482 Py_ssize_t i, j;
01483 PyObject *str;
01484 PyObject *list = PyList_New(0);
01485
01486 if (list == NULL)
01487 return NULL;
01488
01489 for (i = j = len - 1; i >= 0; ) {
01490 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
01491 i--;
01492 j = i;
01493 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
01494 i--;
01495 if (j > i) {
01496 if (maxsplit-- <= 0)
01497 break;
01498 SPLIT_INSERT(s, i + 1, j + 1);
01499 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
01500 i--;
01501 j = i;
01502 }
01503 }
01504 if (j >= 0) {
01505 SPLIT_INSERT(s, 0, j + 1);
01506 }
01507 return list;
01508 onError:
01509 Py_DECREF(list);
01510 return NULL;
01511 }
01512
01513 static PyObject *
01514 rsplit_char(const char *s, Py_ssize_t len, char ch, int maxcount)
01515 {
01516 register Py_ssize_t i, j;
01517 PyObject *str;
01518 PyObject *list = PyList_New(0);
01519
01520 if (list == NULL)
01521 return NULL;
01522
01523 for (i = j = len - 1; i >= 0; ) {
01524 if (s[i] == ch) {
01525 if (maxcount-- <= 0)
01526 break;
01527 SPLIT_INSERT(s, i + 1, j + 1);
01528 j = i = i - 1;
01529 } else
01530 i--;
01531 }
01532 if (j >= -1) {
01533 SPLIT_INSERT(s, 0, j + 1);
01534 }
01535 return list;
01536
01537 onError:
01538 Py_DECREF(list);
01539 return NULL;
01540 }
01541
01542 PyDoc_STRVAR(rsplit__doc__,
01543 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
01544 \n\
01545 Return a list of the words in the string S, using sep as the\n\
01546 delimiter string, starting at the end of the string and working\n\
01547 to the front. If maxsplit is given, at most maxsplit splits are\n\
01548 done. If sep is not specified or is None, any whitespace string\n\
01549 is a separator.");
01550
01551 static PyObject *
01552 string_rsplit(PyStringObject *self, PyObject *args)
01553 {
01554 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
01555 int err;
01556 int maxsplit = -1;
01557 const char *s = PyString_AS_STRING(self), *sub;
01558 PyObject *list, *item, *subobj = Py_None;
01559
01560 if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
01561 return NULL;
01562 if (maxsplit < 0)
01563 maxsplit = INT_MAX;
01564 if (subobj == Py_None)
01565 return rsplit_whitespace(s, len, maxsplit);
01566 if (PyString_Check(subobj)) {
01567 sub = PyString_AS_STRING(subobj);
01568 n = PyString_GET_SIZE(subobj);
01569 }
01570 #ifdef Py_USING_UNICODE
01571 else if (PyUnicode_Check(subobj))
01572 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
01573 #endif
01574 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
01575 return NULL;
01576
01577 if (n == 0) {
01578 PyErr_SetString(PyExc_ValueError, "empty separator");
01579 return NULL;
01580 }
01581 else if (n == 1)
01582 return rsplit_char(s, len, sub[0], maxsplit);
01583
01584 list = PyList_New(0);
01585 if (list == NULL)
01586 return NULL;
01587
01588 j = len;
01589 i = j - n;
01590 while (i >= 0) {
01591 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
01592 if (maxsplit-- <= 0)
01593 break;
01594 item = PyString_FromStringAndSize(s+i+n, j-i-n);
01595 if (item == NULL)
01596 goto fail;
01597 err = PyList_Insert(list, 0, item);
01598 Py_DECREF(item);
01599 if (err < 0)
01600 goto fail;
01601 j = i;
01602 i -= n;
01603 }
01604 else
01605 i--;
01606 }
01607 item = PyString_FromStringAndSize(s, j);
01608 if (item == NULL)
01609 goto fail;
01610 err = PyList_Insert(list, 0, item);
01611 Py_DECREF(item);
01612 if (err < 0)
01613 goto fail;
01614
01615 return list;
01616
01617 fail:
01618 Py_DECREF(list);
01619 return NULL;
01620 }
01621
01622
01623 PyDoc_STRVAR(join__doc__,
01624 "S.join(sequence) -> string\n\
01625 \n\
01626 Return a string which is the concatenation of the strings in the\n\
01627 sequence. The separator between elements is S.");
01628
01629 static PyObject *
01630 string_join(PyStringObject *self, PyObject *orig)
01631 {
01632 char *sep = PyString_AS_STRING(self);
01633 const Py_ssize_t seplen = PyString_GET_SIZE(self);
01634 PyObject *res = NULL;
01635 char *p;
01636 Py_ssize_t seqlen = 0;
01637 size_t sz = 0;
01638 Py_ssize_t i;
01639 PyObject *seq, *item;
01640
01641 seq = PySequence_Fast(orig, "");
01642 if (seq == NULL) {
01643 return NULL;
01644 }
01645
01646 seqlen = PySequence_Size(seq);
01647 if (seqlen == 0) {
01648 Py_DECREF(seq);
01649 return PyString_FromString("");
01650 }
01651 if (seqlen == 1) {
01652 item = PySequence_Fast_GET_ITEM(seq, 0);
01653 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
01654 Py_INCREF(item);
01655 Py_DECREF(seq);
01656 return item;
01657 }
01658 }
01659
01660
01661
01662
01663
01664
01665
01666 for (i = 0; i < seqlen; i++) {
01667 const size_t old_sz = sz;
01668 item = PySequence_Fast_GET_ITEM(seq, i);
01669 if (!PyString_Check(item)){
01670 #ifdef Py_USING_UNICODE
01671 if (PyUnicode_Check(item)) {
01672
01673
01674
01675
01676
01677 PyObject *result;
01678 result = PyUnicode_Join((PyObject *)self, seq);
01679 Py_DECREF(seq);
01680 return result;
01681 }
01682 #endif
01683 PyErr_Format(PyExc_TypeError,
01684 "sequence item %i: expected string,"
01685 " %.80s found",
01686 (int)i, item->ob_type->tp_name);
01687 Py_DECREF(seq);
01688 return NULL;
01689 }
01690 sz += PyString_GET_SIZE(item);
01691 if (i != 0)
01692 sz += seplen;
01693 if (sz < old_sz || sz > INT_MAX) {
01694 PyErr_SetString(PyExc_OverflowError,
01695 "join() is too long for a Python string");
01696 Py_DECREF(seq);
01697 return NULL;
01698 }
01699 }
01700
01701
01702 res = PyString_FromStringAndSize((char*)NULL, sz);
01703 if (res == NULL) {
01704 Py_DECREF(seq);
01705 return NULL;
01706 }
01707
01708
01709 p = PyString_AS_STRING(res);
01710 for (i = 0; i < seqlen; ++i) {
01711 size_t n;
01712 item = PySequence_Fast_GET_ITEM(seq, i);
01713 n = PyString_GET_SIZE(item);
01714 memcpy(p, PyString_AS_STRING(item), n);
01715 p += n;
01716 if (i < seqlen - 1) {
01717 memcpy(p, sep, seplen);
01718 p += seplen;
01719 }
01720 }
01721
01722 Py_DECREF(seq);
01723 return res;
01724 }
01725
01726 PyObject *
01727 _PyString_Join(PyObject *sep, PyObject *x)
01728 {
01729 assert(sep != NULL && PyString_Check(sep));
01730 assert(x != NULL);
01731 return string_join((PyStringObject *)sep, x);
01732 }
01733
01734 static void
01735 string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
01736 {
01737 if (*end > len)
01738 *end = len;
01739 else if (*end < 0)
01740 *end += len;
01741 if (*end < 0)
01742 *end = 0;
01743 if (*start < 0)
01744 *start += len;
01745 if (*start < 0)
01746 *start = 0;
01747 }
01748
01749 static Py_ssize_t
01750 string_find_internal(PyStringObject *self, PyObject *args, int dir)
01751 {
01752 const char *s = PyString_AS_STRING(self), *sub;
01753 Py_ssize_t len = PyString_GET_SIZE(self);
01754 Py_ssize_t n, i = 0, last = INT_MAX;
01755 PyObject *subobj;
01756
01757
01758 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
01759 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
01760 return -2;
01761 if (PyString_Check(subobj)) {
01762 sub = PyString_AS_STRING(subobj);
01763 n = PyString_GET_SIZE(subobj);
01764 }
01765 #ifdef Py_USING_UNICODE
01766 else if (PyUnicode_Check(subobj))
01767 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
01768 #endif
01769 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
01770 return -2;
01771
01772 string_adjust_indices(&i, &last, len);
01773
01774 if (dir > 0) {
01775 if (n == 0 && i <= last)
01776 return (long)i;
01777 last -= n;
01778 for (; i <= last; ++i)
01779 if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
01780 return (long)i;
01781 }
01782 else {
01783 Py_ssize_t j;
01784
01785 if (n == 0 && i <= last)
01786 return last;
01787 for (j = last-n; j >= i; --j)
01788 if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
01789 return j;
01790 }
01791
01792 return -1;
01793 }
01794
01795
01796 PyDoc_STRVAR(find__doc__,
01797 "S.find(sub [,start [,end]]) -> int\n\
01798 \n\
01799 Return the lowest index in S where substring sub is found,\n\
01800 such that sub is contained within s[start,end]. Optional\n\
01801 arguments start and end are interpreted as in slice notation.\n\
01802 \n\
01803 Return -1 on failure.");
01804
01805 static PyObject *
01806 string_find(PyStringObject *self, PyObject *args)
01807 {
01808 Py_ssize_t result = string_find_internal(self, args, +1);
01809 if (result == -2)
01810 return NULL;
01811 return PyInt_FromSsize_t(result);
01812 }
01813
01814
01815 PyDoc_STRVAR(index__doc__,
01816 "S.index(sub [,start [,end]]) -> int\n\
01817 \n\
01818 Like S.find() but raise ValueError when the substring is not found.");
01819
01820 static PyObject *
01821 string_index(PyStringObject *self, PyObject *args)
01822 {
01823 Py_ssize_t result = string_find_internal(self, args, +1);
01824 if (result == -2)
01825 return NULL;
01826 if (result == -1) {
01827 PyErr_SetString(PyExc_ValueError,
01828 "substring not found");
01829 return NULL;
01830 }
01831 return PyInt_FromSsize_t(result);
01832 }
01833
01834
01835 PyDoc_STRVAR(rfind__doc__,
01836 "S.rfind(sub [,start [,end]]) -> int\n\
01837 \n\
01838 Return the highest index in S where substring sub is found,\n\
01839 such that sub is contained within s[start,end]. Optional\n\
01840 arguments start and end are interpreted as in slice notation.\n\
01841 \n\
01842 Return -1 on failure.");
01843
01844 static PyObject *
01845 string_rfind(PyStringObject *self, PyObject *args)
01846 {
01847 Py_ssize_t result = string_find_internal(self, args, -1);
01848 if (result == -2)
01849 return NULL;
01850 return PyInt_FromSsize_t(result);
01851 }
01852
01853
01854 PyDoc_STRVAR(rindex__doc__,
01855 "S.rindex(sub [,start [,end]]) -> int\n\
01856 \n\
01857 Like S.rfind() but raise ValueError when the substring is not found.");
01858
01859 static PyObject *
01860 string_rindex(PyStringObject *self, PyObject *args)
01861 {
01862 Py_ssize_t result = string_find_internal(self, args, -1);
01863 if (result == -2)
01864 return NULL;
01865 if (result == -1) {
01866 PyErr_SetString(PyExc_ValueError,
01867 "substring not found");
01868 return NULL;
01869 }
01870 return PyInt_FromSsize_t(result);
01871 }
01872
01873
01874 static PyObject *
01875 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
01876 {
01877 char *s = PyString_AS_STRING(self);
01878 Py_ssize_t len = PyString_GET_SIZE(self);
01879 char *sep = PyString_AS_STRING(sepobj);
01880 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
01881 Py_ssize_t i, j;
01882
01883 i = 0;
01884 if (striptype != RIGHTSTRIP) {
01885 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
01886 i++;
01887 }
01888 }
01889
01890 j = len;
01891 if (striptype != LEFTSTRIP) {
01892 do {
01893 j--;
01894 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
01895 j++;
01896 }
01897
01898 if (i == 0 && j == len && PyString_CheckExact(self)) {
01899 Py_INCREF(self);
01900 return (PyObject*)self;
01901 }
01902 else
01903 return PyString_FromStringAndSize(s+i, j-i);
01904 }
01905
01906
01907 static PyObject *
01908 do_strip(PyStringObject *self, int striptype)
01909 {
01910 char *s = PyString_AS_STRING(self);
01911 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
01912
01913 i = 0;
01914 if (striptype != RIGHTSTRIP) {
01915 while (i < len && isspace(Py_CHARMASK(s[i]))) {
01916 i++;
01917 }
01918 }
01919
01920 j = len;
01921 if (striptype != LEFTSTRIP) {
01922 do {
01923 j--;
01924 } while (j >= i && isspace(Py_CHARMASK(s[j])));
01925 j++;
01926 }
01927
01928 if (i == 0 && j == len && PyString_CheckExact(self)) {
01929 Py_INCREF(self);
01930 return (PyObject*)self;
01931 }
01932 else
01933 return PyString_FromStringAndSize(s+i, j-i);
01934 }
01935
01936
01937 static PyObject *
01938 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
01939 {
01940 PyObject *sep = NULL;
01941
01942 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
01943 return NULL;
01944
01945 if (sep != NULL && sep != Py_None) {
01946 if (PyString_Check(sep))
01947 return do_xstrip(self, striptype, sep);
01948 #ifdef Py_USING_UNICODE
01949 else if (PyUnicode_Check(sep)) {
01950 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
01951 PyObject *res;
01952 if (uniself==NULL)
01953 return NULL;
01954 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
01955 striptype, sep);
01956 Py_DECREF(uniself);
01957 return res;
01958 }
01959 #endif
01960 else {
01961 PyErr_Format(PyExc_TypeError,
01962 #ifdef Py_USING_UNICODE
01963 "%s arg must be None, str or unicode",
01964 #else
01965 "%s arg must be None or str",
01966 #endif
01967 STRIPNAME(striptype));
01968 return NULL;
01969 }
01970 return do_xstrip(self, striptype, sep);
01971 }
01972
01973 return do_strip(self, striptype);
01974 }
01975
01976
01977 PyDoc_STRVAR(strip__doc__,
01978 "S.strip([chars]) -> string or unicode\n\
01979 \n\
01980 Return a copy of the string S with leading and trailing\n\
01981 whitespace removed.\n\
01982 If chars is given and not None, remove characters in chars instead.\n\
01983 If chars is unicode, S will be converted to unicode before stripping");
01984
01985 static PyObject *
01986 string_strip(PyStringObject *self, PyObject *args)
01987 {
01988 if (PyTuple_GET_SIZE(args) == 0)
01989 return do_strip(self, BOTHSTRIP);
01990 else
01991 return do_argstrip(self, BOTHSTRIP, args);
01992 }
01993
01994
01995 PyDoc_STRVAR(lstrip__doc__,
01996 "S.lstrip([chars]) -> string or unicode\n\
01997 \n\
01998 Return a copy of the string S with leading whitespace removed.\n\
01999 If chars is given and not None, remove characters in chars instead.\n\
02000 If chars is unicode, S will be converted to unicode before stripping");
02001
02002 static PyObject *
02003 string_lstrip(PyStringObject *self, PyObject *args)
02004 {
02005 if (PyTuple_GET_SIZE(args) == 0)
02006 return do_strip(self, LEFTSTRIP);
02007 else
02008 return do_argstrip(self, LEFTSTRIP, args);
02009 }
02010
02011
02012 PyDoc_STRVAR(rstrip__doc__,
02013 "S.rstrip([chars]) -> string or unicode\n\
02014 \n\
02015 Return a copy of the string S with trailing whitespace removed.\n\
02016 If chars is given and not None, remove characters in chars instead.\n\
02017 If chars is unicode, S will be converted to unicode before stripping");
02018
02019 static PyObject *
02020 string_rstrip(PyStringObject *self, PyObject *args)
02021 {
02022 if (PyTuple_GET_SIZE(args) == 0)
02023 return do_strip(self, RIGHTSTRIP);
02024 else
02025 return do_argstrip(self, RIGHTSTRIP, args);
02026 }
02027
02028
02029 PyDoc_STRVAR(lower__doc__,
02030 "S.lower() -> string\n\
02031 \n\
02032 Return a copy of the string S converted to lowercase.");
02033
02034 static PyObject *
02035 string_lower(PyStringObject *self)
02036 {
02037 char *s = PyString_AS_STRING(self), *s_new;
02038 Py_ssize_t i, n = PyString_GET_SIZE(self);
02039 PyObject *new;
02040
02041 new = PyString_FromStringAndSize(NULL, n);
02042 if (new == NULL)
02043 return NULL;
02044 s_new = PyString_AsString(new);
02045 for (i = 0; i < n; i++) {
02046 int c = Py_CHARMASK(*s++);
02047 if (isupper(c)) {
02048 *s_new = tolower(c);
02049 } else
02050 *s_new = c;
02051 s_new++;
02052 }
02053 return new;
02054 }
02055
02056
02057 PyDoc_STRVAR(upper__doc__,
02058 "S.upper() -> string\n\
02059 \n\
02060 Return a copy of the string S converted to uppercase.");
02061
02062 static PyObject *
02063 string_upper(PyStringObject *self)
02064 {
02065 char *s = PyString_AS_STRING(self), *s_new;
02066 Py_ssize_t i, n = PyString_GET_SIZE(self);
02067 PyObject *new;
02068
02069 new = PyString_FromStringAndSize(NULL, n);
02070 if (new == NULL)
02071 return NULL;
02072 s_new = PyString_AsString(new);
02073 for (i = 0; i < n; i++) {
02074 int c = Py_CHARMASK(*s++);
02075 if (islower(c)) {
02076 *s_new = toupper(c);
02077 } else
02078 *s_new = c;
02079 s_new++;
02080 }
02081 return new;
02082 }
02083
02084
02085 PyDoc_STRVAR(title__doc__,
02086 "S.title() -> string\n\
02087 \n\
02088 Return a titlecased version of S, i.e. words start with uppercase\n\
02089 characters, all remaining cased characters have lowercase.");
02090
02091 static PyObject*
02092 string_title(PyStringObject *self)
02093 {
02094 char *s = PyString_AS_STRING(self), *s_new;
02095 Py_ssize_t i, n = PyString_GET_SIZE(self);
02096 int previous_is_cased = 0;
02097 PyObject *new;
02098
02099 new = PyString_FromStringAndSize(NULL, n);
02100 if (new == NULL)
02101 return NULL;
02102 s_new = PyString_AsString(new);
02103 for (i = 0; i < n; i++) {
02104 int c = Py_CHARMASK(*s++);
02105 if (islower(c)) {
02106 if (!previous_is_cased)
02107 c = toupper(c);
02108 previous_is_cased = 1;
02109 } else if (isupper(c)) {
02110 if (previous_is_cased)
02111 c = tolower(c);
02112 previous_is_cased = 1;
02113 } else
02114 previous_is_cased = 0;
02115 *s_new++ = c;
02116 }
02117 return new;
02118 }
02119
02120 PyDoc_STRVAR(capitalize__doc__,
02121 "S.capitalize() -> string\n\
02122 \n\
02123 Return a copy of the string S with only its first character\n\
02124 capitalized.");
02125
02126 static PyObject *
02127 string_capitalize(PyStringObject *self)
02128 {
02129 char *s = PyString_AS_STRING(self), *s_new;
02130 Py_ssize_t i, n = PyString_GET_SIZE(self);
02131 PyObject *new;
02132
02133 new = PyString_FromStringAndSize(NULL, n);
02134 if (new == NULL)
02135 return NULL;
02136 s_new = PyString_AsString(new);
02137 if (0 < n) {
02138 int c = Py_CHARMASK(*s++);
02139 if (islower(c))
02140 *s_new = toupper(c);
02141 else
02142 *s_new = c;
02143 s_new++;
02144 }
02145 for (i = 1; i < n; i++) {
02146 int c = Py_CHARMASK(*s++);
02147 if (isupper(c))
02148 *s_new = tolower(c);
02149 else
02150 *s_new = c;
02151 s_new++;
02152 }
02153 return new;
02154 }
02155
02156
02157 PyDoc_STRVAR(count__doc__,
02158 "S.count(sub[, start[, end]]) -> int\n\
02159 \n\
02160 Return the number of occurrences of substring sub in string\n\
02161 S[start:end]. Optional arguments start and end are\n\
02162 interpreted as in slice notation.");
02163
02164 static PyObject *
02165 string_count(PyStringObject *self, PyObject *args)
02166 {
02167 const char *s = PyString_AS_STRING(self), *sub, *t;
02168 Py_ssize_t len = PyString_GET_SIZE(self), n;
02169 Py_ssize_t i = 0, last = INT_MAX;
02170 Py_ssize_t m, r;
02171 PyObject *subobj;
02172
02173 if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
02174 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
02175 return NULL;
02176
02177 if (PyString_Check(subobj)) {
02178 sub = PyString_AS_STRING(subobj);
02179 n = PyString_GET_SIZE(subobj);
02180 }
02181 #ifdef Py_USING_UNICODE
02182 else if (PyUnicode_Check(subobj)) {
02183 Py_ssize_t count;
02184 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
02185 if (count == -1)
02186 return NULL;
02187 else
02188 return PyInt_FromLong((long) count);
02189 }
02190 #endif
02191 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
02192 return NULL;
02193
02194 string_adjust_indices(&i, &last, len);
02195
02196 m = last + 1 - n;
02197 if (n == 0)
02198 return PyInt_FromSsize_t(m-i);
02199
02200 r = 0;
02201 while (i < m) {
02202 if (!memcmp(s+i, sub, n)) {
02203 r++;
02204 i += n;
02205 } else {
02206 i++;
02207 }
02208 if (i >= m)
02209 break;
02210 t = memchr(s+i, sub[0], m-i);
02211 if (t == NULL)
02212 break;
02213 i = t - s;
02214 }
02215 return PyInt_FromSsize_t(r);
02216 }
02217
02218 PyDoc_STRVAR(swapcase__doc__,
02219 "S.swapcase() -> string\n\
02220 \n\
02221 Return a copy of the string S with uppercase characters\n\
02222 converted to lowercase and vice versa.");
02223
02224 static PyObject *
02225 string_swapcase(PyStringObject *self)
02226 {
02227 char *s = PyString_AS_STRING(self), *s_new;
02228 Py_ssize_t i, n = PyString_GET_SIZE(self);
02229 PyObject *new;
02230
02231 new = PyString_FromStringAndSize(NULL, n);
02232 if (new == NULL)
02233 return NULL;
02234 s_new = PyString_AsString(new);
02235 for (i = 0; i < n; i++) {
02236 int c = Py_CHARMASK(*s++);
02237 if (islower(c)) {
02238 *s_new = toupper(c);
02239 }
02240 else if (isupper(c)) {
02241 *s_new = tolower(c);
02242 }
02243 else
02244 *s_new = c;
02245 s_new++;
02246 }
02247 return new;
02248 }
02249
02250
02251 PyDoc_STRVAR(translate__doc__,
02252 "S.translate(table [,deletechars]) -> string\n\
02253 \n\
02254 Return a copy of the string S, where all characters occurring\n\
02255 in the optional argument deletechars are removed, and the\n\
02256 remaining characters have been mapped through the given\n\
02257 translation table, which must be a string of length 256.");
02258
02259 static PyObject *
02260 string_translate(PyStringObject *self, PyObject *args)
02261 {
02262 register char *input, *output;
02263 register const char *table;
02264 register Py_ssize_t i, c, changed = 0;
02265 PyObject *input_obj = (PyObject*)self;
02266 const char *table1, *output_start, *del_table=NULL;
02267 Py_ssize_t inlen, tablen, dellen = 0;
02268 PyObject *result;
02269 int trans_table[256];
02270 PyObject *tableobj, *delobj = NULL;
02271
02272 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
02273 &tableobj, &delobj))
02274 return NULL;
02275
02276 if (PyString_Check(tableobj)) {
02277 table1 = PyString_AS_STRING(tableobj);
02278 tablen = PyString_GET_SIZE(tableobj);
02279 }
02280 #ifdef Py_USING_UNICODE
02281 else if (PyUnicode_Check(tableobj)) {
02282
02283
02284
02285 if (delobj != NULL) {
02286 PyErr_SetString(PyExc_TypeError,
02287 "deletions are implemented differently for unicode");
02288 return NULL;
02289 }
02290 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
02291 }
02292 #endif
02293 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
02294 return NULL;
02295
02296 if (tablen != 256) {
02297 PyErr_SetString(PyExc_ValueError,
02298 "translation table must be 256 characters long");
02299 return NULL;
02300 }
02301
02302 if (delobj != NULL) {
02303 if (PyString_Check(delobj)) {
02304 del_table = PyString_AS_STRING(delobj);
02305 dellen = PyString_GET_SIZE(delobj);
02306 }
02307 #ifdef Py_USING_UNICODE
02308 else if (PyUnicode_Check(delobj)) {
02309 PyErr_SetString(PyExc_TypeError,
02310 "deletions are implemented differently for unicode");
02311 return NULL;
02312 }
02313 #endif
02314 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
02315 return NULL;
02316 }
02317 else {
02318 del_table = NULL;
02319 dellen = 0;
02320 }
02321
02322 table = table1;
02323 inlen = PyString_Size(input_obj);
02324 result = PyString_FromStringAndSize((char *)NULL, inlen);
02325 if (result == NULL)
02326 return NULL;
02327 output_start = output = PyString_AsString(result);
02328 input = PyString_AsString(input_obj);
02329
02330 if (dellen == 0) {
02331
02332 for (i = inlen; --i >= 0; ) {
02333 c = Py_CHARMASK(*input++);
02334 if (Py_CHARMASK((*output++ = table[c])) != c)
02335 changed = 1;
02336 }
02337 if (changed || !PyString_CheckExact(input_obj))
02338 return result;
02339 Py_DECREF(result);
02340 Py_INCREF(input_obj);
02341 return input_obj;
02342 }
02343
02344 for (i = 0; i < 256; i++)
02345 trans_table[i] = Py_CHARMASK(table[i]);
02346
02347 for (i = 0; i < dellen; i++)
02348 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
02349
02350 for (i = inlen; --i >= 0; ) {
02351 c = Py_CHARMASK(*input++);
02352 if (trans_table[c] != -1)
02353 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
02354 continue;
02355 changed = 1;
02356 }
02357 if (!changed && PyString_CheckExact(input_obj)) {
02358 Py_DECREF(result);
02359 Py_INCREF(input_obj);
02360 return input_obj;
02361 }
02362
02363 if (inlen > 0)
02364 _PyString_Resize(&result, output - output_start);
02365 return result;
02366 }
02367
02368
02369
02370
02371
02372
02373
02374
02375
02376
02377
02378
02379
02380
02381 static Py_ssize_t
02382 mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
02383 {
02384 register Py_ssize_t ii;
02385
02386
02387 len -= pat_len;
02388
02389 for (ii = 0; ii <= len; ii++) {
02390 if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
02391 return ii;
02392 }
02393 }
02394 return -1;
02395 }
02396
02397
02398
02399
02400
02401
02402
02403
02404 static Py_ssize_t
02405 mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
02406 {
02407 register Py_ssize_t offset = 0;
02408 Py_ssize_t nfound = 0;
02409
02410 while (len >= 0) {
02411 offset = mymemfind(mem, len, pat, pat_len);
02412 if (offset == -1)
02413 break;
02414 mem += offset + pat_len;
02415 len -= offset + pat_len;
02416 nfound++;
02417 }
02418 return nfound;
02419 }
02420
02421
02422
02423
02424
02425
02426
02427
02428
02429
02430
02431
02432
02433
02434
02435
02436
02437
02438
02439
02440 static char *
02441 mymemreplace(const char *str, Py_ssize_t len,
02442 const char *pat, Py_ssize_t pat_len,
02443 const char *sub, Py_ssize_t sub_len,
02444 Py_ssize_t count,
02445 Py_ssize_t *out_len)
02446 {
02447 char *out_s;
02448 char *new_s;
02449 Py_ssize_t nfound, offset, new_len;
02450
02451 if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
02452 goto return_same;
02453
02454
02455 nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
02456 if (count < 0)
02457 count = INT_MAX;
02458 else if (nfound > count)
02459 nfound = count;
02460 if (nfound == 0)
02461 goto return_same;
02462
02463 new_len = len + nfound*(sub_len - pat_len);
02464 if (new_len == 0) {
02465
02466 out_s = (char *)PyMem_MALLOC(1);
02467 if (out_s == NULL)
02468 return NULL;
02469 out_s[0] = '\0';
02470 }
02471 else {
02472 assert(new_len > 0);
02473 new_s = (char *)PyMem_MALLOC(new_len);
02474 if (new_s == NULL)
02475 return NULL;
02476 out_s = new_s;
02477
02478 if (pat_len > 0) {
02479 for (; nfound > 0; --nfound) {
02480
02481 offset = mymemfind(str, len, pat, pat_len);
02482 if (offset == -1)
02483 break;
02484
02485
02486 memcpy(new_s, str, offset);
02487 str += offset + pat_len;
02488 len -= offset + pat_len;
02489
02490
02491 new_s += offset;
02492 memcpy(new_s, sub, sub_len);
02493 new_s += sub_len;
02494 }
02495
02496 if (len > 0)
02497 memcpy(new_s, str, len);
02498 }
02499 else {
02500 for (;;++str, --len) {
02501 memcpy(new_s, sub, sub_len);
02502 new_s += sub_len;
02503 if (--nfound <= 0) {
02504 memcpy(new_s, str, len);
02505 break;
02506 }
02507 *new_s++ = *str;
02508 }
02509 }
02510 }
02511 *out_len = new_len;
02512 return out_s;
02513
02514 return_same:
02515 *out_len = -1;
02516 return (char *)str;
02517 }
02518
02519
02520 PyDoc_STRVAR(replace__doc__,
02521 "S.replace (old, new[, count]) -> string\n\
02522 \n\
02523 Return a copy of string S with all occurrences of substring\n\
02524 old replaced by new. If the optional argument count is\n\
02525 given, only the first count occurrences are replaced.");
02526
02527 static PyObject *
02528 string_replace(PyStringObject *self, PyObject *args)
02529 {
02530 const char *str = PyString_AS_STRING(self), *sub, *repl;
02531 char *new_s;
02532 const Py_ssize_t len = PyString_GET_SIZE(self);
02533 Py_ssize_t sub_len, repl_len, out_len;
02534 int count = -1;
02535 PyObject *new;
02536 PyObject *subobj, *replobj;
02537
02538 if (!PyArg_ParseTuple(args, "OO|i:replace",
02539 &subobj, &replobj, &count))
02540 return NULL;
02541
02542 if (PyString_Check(subobj)) {
02543 sub = PyString_AS_STRING(subobj);
02544 sub_len = PyString_GET_SIZE(subobj);
02545 }
02546 #ifdef Py_USING_UNICODE
02547 else if (PyUnicode_Check(subobj))
02548 return PyUnicode_Replace((PyObject *)self,
02549 subobj, replobj, count);
02550 #endif
02551 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
02552 return NULL;
02553
02554 if (PyString_Check(replobj)) {
02555 repl = PyString_AS_STRING(replobj);
02556 repl_len = PyString_GET_SIZE(replobj);
02557 }
02558 #ifdef Py_USING_UNICODE
02559 else if (PyUnicode_Check(replobj))
02560 return PyUnicode_Replace((PyObject *)self,
02561 subobj, replobj, count);
02562 #endif
02563 else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
02564 return NULL;
02565
02566 new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
02567 if (new_s == NULL) {
02568 PyErr_NoMemory();
02569 return NULL;
02570 }
02571 if (out_len == -1) {
02572 if (PyString_CheckExact(self)) {
02573
02574 new = (PyObject*)self;
02575 Py_INCREF(new);
02576 }
02577 else {
02578 new = PyString_FromStringAndSize(str, len);
02579 if (new == NULL)
02580 return NULL;
02581 }
02582 }
02583 else {
02584 new = PyString_FromStringAndSize(new_s, out_len);
02585 PyMem_FREE(new_s);
02586 }
02587 return new;
02588 }
02589
02590
02591 PyDoc_STRVAR(startswith__doc__,
02592 "S.startswith(prefix[, start[, end]]) -> bool\n\
02593 \n\
02594 Return True if S starts with the specified prefix, False otherwise.\n\
02595 With optional start, test S beginning at that position.\n\
02596 With optional end, stop comparing S at that position.");
02597
02598 static PyObject *
02599 string_startswith(PyStringObject *self, PyObject *args)
02600 {
02601 const char* str = PyString_AS_STRING(self);
02602 Py_ssize_t len = PyString_GET_SIZE(self);
02603 const char* prefix;
02604 Py_ssize_t plen;
02605 Py_ssize_t start = 0;
02606 Py_ssize_t end = INT_MAX;
02607 PyObject *subobj;
02608
02609 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
02610 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
02611 return NULL;
02612 if (PyString_Check(subobj)) {
02613 prefix = PyString_AS_STRING(subobj);
02614 plen = PyString_GET_SIZE(subobj);
02615 }
02616 #ifdef Py_USING_UNICODE
02617 else if (PyUnicode_Check(subobj)) {
02618 Py_ssize_t rc;
02619 rc = PyUnicode_Tailmatch((PyObject *)self,
02620 subobj, start, end, -1);
02621 if (rc == -1)
02622 return NULL;
02623 else
02624 return PyBool_FromLong((long) rc);
02625 }
02626 #endif
02627 else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
02628 return NULL;
02629
02630 string_adjust_indices(&start, &end, len);
02631
02632 if (start+plen > len)
02633 return PyBool_FromLong(0);
02634
02635 if (end-start >= plen)
02636 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
02637 else
02638 return PyBool_FromLong(0);
02639 }
02640
02641
02642 PyDoc_STRVAR(endswith__doc__,
02643 "S.endswith(suffix[, start[, end]]) -> bool\n\
02644 \n\
02645 Return True if S ends with the specified suffix, False otherwise.\n\
02646 With optional start, test S beginning at that position.\n\
02647 With optional end, stop comparing S at that position.");
02648
02649 static PyObject *
02650 string_endswith(PyStringObject *self, PyObject *args)
02651 {
02652 const char* str = PyString_AS_STRING(self);
02653 Py_ssize_t len = PyString_GET_SIZE(self);
02654 const char* suffix;
02655 Py_ssize_t slen;
02656 Py_ssize_t start = 0;
02657 Py_ssize_t end = INT_MAX;
02658 PyObject *subobj;
02659
02660 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
02661 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
02662 return NULL;
02663 if (PyString_Check(subobj)) {
02664 suffix = PyString_AS_STRING(subobj);
02665 slen = PyString_GET_SIZE(subobj);
02666 }
02667 #ifdef Py_USING_UNICODE
02668 else if (PyUnicode_Check(subobj)) {
02669 Py_ssize_t rc;
02670 rc = PyUnicode_Tailmatch((PyObject *)self,
02671 subobj, start, end, +1);
02672 if (rc == -1)
02673 return NULL;
02674 else
02675 return PyBool_FromLong((long) rc);
02676 }
02677 #endif
02678 else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
02679 return NULL;
02680
02681 string_adjust_indices(&start, &end, len);
02682
02683 if (end-start < slen || start > len)
02684 return PyBool_FromLong(0);
02685
02686 if (end-slen > start)
02687 start = end - slen;
02688 if (end-start >= slen)
02689 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
02690 else
02691 return PyBool_FromLong(0);
02692 }
02693
02694
02695 PyDoc_STRVAR(encode__doc__,
02696 "S.encode([encoding[,errors]]) -> object\n\
02697 \n\
02698 Encodes S using the codec registered for encoding. encoding defaults\n\
02699 to the default encoding. errors may be given to set a different error\n\
02700 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
02701 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
02702 'xmlcharrefreplace' as well as any other name registered with\n\
02703 codecs.register_error that is able to handle UnicodeEncodeErrors.");
02704
02705 static PyObject *
02706 string_encode(PyStringObject *self, PyObject *args)
02707 {
02708 char *encoding = NULL;
02709 char *errors = NULL;
02710 PyObject *v;
02711
02712 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
02713 return NULL;
02714 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
02715 if (v == NULL)
02716 goto onError;
02717 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
02718 PyErr_Format(PyExc_TypeError,
02719 "encoder did not return a string/unicode object "
02720 "(type=%.400s)",
02721 v->ob_type->tp_name);
02722 Py_DECREF(v);
02723 return NULL;
02724 }
02725 return v;
02726
02727 onError:
02728 return NULL;
02729 }
02730
02731
02732 PyDoc_STRVAR(decode__doc__,
02733 "S.decode([encoding[,errors]]) -> object\n\
02734 \n\
02735 Decodes S using the codec registered for encoding. encoding defaults\n\
02736 to the default encoding. errors may be given to set a different error\n\
02737 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
02738 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
02739 as well as any other name registerd with codecs.register_error that is\n\
02740 able to handle UnicodeDecodeErrors.");
02741
02742 static PyObject *
02743 string_decode(PyStringObject *self, PyObject *args)
02744 {
02745 char *encoding = NULL;
02746 char *errors = NULL;
02747 PyObject *v;
02748
02749 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
02750 return NULL;
02751 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
02752 if (v == NULL)
02753 goto onError;
02754 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
02755 PyErr_Format(PyExc_TypeError,
02756 "decoder did not return a string/unicode object "
02757 "(type=%.400s)",
02758 v->ob_type->tp_name);
02759 Py_DECREF(v);
02760 return NULL;
02761 }
02762 return v;
02763
02764 onError:
02765 return NULL;
02766 }
02767
02768
02769 PyDoc_STRVAR(expandtabs__doc__,
02770 "S.expandtabs([tabsize]) -> string\n\
02771 \n\
02772 Return a copy of S where all tab characters are expanded using spaces.\n\
02773 If tabsize is not given, a tab size of 8 characters is assumed.");
02774
02775 static PyObject*
02776 string_expandtabs(PyStringObject *self, PyObject *args)
02777 {
02778 const char *e, *p;
02779 char *q;
02780 Py_ssize_t i, j;
02781 PyObject *u;
02782 int tabsize = 8;
02783
02784 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
02785 return NULL;
02786
02787
02788 i = j = 0;
02789 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
02790 for (p = PyString_AS_STRING(self); p < e; p++)
02791 if (*p == '\t') {
02792 if (tabsize > 0)
02793 j += tabsize - (j % tabsize);
02794 }
02795 else {
02796 j++;
02797 if (*p == '\n' || *p == '\r') {
02798 i += j;
02799 j = 0;
02800 }
02801 }
02802
02803
02804 u = PyString_FromStringAndSize(NULL, i + j);
02805 if (!u)
02806 return NULL;
02807
02808 j = 0;
02809 q = PyString_AS_STRING(u);
02810
02811 for (p = PyString_AS_STRING(self); p < e; p++)
02812 if (*p == '\t') {
02813 if (tabsize > 0) {
02814 i = tabsize - (j % tabsize);
02815 j += i;
02816 while (i--)
02817 *q++ = ' ';
02818 }
02819 }
02820 else {
02821 j++;
02822 *q++ = *p;
02823 if (*p == '\n' || *p == '\r')
02824 j = 0;
02825 }
02826
02827 return u;
02828 }
02829
02830 static PyObject *
02831 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
02832 {
02833 PyObject *u;
02834
02835 if (left < 0)
02836 left = 0;
02837 if (right < 0)
02838 right = 0;
02839
02840 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
02841 Py_INCREF(self);
02842 return (PyObject *)self;
02843 }
02844
02845 u = PyString_FromStringAndSize(NULL,
02846 left + PyString_GET_SIZE(self) + right);
02847 if (u) {
02848 if (left)
02849 memset(PyString_AS_STRING(u), fill, left);
02850 memcpy(PyString_AS_STRING(u) + left,
02851 PyString_AS_STRING(self),
02852 PyString_GET_SIZE(self));
02853 if (right)
02854 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
02855 fill, right);
02856 }
02857
02858 return u;
02859 }
02860
02861 PyDoc_STRVAR(ljust__doc__,
02862 "S.ljust(width[, fillchar]) -> string\n"
02863 "\n"
02864 "Return S left justified in a string of length width. Padding is\n"
02865 "done using the specified fill character (default is a space).");
02866
02867 static PyObject *
02868 string_ljust(PyStringObject *self, PyObject *args)
02869 {
02870 int width;
02871 char fillchar = ' ';
02872
02873 if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
02874 return NULL;
02875
02876 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
02877 Py_INCREF(self);
02878 return (PyObject*) self;
02879 }
02880
02881 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
02882 }
02883
02884
02885 PyDoc_STRVAR(rjust__doc__,
02886 "S.rjust(width[, fillchar]) -> string\n"
02887 "\n"
02888 "Return S right justified in a string of length width. Padding is\n"
02889 "done using the specified fill character (default is a space)");
02890
02891 static PyObject *
02892 string_rjust(PyStringObject *self, PyObject *args)
02893 {
02894 int width;
02895 char fillchar = ' ';
02896
02897 if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
02898 return NULL;
02899
02900 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
02901 Py_INCREF(self);
02902 return (PyObject*) self;
02903 }
02904
02905 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
02906 }
02907
02908
02909 PyDoc_STRVAR(center__doc__,
02910 "S.center(width[, fillchar]) -> string\n"
02911 "\n"
02912 "Return S centered in a string of length width. Padding is\n"
02913 "done using the specified fill character (default is a space)");
02914
02915 static PyObject *
02916 string_center(PyStringObject *self, PyObject *args)
02917 {
02918 Py_ssize_t marg, left;
02919 long width;
02920 char fillchar = ' ';
02921
02922 if (!PyArg_ParseTuple(args, "l|c:center", &width, &fillchar))
02923 return NULL;
02924
02925 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
02926 Py_INCREF(self);
02927 return (PyObject*) self;
02928 }
02929
02930 marg = width - PyString_GET_SIZE(self);
02931 left = marg / 2 + (marg & width & 1);
02932
02933 return pad(self, left, marg - left, fillchar);
02934 }
02935
02936 PyDoc_STRVAR(zfill__doc__,
02937 "S.zfill(width) -> string\n"
02938 "\n"
02939 "Pad a numeric string S with zeros on the left, to fill a field\n"
02940 "of the specified width. The string S is never truncated.");
02941
02942 static PyObject *
02943 string_zfill(PyStringObject *self, PyObject *args)
02944 {
02945 Py_ssize_t fill;
02946 PyObject *s;
02947 char *p;
02948
02949 long width;
02950 if (!PyArg_ParseTuple(args, "l:zfill", &width))
02951 return NULL;
02952
02953 if (PyString_GET_SIZE(self) >= width) {
02954 if (PyString_CheckExact(self)) {
02955 Py_INCREF(self);
02956 return (PyObject*) self;
02957 }
02958 else
02959 return PyString_FromStringAndSize(
02960 PyString_AS_STRING(self),
02961 PyString_GET_SIZE(self)
02962 );
02963 }
02964
02965 fill = width - PyString_GET_SIZE(self);
02966
02967 s = pad(self, fill, 0, '0');
02968
02969 if (s == NULL)
02970 return NULL;
02971
02972 p = PyString_AS_STRING(s);
02973 if (p[fill] == '+' || p[fill] == '-') {
02974
02975 p[0] = p[fill];
02976 p[fill] = '0';
02977 }
02978
02979 return (PyObject*) s;
02980 }
02981
02982 PyDoc_STRVAR(isspace__doc__,
02983 "S.isspace() -> bool\n\
02984 \n\
02985 Return True if all characters in S are whitespace\n\
02986 and there is at least one character in S, False otherwise.");
02987
02988 static PyObject*
02989 string_isspace(PyStringObject *self)
02990 {
02991 register const unsigned char *p
02992 = (unsigned char *) PyString_AS_STRING(self);
02993 register const unsigned char *e;
02994
02995
02996 if (PyString_GET_SIZE(self) == 1 &&
02997 isspace(*p))
02998 return PyBool_FromLong(1);
02999
03000
03001 if (PyString_GET_SIZE(self) == 0)
03002 return PyBool_FromLong(0);
03003
03004 e = p + PyString_GET_SIZE(self);
03005 for (; p < e; p++) {
03006 if (!isspace(*p))
03007 return PyBool_FromLong(0);
03008 }
03009 return PyBool_FromLong(1);
03010 }
03011
03012
03013 PyDoc_STRVAR(isalpha__doc__,
03014 "S.isalpha() -> bool\n\
03015 \n\
03016 Return True if all characters in S are alphabetic\n\
03017 and there is at least one character in S, False otherwise.");
03018
03019 static PyObject*
03020 string_isalpha(PyStringObject *self)
03021 {
03022 register const unsigned char *p
03023 = (unsigned char *) PyString_AS_STRING(self);
03024 register const unsigned char *e;
03025
03026
03027 if (PyString_GET_SIZE(self) == 1 &&
03028 isalpha(*p))
03029 return PyBool_FromLong(1);
03030
03031
03032 if (PyString_GET_SIZE(self) == 0)
03033 return PyBool_FromLong(0);
03034
03035 e = p + PyString_GET_SIZE(self);
03036 for (; p < e; p++) {
03037 if (!isalpha(*p))
03038 return PyBool_FromLong(0);
03039 }
03040 return PyBool_FromLong(1);
03041 }
03042
03043
03044 PyDoc_STRVAR(isalnum__doc__,
03045 "S.isalnum() -> bool\n\
03046 \n\
03047 Return True if all characters in S are alphanumeric\n\
03048 and there is at least one character in S, False otherwise.");
03049
03050 static PyObject*
03051 string_isalnum(PyStringObject *self)
03052 {
03053 register const unsigned char *p
03054 = (unsigned char *) PyString_AS_STRING(self);
03055 register const unsigned char *e;
03056
03057
03058 if (PyString_GET_SIZE(self) == 1 &&
03059 isalnum(*p))
03060 return PyBool_FromLong(1);
03061
03062
03063 if (PyString_GET_SIZE(self) == 0)
03064 return PyBool_FromLong(0);
03065
03066 e = p + PyString_GET_SIZE(self);
03067 for (; p < e; p++) {
03068 if (!isalnum(*p))
03069 return PyBool_FromLong(0);
03070 }
03071 return PyBool_FromLong(1);
03072 }
03073
03074
03075 PyDoc_STRVAR(isdigit__doc__,
03076 "S.isdigit() -> bool\n\
03077 \n\
03078 Return True if all characters in S are digits\n\
03079 and there is at least one character in S, False otherwise.");
03080
03081 static PyObject*
03082 string_isdigit(PyStringObject *self)
03083 {
03084 register const unsigned char *p
03085 = (unsigned char *) PyString_AS_STRING(self);
03086 register const unsigned char *e;
03087
03088
03089 if (PyString_GET_SIZE(self) == 1 &&
03090 isdigit(*p))
03091 return PyBool_FromLong(1);
03092
03093
03094 if (PyString_GET_SIZE(self) == 0)
03095 return PyBool_FromLong(0);
03096
03097 e = p + PyString_GET_SIZE(self);
03098 for (; p < e; p++) {
03099 if (!isdigit(*p))
03100 return PyBool_FromLong(0);
03101 }
03102 return PyBool_FromLong(1);
03103 }
03104
03105
03106 PyDoc_STRVAR(islower__doc__,
03107 "S.islower() -> bool\n\
03108 \n\
03109 Return True if all cased characters in S are lowercase and there is\n\
03110 at least one cased character in S, False otherwise.");
03111
03112 static PyObject*
03113 string_islower(PyStringObject *self)
03114 {
03115 register const unsigned char *p
03116 = (unsigned char *) PyString_AS_STRING(self);
03117 register const unsigned char *e;
03118 int cased;
03119
03120
03121 if (PyString_GET_SIZE(self) == 1)
03122 return PyBool_FromLong(islower(*p) != 0);
03123
03124
03125 if (PyString_GET_SIZE(self) == 0)
03126 return PyBool_FromLong(0);
03127
03128 e = p + PyString_GET_SIZE(self);
03129 cased = 0;
03130 for (; p < e; p++) {
03131 if (isupper(*p))
03132 return PyBool_FromLong(0);
03133 else if (!cased && islower(*p))
03134 cased = 1;
03135 }
03136 return PyBool_FromLong(cased);
03137 }
03138
03139
03140 PyDoc_STRVAR(isupper__doc__,
03141 "S.isupper() -> bool\n\
03142 \n\
03143 Return True if all cased characters in S are uppercase and there is\n\
03144 at least one cased character in S, False otherwise.");
03145
03146 static PyObject*
03147 string_isupper(PyStringObject *self)
03148 {
03149 register const unsigned char *p
03150 = (unsigned char *) PyString_AS_STRING(self);
03151 register const unsigned char *e;
03152 int cased;
03153
03154
03155 if (PyString_GET_SIZE(self) == 1)
03156 return PyBool_FromLong(isupper(*p) != 0);
03157
03158
03159 if (PyString_GET_SIZE(self) == 0)
03160 return PyBool_FromLong(0);
03161
03162 e = p + PyString_GET_SIZE(self);
03163 cased = 0;
03164 for (; p < e; p++) {
03165 if (islower(*p))
03166 return PyBool_FromLong(0);
03167 else if (!cased && isupper(*p))
03168 cased = 1;
03169 }
03170 return PyBool_FromLong(cased);
03171 }
03172
03173
03174 PyDoc_STRVAR(istitle__doc__,
03175 "S.istitle() -> bool\n\
03176 \n\
03177 Return True if S is a titlecased string and there is at least one\n\
03178 character in S, i.e. uppercase characters may only follow uncased\n\
03179 characters and lowercase characters only cased ones. Return False\n\
03180 otherwise.");
03181
03182 static PyObject*
03183 string_istitle(PyStringObject *self, PyObject *uncased)
03184 {
03185 register const unsigned char *p
03186 = (unsigned char *) PyString_AS_STRING(self);
03187 register const unsigned char *e;
03188 int cased, previous_is_cased;
03189
03190
03191 if (PyString_GET_SIZE(self) == 1)
03192 return PyBool_FromLong(isupper(*p) != 0);
03193
03194
03195 if (PyString_GET_SIZE(self) == 0)
03196 return PyBool_FromLong(0);
03197
03198 e = p + PyString_GET_SIZE(self);
03199 cased = 0;
03200 previous_is_cased = 0;
03201 for (; p < e; p++) {
03202 register const unsigned char ch = *p;
03203
03204 if (isupper(ch)) {
03205 if (previous_is_cased)
03206 return PyBool_FromLong(0);
03207 previous_is_cased = 1;
03208 cased = 1;
03209 }
03210 else if (islower(ch)) {
03211 if (!previous_is_cased)
03212 return PyBool_FromLong(0);
03213 previous_is_cased = 1;
03214 cased = 1;
03215 }
03216 else
03217 previous_is_cased = 0;
03218 }
03219 return PyBool_FromLong(cased);
03220 }
03221
03222
03223 PyDoc_STRVAR(splitlines__doc__,
03224 "S.splitlines([keepends]) -> list of strings\n\
03225 \n\
03226 Return a list of the lines in S, breaking at line boundaries.\n\
03227 Line breaks are not included in the resulting list unless keepends\n\
03228 is given and true.");
03229
03230 static PyObject*
03231 string_splitlines(PyStringObject *self, PyObject *args)
03232 {
03233 register Py_ssize_t i;
03234 register Py_ssize_t j;
03235 Py_ssize_t len;
03236 int keepends = 0;
03237 PyObject *list;
03238 PyObject *str;
03239 char *data;
03240
03241 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
03242 return NULL;
03243
03244 data = PyString_AS_STRING(self);
03245 len = PyString_GET_SIZE(self);
03246
03247 list = PyList_New(0);
03248 if (!list)
03249 goto onError;
03250
03251 for (i = j = 0; i < len; ) {
03252 Py_ssize_t eol;
03253
03254
03255 while (i < len && data[i] != '\n' && data[i] != '\r')
03256 i++;
03257
03258
03259 eol = i;
03260 if (i < len) {
03261 if (data[i] == '\r' && i + 1 < len &&
03262 data[i+1] == '\n')
03263 i += 2;
03264 else
03265 i++;
03266 if (keepends)
03267 eol = i;
03268 }
03269 SPLIT_APPEND(data, j, eol);
03270 j = i;
03271 }
03272 if (j < len) {
03273 SPLIT_APPEND(data, j, len);
03274 }
03275
03276 return list;
03277
03278 onError:
03279 Py_DECREF(list);
03280 return NULL;
03281 }
03282
03283 #undef SPLIT_APPEND
03284
03285 static PyObject *
03286 string_getnewargs(PyStringObject *v)
03287 {
03288 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
03289 }
03290
03291
/* Method table for the str type.  Each entry lists the Python-visible
 * method name, the C function implementing it, its calling-convention
 * flag (METH_O, METH_NOARGS or METH_VARARGS) and its docstring. */
03292 static PyMethodDef
03293 string_methods[] = {
03294
03295
03296 {"join", (PyCFunction)string_join, METH_O, join__doc__},
03297 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
03298 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
03299 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
03300 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
03301 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
03302 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
03303 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
03304 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
03305 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
03306 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
03307 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
03308 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
03309 capitalize__doc__},
03310 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
03311 {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
03312 endswith__doc__},
03313 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
03314 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
03315 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
03316 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
03317 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
03318 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
03319 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
03320 {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
03321 startswith__doc__},
03322 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
03323 {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
03324 swapcase__doc__},
03325 {"translate", (PyCFunction)string_translate, METH_VARARGS,
03326 translate__doc__},
03327 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
03328 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
03329 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
03330 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
03331 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
03332 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
03333 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
03334 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
03335 expandtabs__doc__},
03336 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
03337 splitlines__doc__},
03338 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS}, /* no docstring given */
03339 {NULL, NULL} /* all-NULL final entry; presumably the end-of-table marker */
03340 };
03341
03342 static PyObject *
03343 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
03344
03345 static PyObject *
03346 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
03347 {
03348 PyObject *x = NULL;
03349 static char *kwlist[] = {"object", 0};
03350
03351 if (type != &PyString_Type)
03352 return str_subtype_new(type, args, kwds);
03353 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
03354 return NULL;
03355 if (x == NULL)
03356 return PyString_FromString("");
03357 return PyObject_Str(x);
03358 }
03359
03360 static PyObject *
03361 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
03362 {
03363 PyObject *tmp, *pnew;
03364 Py_ssize_t n;
03365
03366 assert(PyType_IsSubtype(type, &PyString_Type));
03367 tmp = string_new(&PyString_Type, args, kwds);
03368 if (tmp == NULL)
03369 return NULL;
03370 assert(PyString_CheckExact(tmp));
03371 n = PyString_GET_SIZE(tmp);
03372 pnew = type->tp_alloc(type, n);
03373 if (pnew != NULL) {
03374 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
03375 ((PyStringObject *)pnew)->ob_shash =
03376 ((PyStringObject *)tmp)->ob_shash;
03377 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
03378 }
03379 Py_DECREF(tmp);
03380 return pnew;
03381 }
03382
03383 static PyObject *
03384 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
03385 {
03386 PyErr_SetString(PyExc_TypeError,
03387 "The basestring type cannot be instantiated");
03388 return NULL;
03389 }
03390
03391 static PyObject *
03392 string_mod(PyObject *v, PyObject *w)
03393 {
03394 if (!PyString_Check(v)) {
03395 Py_INCREF(Py_NotImplemented);
03396 return Py_NotImplemented;
03397 }
03398 return PyString_Format(v, w);
03399 }
03400
03401 PyDoc_STRVAR(basestring_doc,
03402 "Type basestring cannot be instantiated; it is the base for str and unicode.");
03403
/* Number-protocol table for str; only one slot is filled, with
 * string_mod.  Slot names in the trailing comments assume the
 * Python 2.x PyNumberMethods field order -- confirm against object.h. */
03404 static PyNumberMethods string_as_number = {
03405 0, /* nb_add */
03406 0, /* nb_subtract */
03407 0, /* nb_multiply */
03408 0, /* nb_divide */
03409 string_mod, /* nb_remainder */
03410 };
03411
03412
/* Type object for `basestring`.  Its base is PyBaseObject_Type and its
 * constructor slot is basestring_new, which always raises TypeError.
 * Slot names in the trailing comments assume the Python 2.x
 * PyTypeObject field order -- confirm against object.h. */
03413 PyTypeObject PyBaseString_Type = {
03414 PyObject_HEAD_INIT(&PyType_Type)
03415 0, /* ob_size */
03416 "basestring", /* tp_name */
03417 0, /* tp_basicsize */
03418 0, /* tp_itemsize */
03419 0, /* tp_dealloc */
03420 0, /* tp_print */
03421 0, /* tp_getattr */
03422 0, /* tp_setattr */
03423 0, /* tp_compare */
03424 0, /* tp_repr */
03425 0, /* tp_as_number */
03426 0, /* tp_as_sequence */
03427 0, /* tp_as_mapping */
03428 0, /* tp_hash */
03429 0, /* tp_call */
03430 0, /* tp_str */
03431 0, /* tp_getattro */
03432 0, /* tp_setattro */
03433 0, /* tp_as_buffer */
03434 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
03435 basestring_doc, /* tp_doc */
03436 0, /* tp_traverse */
03437 0, /* tp_clear */
03438 0, /* tp_richcompare */
03439 0, /* tp_weaklistoffset */
03440 0, /* tp_iter */
03441 0, /* tp_iternext */
03442 0, /* tp_methods */
03443 0, /* tp_members */
03444 0, /* tp_getset */
03445 &PyBaseObject_Type, /* tp_base */
03446 0, /* tp_dict */
03447 0, /* tp_descr_get */
03448 0, /* tp_descr_set */
03449 0, /* tp_dictoffset */
03450 0, /* tp_init */
03451 0, /* tp_alloc */
03452 basestring_new, /* tp_new */
03453 0, /* tp_free */
03454 };
03455
03456 PyDoc_STRVAR(string_doc,
03457 "str(object) -> string\n\
03458 \n\
03459 Return a nice string representation of the object.\n\
03460 If the argument is a string, the return value is the same object.");
03461
/* Type object for `str`.  Its base is PyBaseString_Type; instances are
 * variable-sized with sizeof(char) per item.  Slot names in the
 * trailing comments assume the Python 2.x PyTypeObject field order --
 * confirm against object.h. */
03462 PyTypeObject PyString_Type = {
03463 PyObject_HEAD_INIT(&PyType_Type)
03464 0, /* ob_size */
03465 "str", /* tp_name */
03466 sizeof(PyStringObject), /* tp_basicsize */
03467 sizeof(char), /* tp_itemsize */
03468 (destructor)string_dealloc, /* tp_dealloc */
03469 (printfunc)string_print, /* tp_print */
03470 0, /* tp_getattr */
03471 0, /* tp_setattr */
03472 0, /* tp_compare */
03473 (reprfunc)string_repr, /* tp_repr */
03474 &string_as_number, /* tp_as_number */
03475 &string_as_sequence, /* tp_as_sequence */
03476 &string_as_mapping, /* tp_as_mapping */
03477 (hashfunc)string_hash, /* tp_hash */
03478 0, /* tp_call */
03479 (reprfunc)string_str, /* tp_str */
03480 PyObject_GenericGetAttr, /* tp_getattro */
03481 0, /* tp_setattro */
03482 &string_as_buffer, /* tp_as_buffer */
03483 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
03484 Py_TPFLAGS_BASETYPE, /* tp_flags */
03485 string_doc, /* tp_doc */
03486 0, /* tp_traverse */
03487 0, /* tp_clear */
03488 (richcmpfunc)string_richcompare, /* tp_richcompare */
03489 0, /* tp_weaklistoffset */
03490 0, /* tp_iter */
03491 0, /* tp_iternext */
03492 string_methods, /* tp_methods */
03493 0, /* tp_members */
03494 0, /* tp_getset */
03495 &PyBaseString_Type, /* tp_base */
03496 0, /* tp_dict */
03497 0, /* tp_descr_get */
03498 0, /* tp_descr_set */
03499 0, /* tp_dictoffset */
03500 0, /* tp_init */
03501 0, /* tp_alloc */
03502 string_new, /* tp_new */
03503 PyObject_Del, /* tp_free */
03504 };
03505
03506 void
03507 PyString_Concat(register PyObject **pv, register PyObject *w)
03508 {
03509 register PyObject *v;
03510 if (*pv == NULL)
03511 return;
03512 if (w == NULL || !PyString_Check(*pv)) {
03513 Py_DECREF(*pv);
03514 *pv = NULL;
03515 return;
03516 }
03517 v = string_concat((PyStringObject *) *pv, w);
03518 Py_DECREF(*pv);
03519 *pv = v;
03520 }
03521
/* Same as PyString_Concat(pv, w), but additionally releases the
 * caller's reference to w (w may be NULL). */
03522 void
03523 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
03524 {
03525 PyString_Concat(pv, w);
03526 Py_XDECREF(w); /* XDECREF: tolerates a NULL w */
03527 }
03528
03529
03530
03531
03532
03533
03534
03535
03536
03537
03538
03539
03540
03541
03542
03543
03544 int
03545 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
03546 {
03547 register PyObject *v;
03548 register PyStringObject *sv;
03549 v = *pv;
03550 if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
03551 PyString_CHECK_INTERNED(v)) {
03552 *pv = 0;
03553 Py_DECREF(v);
03554 PyErr_BadInternalCall();
03555 return -1;
03556 }
03557
03558 _Py_DEC_REFTOTAL;
03559 _Py_ForgetReference(v);
03560 *pv = (PyObject *)
03561 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
03562 if (*pv == NULL) {
03563 PyObject_Del(v);
03564 PyErr_NoMemory();
03565 return -1;
03566 }
03567 _Py_NewReference(*pv);
03568 sv = (PyStringObject *) *pv;
03569 sv->ob_size = newsize;
03570 sv->ob_sval[newsize] = '\0';
03571 sv->ob_shash = -1;
03572 return 0;
03573 }
03574
03575
03576
03577 static PyObject *
03578 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
03579 {
03580 Py_ssize_t argidx = *p_argidx;
03581 if (argidx < arglen) {
03582 (*p_argidx)++;
03583 if (arglen < 0)
03584 return args;
03585 else
03586 return PyTuple_GetItem(args, argidx);
03587 }
03588 PyErr_SetString(PyExc_TypeError,
03589 "not enough arguments for format string");
03590 return NULL;
03591 }
03592
03593
03594
03595
03596
03597
03598
03599
03600 #define F_LJUST (1<<0)
03601 #define F_SIGN (1<<1)
03602 #define F_BLANK (1<<2)
03603 #define F_ALT (1<<3)
03604 #define F_ZERO (1<<4)
03605
03606 static int
03607 formatfloat(char *buf, size_t buflen, int flags,
03608 int prec, int type, PyObject *v)
03609 {
03610
03611
03612 char fmt[20];
03613 double x;
03614 x = PyFloat_AsDouble(v);
03615 if (x == -1.0 && PyErr_Occurred()) {
03616 PyErr_SetString(PyExc_TypeError, "float argument required");
03617 return -1;
03618 }
03619 if (prec < 0)
03620 prec = 6;
03621 if (type == 'f' && fabs(x)/1e25 >= 1e25)
03622 type = 'g';
03623
03624
03625
03626
03627
03628
03629
03630
03631
03632
03633
03634
03635
03636
03637
03638
03639 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
03640 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
03641 PyErr_SetString(PyExc_OverflowError,
03642 "formatted float is too long (precision too large?)");
03643 return -1;
03644 }
03645 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
03646 (flags&F_ALT) ? "#" : "",
03647 prec, type);
03648 PyOS_ascii_formatd(buf, buflen, fmt, x);
03649 return (int)strlen(buf);
03650 }
03651
03652
03653
03654
03655
03656
03657
03658
03659
03660
03661
03662
03663
03664
03665
03666
03667
03668
03669
03670
03671
03672
03673 PyObject*
03674 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
03675 char **pbuf, int *plen)
03676 {
03677 PyObject *result = NULL;
03678 char *buf;
03679 Py_ssize_t i;
03680 int sign;
03681 int len;
03682 int numdigits;
03683 int numnondigits = 0;
03684
03685 switch (type) {
03686 case 'd':
03687 case 'u':
03688 result = val->ob_type->tp_str(val);
03689 break;
03690 case 'o':
03691 result = val->ob_type->tp_as_number->nb_oct(val);
03692 break;
03693 case 'x':
03694 case 'X':
03695 numnondigits = 2;
03696 result = val->ob_type->tp_as_number->nb_hex(val);
03697 break;
03698 default:
03699 assert(!"'type' not in [duoxX]");
03700 }
03701 if (!result)
03702 return NULL;
03703
03704
03705 if (result->ob_refcnt != 1) {
03706 PyErr_BadInternalCall();
03707 return NULL;
03708 }
03709 buf = PyString_AsString(result);
03710 len = PyString_Size(result);
03711 if (buf[len-1] == 'L') {
03712 --len;
03713 buf[len] = '\0';
03714 }
03715 sign = buf[0] == '-';
03716 numnondigits += sign;
03717 numdigits = len - numnondigits;
03718 assert(numdigits > 0);
03719
03720
03721 if ((flags & F_ALT) == 0) {
03722
03723 int skipped = 0;
03724 switch (type) {
03725 case 'o':
03726 assert(buf[sign] == '0');
03727
03728 if (numdigits > 1) {
03729 skipped = 1;
03730 --numdigits;
03731 }
03732 break;
03733 case 'x':
03734 case 'X':
03735 assert(buf[sign] == '0');
03736 assert(buf[sign + 1] == 'x');
03737 skipped = 2;
03738 numnondigits -= 2;
03739 break;
03740 }
03741 if (skipped) {
03742 buf += skipped;
03743 len -= skipped;
03744 if (sign)
03745 buf[0] = '-';
03746 }
03747 assert(len == numnondigits + numdigits);
03748 assert(numdigits > 0);
03749 }
03750
03751
03752 if (prec > numdigits) {
03753 PyObject *r1 = PyString_FromStringAndSize(NULL,
03754 numnondigits + prec);
03755 char *b1;
03756 if (!r1) {
03757 Py_DECREF(result);
03758 return NULL;
03759 }
03760 b1 = PyString_AS_STRING(r1);
03761 for (i = 0; i < numnondigits; ++i)
03762 *b1++ = *buf++;
03763 for (i = 0; i < prec - numdigits; i++)
03764 *b1++ = '0';
03765 for (i = 0; i < numdigits; i++)
03766 *b1++ = *buf++;
03767 *b1 = '\0';
03768 Py_DECREF(result);
03769 result = r1;
03770 buf = PyString_AS_STRING(result);
03771 len = numnondigits + prec;
03772 }
03773
03774
03775 if (type == 'X') {
03776
03777
03778 for (i = 0; i < len; i++)
03779 if (buf[i] >= 'a' && buf[i] <= 'x')
03780 buf[i] -= 'a'-'A';
03781 }
03782 *pbuf = buf;
03783 *plen = len;
03784 return result;
03785 }
03786
03787 static int
03788 formatint(char *buf, size_t buflen, int flags,
03789 int prec, int type, PyObject *v)
03790 {
03791
03792
03793
03794 char fmt[64];
03795 char *sign;
03796 long x;
03797
03798 x = PyInt_AsLong(v);
03799 if (x == -1 && PyErr_Occurred()) {
03800 PyErr_SetString(PyExc_TypeError, "int argument required");
03801 return -1;
03802 }
03803 if (x < 0 && type == 'u') {
03804 type = 'd';
03805 }
03806 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
03807 sign = "-";
03808 else
03809 sign = "";
03810 if (prec < 0)
03811 prec = 1;
03812
03813 if ((flags & F_ALT) &&
03814 (type == 'x' || type == 'X')) {
03815
03816
03817
03818
03819
03820
03821
03822
03823
03824
03825
03826
03827
03828
03829
03830
03831
03832
03833
03834
03835 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
03836 sign, type, prec, type);
03837 }
03838 else {
03839 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
03840 sign, (flags&F_ALT) ? "#" : "",
03841 prec, type);
03842 }
03843
03844
03845
03846
03847 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
03848 PyErr_SetString(PyExc_OverflowError,
03849 "formatted integer is too long (precision too large?)");
03850 return -1;
03851 }
03852 if (sign[0])
03853 PyOS_snprintf(buf, buflen, fmt, -x);
03854 else
03855 PyOS_snprintf(buf, buflen, fmt, x);
03856 return (int)strlen(buf);
03857 }
03858
03859 static int
03860 formatchar(char *buf, size_t buflen, PyObject *v)
03861 {
03862
03863 if (PyString_Check(v)) {
03864 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
03865 return -1;
03866 }
03867 else {
03868 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
03869 return -1;
03870 }
03871 buf[1] = '\0';
03872 return 1;
03873 }
03874
03875
03876
03877
03878
03879
03880
03881
03882
03883 #define FORMATBUFLEN (size_t)120
03884
03885 PyObject *
03886 PyString_Format(PyObject *format, PyObject *args)
03887 {
03888 char *fmt, *res;
03889 Py_ssize_t arglen, argidx;
03890 Py_ssize_t reslen, rescnt, fmtcnt;
03891 int args_owned = 0;
03892 PyObject *result, *orig_args;
03893 #ifdef Py_USING_UNICODE
03894 PyObject *v, *w;
03895 #endif
03896 PyObject *dict = NULL;
03897 if (format == NULL || !PyString_Check(format) || args == NULL) {
03898 PyErr_BadInternalCall();
03899 return NULL;
03900 }
03901 orig_args = args;
03902 fmt = PyString_AS_STRING(format);
03903 fmtcnt = PyString_GET_SIZE(format);
03904 reslen = rescnt = fmtcnt + 100;
03905 result = PyString_FromStringAndSize((char *)NULL, reslen);
03906 if (result == NULL)
03907 return NULL;
03908 res = PyString_AsString(result);
03909 if (PyTuple_Check(args)) {
03910 arglen = PyTuple_GET_SIZE(args);
03911 argidx = 0;
03912 }
03913 else {
03914 arglen = -1;
03915 argidx = -2;
03916 }
03917 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
03918 !PyObject_TypeCheck(args, &PyBaseString_Type))
03919 dict = args;
03920 while (--fmtcnt >= 0) {
03921 if (*fmt != '%') {
03922 if (--rescnt < 0) {
03923 rescnt = fmtcnt + 100;
03924 reslen += rescnt;
03925 if (_PyString_Resize(&result, reslen) < 0)
03926 return NULL;
03927 res = PyString_AS_STRING(result)
03928 + reslen - rescnt;
03929 --rescnt;
03930 }
03931 *res++ = *fmt++;
03932 }
03933 else {
03934
03935 int flags = 0;
03936 Py_ssize_t width = -1;
03937 int prec = -1;
03938 int c = '\0';
03939 int fill;
03940 PyObject *v = NULL;
03941 PyObject *temp = NULL;
03942 char *pbuf;
03943 int sign;
03944 int len;
03945 char formatbuf[FORMATBUFLEN];
03946
03947 #ifdef Py_USING_UNICODE
03948 char *fmt_start = fmt;
03949 int argidx_start = argidx;
03950 #endif
03951
03952 fmt++;
03953 if (*fmt == '(') {
03954 char *keystart;
03955 Py_ssize_t keylen;
03956 PyObject *key;
03957 int pcount = 1;
03958
03959 if (dict == NULL) {
03960 PyErr_SetString(PyExc_TypeError,
03961 "format requires a mapping");
03962 goto error;
03963 }
03964 ++fmt;
03965 --fmtcnt;
03966 keystart = fmt;
03967
03968 while (pcount > 0 && --fmtcnt >= 0) {
03969 if (*fmt == ')')
03970 --pcount;
03971 else if (*fmt == '(')
03972 ++pcount;
03973 fmt++;
03974 }
03975 keylen = fmt - keystart - 1;
03976 if (fmtcnt < 0 || pcount > 0) {
03977 PyErr_SetString(PyExc_ValueError,
03978 "incomplete format key");
03979 goto error;
03980 }
03981 key = PyString_FromStringAndSize(keystart,
03982 keylen);
03983 if (key == NULL)
03984 goto error;
03985 if (args_owned) {
03986 Py_DECREF(args);
03987 args_owned = 0;
03988 }
03989 args = PyObject_GetItem(dict, key);
03990 Py_DECREF(key);
03991 if (args == NULL) {
03992 goto error;
03993 }
03994 args_owned = 1;
03995 arglen = -1;
03996 argidx = -2;
03997 }
03998 while (--fmtcnt >= 0) {
03999 switch (c = *fmt++) {
04000 case '-': flags |= F_LJUST; continue;
04001 case '+': flags |= F_SIGN; continue;
04002 case ' ': flags |= F_BLANK; continue;
04003 case '#': flags |= F_ALT; continue;
04004 case '0': flags |= F_ZERO; continue;
04005 }
04006 break;
04007 }
04008 if (c == '*') {
04009 v = getnextarg(args, arglen, &argidx);
04010 if (v == NULL)
04011 goto error;
04012 if (!PyInt_Check(v)) {
04013 PyErr_SetString(PyExc_TypeError,
04014 "* wants int");
04015 goto error;
04016 }
04017 width = PyInt_AsLong(v);
04018 if (width < 0) {
04019 flags |= F_LJUST;
04020 width = -width;
04021 }
04022 if (--fmtcnt >= 0)
04023 c = *fmt++;
04024 }
04025 else if (c >= 0 && isdigit(c)) {
04026 width = c - '0';
04027 while (--fmtcnt >= 0) {
04028 c = Py_CHARMASK(*fmt++);
04029 if (!isdigit(c))
04030 break;
04031 if ((width*10) / 10 != width) {
04032 PyErr_SetString(
04033 PyExc_ValueError,
04034 "width too big");
04035 goto error;
04036 }
04037 width = width*10 + (c - '0');
04038 }
04039 }
04040 if (c == '.') {
04041 prec = 0;
04042 if (--fmtcnt >= 0)
04043 c = *fmt++;
04044 if (c == '*') {
04045 v = getnextarg(args, arglen, &argidx);
04046 if (v == NULL)
04047 goto error;
04048 if (!PyInt_Check(v)) {
04049 PyErr_SetString(
04050 PyExc_TypeError,
04051 "* wants int");
04052 goto error;
04053 }
04054 prec = PyInt_AsLong(v);
04055 if (prec < 0)
04056 prec = 0;
04057 if (--fmtcnt >= 0)
04058 c = *fmt++;
04059 }
04060 else if (c >= 0 && isdigit(c)) {
04061 prec = c - '0';
04062 while (--fmtcnt >= 0) {
04063 c = Py_CHARMASK(*fmt++);
04064 if (!isdigit(c))
04065 break;
04066 if ((prec*10) / 10 != prec) {
04067 PyErr_SetString(
04068 PyExc_ValueError,
04069 "prec too big");
04070 goto error;
04071 }
04072 prec = prec*10 + (c - '0');
04073 }
04074 }
04075 }
04076 if (fmtcnt >= 0) {
04077 if (c == 'h' || c == 'l' || c == 'L') {
04078 if (--fmtcnt >= 0)
04079 c = *fmt++;
04080 }
04081 }
04082 if (fmtcnt < 0) {
04083 PyErr_SetString(PyExc_ValueError,
04084 "incomplete format");
04085 goto error;
04086 }
04087 if (c != '%') {
04088 v = getnextarg(args, arglen, &argidx);
04089 if (v == NULL)
04090 goto error;
04091 }
04092 sign = 0;
04093 fill = ' ';
04094 switch (c) {
04095 case '%':
04096 pbuf = "%";
04097 len = 1;
04098 break;
04099 case 's':
04100 #ifdef Py_USING_UNICODE
04101 if (PyUnicode_Check(v)) {
04102 fmt = fmt_start;
04103 argidx = argidx_start;
04104 goto unicode;
04105 }
04106 #endif
04107 temp = _PyObject_Str(v);
04108 #ifdef Py_USING_UNICODE
04109 if (temp != NULL && PyUnicode_Check(temp)) {
04110 Py_DECREF(temp);
04111 fmt = fmt_start;
04112 argidx = argidx_start;
04113 goto unicode;
04114 }
04115 #endif
04116
04117 case 'r':
04118 if (c == 'r')
04119 temp = PyObject_Repr(v);
04120 if (temp == NULL)
04121 goto error;
04122 if (!PyString_Check(temp)) {
04123 PyErr_SetString(PyExc_TypeError,
04124 "%s argument has non-string str()");
04125 Py_DECREF(temp);
04126 goto error;
04127 }
04128 pbuf = PyString_AS_STRING(temp);
04129 len = PyString_GET_SIZE(temp);
04130 if (prec >= 0 && len > prec)
04131 len = prec;
04132 break;
04133 case 'i':
04134 case 'd':
04135 case 'u':
04136 case 'o':
04137 case 'x':
04138 case 'X':
04139 if (c == 'i')
04140 c = 'd';
04141 if (PyLong_Check(v)) {
04142 temp = _PyString_FormatLong(v, flags,
04143 prec, c, &pbuf, &len);
04144 if (!temp)
04145 goto error;
04146 sign = 1;
04147 }
04148 else {
04149 pbuf = formatbuf;
04150 len = formatint(pbuf,
04151 sizeof(formatbuf),
04152 flags, prec, c, v);
04153 if (len < 0)
04154 goto error;
04155 sign = 1;
04156 }
04157 if (flags & F_ZERO)
04158 fill = '0';
04159 break;
04160 case 'e':
04161 case 'E':
04162 case 'f':
04163 case 'F':
04164 case 'g':
04165 case 'G':
04166 if (c == 'F')
04167 c = 'f';
04168 pbuf = formatbuf;
04169 len = formatfloat(pbuf, sizeof(formatbuf),
04170 flags, prec, c, v);
04171 if (len < 0)
04172 goto error;
04173 sign = 1;
04174 if (flags & F_ZERO)
04175 fill = '0';
04176 break;
04177 case 'c':
04178 #ifdef Py_USING_UNICODE
04179 if (PyUnicode_Check(v)) {
04180 fmt = fmt_start;
04181 argidx = argidx_start;
04182 goto unicode;
04183 }
04184 #endif
04185 pbuf = formatbuf;
04186 len = formatchar(pbuf, sizeof(formatbuf), v);
04187 if (len < 0)
04188 goto error;
04189 break;
04190 default:
04191 PyErr_Format(PyExc_ValueError,
04192 "unsupported format character '%c' (0x%x) "
04193 "at index %i",
04194 c, c,
04195 (int)(fmt - 1 - PyString_AsString(format)));
04196 goto error;
04197 }
04198 if (sign) {
04199 if (*pbuf == '-' || *pbuf == '+') {
04200 sign = *pbuf++;
04201 len--;
04202 }
04203 else if (flags & F_SIGN)
04204 sign = '+';
04205 else if (flags & F_BLANK)
04206 sign = ' ';
04207 else
04208 sign = 0;
04209 }
04210 if (width < len)
04211 width = len;
04212 if (rescnt - (sign != 0) < width) {
04213 reslen -= rescnt;
04214 rescnt = width + fmtcnt + 100;
04215 reslen += rescnt;
04216 if (reslen < 0) {
04217 Py_DECREF(result);
04218 return PyErr_NoMemory();
04219 }
04220 if (_PyString_Resize(&result, reslen) < 0)
04221 return NULL;
04222 res = PyString_AS_STRING(result)
04223 + reslen - rescnt;
04224 }
04225 if (sign) {
04226 if (fill != ' ')
04227 *res++ = sign;
04228 rescnt--;
04229 if (width > len)
04230 width--;
04231 }
04232 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
04233 assert(pbuf[0] == '0');
04234 assert(pbuf[1] == c);
04235 if (fill != ' ') {
04236 *res++ = *pbuf++;
04237 *res++ = *pbuf++;
04238 }
04239 rescnt -= 2;
04240 width -= 2;
04241 if (width < 0)
04242 width = 0;
04243 len -= 2;
04244 }
04245 if (width > len && !(flags & F_LJUST)) {
04246 do {
04247 --rescnt;
04248 *res++ = fill;
04249 } while (--width > len);
04250 }
04251 if (fill == ' ') {
04252 if (sign)
04253 *res++ = sign;
04254 if ((flags & F_ALT) &&
04255 (c == 'x' || c == 'X')) {
04256 assert(pbuf[0] == '0');
04257 assert(pbuf[1] == c);
04258 *res++ = *pbuf++;
04259 *res++ = *pbuf++;
04260 }
04261 }
04262 memcpy(res, pbuf, len);
04263 res += len;
04264 rescnt -= len;
04265 while (--width >= len) {
04266 --rescnt;
04267 *res++ = ' ';
04268 }
04269 if (dict && (argidx < arglen) && c != '%') {
04270 PyErr_SetString(PyExc_TypeError,
04271 "not all arguments converted during string formatting");
04272 goto error;
04273 }
04274 Py_XDECREF(temp);
04275 }
04276 }
04277 if (argidx < arglen && !dict) {
04278 PyErr_SetString(PyExc_TypeError,
04279 "not all arguments converted during string formatting");
04280 goto error;
04281 }
04282 if (args_owned) {
04283 Py_DECREF(args);
04284 }
04285 _PyString_Resize(&result, reslen - rescnt);
04286 return result;
04287
04288 #ifdef Py_USING_UNICODE
04289 unicode:
04290 if (args_owned) {
04291 Py_DECREF(args);
04292 args_owned = 0;
04293 }
04294
04295 if (PyTuple_Check(orig_args) && argidx > 0) {
04296 PyObject *v;
04297 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
04298 v = PyTuple_New(n);
04299 if (v == NULL)
04300 goto error;
04301 while (--n >= 0) {
04302 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
04303 Py_INCREF(w);
04304 PyTuple_SET_ITEM(v, n, w);
04305 }
04306 args = v;
04307 } else {
04308 Py_INCREF(orig_args);
04309 args = orig_args;
04310 }
04311 args_owned = 1;
04312
04313
04314 rescnt = res - PyString_AS_STRING(result);
04315 if (_PyString_Resize(&result, rescnt))
04316 goto error;
04317 fmtcnt = PyString_GET_SIZE(format) - \
04318 (fmt - PyString_AS_STRING(format));
04319 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
04320 if (format == NULL)
04321 goto error;
04322 v = PyUnicode_Format(format, args);
04323 Py_DECREF(format);
04324 if (v == NULL)
04325 goto error;
04326
04327
04328 w = PyUnicode_Concat(result, v);
04329 Py_DECREF(result);
04330 Py_DECREF(v);
04331 Py_DECREF(args);
04332 return w;
04333 #endif
04334
04335 error:
04336 Py_DECREF(result);
04337 if (args_owned) {
04338 Py_DECREF(args);
04339 }
04340 return NULL;
04341 }
04342
04343 void
04344 PyString_InternInPlace(PyObject **p)
04345 {
04346 register PyStringObject *s = (PyStringObject *)(*p);
04347 PyObject *t;
04348 if (s == NULL || !PyString_Check(s))
04349 Py_FatalError("PyString_InternInPlace: strings only please!");
04350
04351
04352 if (!PyString_CheckExact(s))
04353 return;
04354 if (PyString_CHECK_INTERNED(s))
04355 return;
04356 if (interned == NULL) {
04357 interned = PyDict_New();
04358 if (interned == NULL) {
04359 PyErr_Clear();
04360 return;
04361 }
04362 }
04363 t = PyDict_GetItem(interned, (PyObject *)s);
04364 if (t) {
04365 Py_INCREF(t);
04366 Py_DECREF(*p);
04367 *p = t;
04368 return;
04369 }
04370
04371 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
04372 PyErr_Clear();
04373 return;
04374 }
04375
04376
04377 s->ob_refcnt -= 2;
04378 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
04379 }
04380
04381 void
04382 PyString_InternImmortal(PyObject **p)
04383 {
04384 PyString_InternInPlace(p);
04385 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
04386 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
04387 Py_INCREF(*p);
04388 }
04389 }
04390
04391
04392 PyObject *
04393 PyString_InternFromString(const char *cp)
04394 {
04395 PyObject *s = PyString_FromString(cp);
04396 if (s == NULL)
04397 return NULL;
04398 PyString_InternInPlace(&s);
04399 return s;
04400 }
04401
04402 void
04403 PyString_Fini(void)
04404 {
04405 int i;
04406 for (i = 0; i < UCHAR_MAX + 1; i++) {
04407 Py_XDECREF(characters[i]);
04408 characters[i] = NULL;
04409 }
04410 Py_XDECREF(nullstring);
04411 nullstring = NULL;
04412 }
04413
04414 void _Py_ReleaseInternedStrings(void)
04415 {
04416 PyObject *keys;
04417 PyStringObject *s;
04418 Py_ssize_t i, n;
04419
04420 if (interned == NULL || !PyDict_Check(interned))
04421 return;
04422 keys = PyDict_Keys(interned);
04423 if (keys == NULL || !PyList_Check(keys)) {
04424 PyErr_Clear();
04425 return;
04426 }
04427
04428
04429
04430
04431
04432
04433 fprintf(stderr, "releasing interned strings\n");
04434 n = PyList_GET_SIZE(keys);
04435 for (i = 0; i < n; i++) {
04436 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
04437 switch (s->ob_sstate) {
04438 case SSTATE_NOT_INTERNED:
04439
04440 break;
04441 case SSTATE_INTERNED_IMMORTAL:
04442 s->ob_refcnt += 1;
04443 break;
04444 case SSTATE_INTERNED_MORTAL:
04445 s->ob_refcnt += 2;
04446 break;
04447 default:
04448 Py_FatalError("Inconsistent interned string state.");
04449 }
04450 s->ob_sstate = SSTATE_NOT_INTERNED;
04451 }
04452 Py_DECREF(keys);
04453 PyDict_Clear(interned);
04454 Py_DECREF(interned);
04455 interned = NULL;
04456 }