Main Page | Class Hierarchy | Data Structures | File List | Data Fields | Globals

Objects/stringobject.c

Go to the documentation of this file.
00001 /* String object implementation */
00002 
00003 #include "Python.h"
00004 
00005 #include <ctype.h>
00006 
00007 #ifdef COUNT_ALLOCS
00008 int null_strings, one_strings;  /* bumped each time the cached empty / one-character string is handed out */
00009 #endif
00010 
00011 static PyStringObject *characters[UCHAR_MAX + 1];  /* cache of one-character strings, indexed by byte value; NULL until first created */
00012 static PyStringObject *nullstring;  /* cached empty string; NULL until first created */
00013 
00014 /* This dictionary holds all interned strings.  Note that references to
00015    strings in this dictionary are *not* counted in the string's ob_refcnt.
00016    When the interned string reaches a refcnt of 0 the string deallocation
00017    function will delete the reference from this dictionary.
00018 
00019    Another way to look at this is to say that the actual reference
00020    count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
00021 */
00022 static PyObject *interned;
00023 
00024 
00025 /*
00026    For both PyString_FromString() and PyString_FromStringAndSize(), the
00027    parameter `size' denotes number of characters to allocate, not counting any
00028    null terminating character.
00029 
00030    For PyString_FromString(), the parameter `str' points to a null-terminated
00031    string containing exactly `size' bytes.
00032 
00033    For PyString_FromStringAndSize(), the parameter `str' is
00034    either NULL or else points to a string containing at least `size' bytes.
00035    For PyString_FromStringAndSize(), the string in the `str' parameter does
00036    not have to be null-terminated.  (Therefore it is safe to construct a
00037    substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
00038    If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
00039    bytes (setting the last byte to the null terminating character) and you can
00040    fill in the data yourself.  If `str' is non-NULL then the resulting
00041    PyString object must be treated as immutable and you must not fill in nor
00042    alter the data yourself, since the strings may be shared.
00043 
00044    The PyObject member `op->ob_size', which denotes the number of "extra
00045    items" in a variable-size object, will contain the number of bytes
00046    allocated for string data, not counting the null terminating character.  It
00047    is therefore equal to the `size' parameter (for
00048    PyString_FromStringAndSize()) or the length of the string in the `str'
00049    parameter (for PyString_FromString()).
00050 */
00051 PyObject *
00052 PyString_FromStringAndSize(const char *str, Py_ssize_t size)
00053 {
00054         register PyStringObject *op;
00055         assert(size >= 0);
00056         if (size == 0 && (op = nullstring) != NULL) {
00057 #ifdef COUNT_ALLOCS
00058                 null_strings++;
00059 #endif
00060                 Py_INCREF(op);
00061                 return (PyObject *)op;
00062         }
00063         if (size == 1 && str != NULL &&
00064             (op = characters[*str & UCHAR_MAX]) != NULL)
00065         {
00066 #ifdef COUNT_ALLOCS
00067                 one_strings++;
00068 #endif
00069                 Py_INCREF(op);
00070                 return (PyObject *)op;
00071         }
00072 
00073         /* Inline PyObject_NewVar */
00074         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
00075         if (op == NULL)
00076                 return PyErr_NoMemory();
00077         PyObject_INIT_VAR(op, &PyString_Type, size);
00078         op->ob_shash = -1;
00079         op->ob_sstate = SSTATE_NOT_INTERNED;
00080         if (str != NULL)
00081                 memcpy(op->ob_sval, str, size);
00082         op->ob_sval[size] = '\0';
00083         /* share short strings */
00084         if (size == 0) {
00085                 PyObject *t = (PyObject *)op;
00086                 PyString_InternInPlace(&t);
00087                 op = (PyStringObject *)t;
00088                 nullstring = op;
00089                 Py_INCREF(op);
00090         } else if (size == 1 && str != NULL) {
00091                 PyObject *t = (PyObject *)op;
00092                 PyString_InternInPlace(&t);
00093                 op = (PyStringObject *)t;
00094                 characters[*str & UCHAR_MAX] = op;
00095                 Py_INCREF(op);
00096         }
00097         return (PyObject *) op;
00098 }
00099 
00100 PyObject *
00101 PyString_FromString(const char *str)
00102 {
00103         register size_t size;
00104         register PyStringObject *op;
00105 
00106         assert(str != NULL);
00107         size = strlen(str);
00108         if (size > INT_MAX) {
00109                 PyErr_SetString(PyExc_OverflowError,
00110                         "string is too long for a Python string");
00111                 return NULL;
00112         }
00113         if (size == 0 && (op = nullstring) != NULL) {
00114 #ifdef COUNT_ALLOCS
00115                 null_strings++;
00116 #endif
00117                 Py_INCREF(op);
00118                 return (PyObject *)op;
00119         }
00120         if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
00121 #ifdef COUNT_ALLOCS
00122                 one_strings++;
00123 #endif
00124                 Py_INCREF(op);
00125                 return (PyObject *)op;
00126         }
00127 
00128         /* Inline PyObject_NewVar */
00129         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
00130         if (op == NULL)
00131                 return PyErr_NoMemory();
00132         PyObject_INIT_VAR(op, &PyString_Type, size);
00133         op->ob_shash = -1;
00134         op->ob_sstate = SSTATE_NOT_INTERNED;
00135         memcpy(op->ob_sval, str, size+1);
00136         /* share short strings */
00137         if (size == 0) {
00138                 PyObject *t = (PyObject *)op;
00139                 PyString_InternInPlace(&t);
00140                 op = (PyStringObject *)t;
00141                 nullstring = op;
00142                 Py_INCREF(op);
00143         } else if (size == 1) {
00144                 PyObject *t = (PyObject *)op;
00145                 PyString_InternInPlace(&t);
00146                 op = (PyStringObject *)t;
00147                 characters[*str & UCHAR_MAX] = op;
00148                 Py_INCREF(op);
00149         }
00150         return (PyObject *) op;
00151 }
00152 
00153 PyObject *
00154 PyString_FromFormatV(const char *format, va_list vargs)
00155 {
00156         va_list count;
00157         Py_ssize_t n = 0;
00158         const char* f;
00159         char *s;
00160         PyObject* string;
00161 
00162 #ifdef VA_LIST_IS_ARRAY
00163         memcpy(count, vargs, sizeof(va_list));
00164 #else
00165 #ifdef  __va_copy
00166         __va_copy(count, vargs);
00167 #else
00168         count = vargs;
00169 #endif
00170 #endif
00171         /* step 1: figure out how large a buffer we need */
00172         for (f = format; *f; f++) {
00173                 if (*f == '%') {
00174                         const char* p = f;
00175                         while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
00176                                 ;
00177 
00178                         /* skip the 'l' in %ld, since it doesn't change the
00179                            width.  although only %d is supported (see
00180                            "expand" section below), others can be easily
00181                            added */
00182                         if (*f == 'l' && *(f+1) == 'd')
00183                                 ++f;
00184                         /* likewise for %zd */
00185                         if (*f == 'z' && *(f+1) == 'd')
00186                                 ++f;                    
00187 
00188                         switch (*f) {
00189                         case 'c':
00190                                 (void)va_arg(count, int);
00191                                 /* fall through... */
00192                         case '%':
00193                                 n++;
00194                                 break;
00195                         case 'd': case 'i': case 'x':
00196                                 (void) va_arg(count, int);
00197                                 /* 20 bytes is enough to hold a 64-bit
00198                                    integer.  Decimal takes the most space.
00199                                    This isn't enough for octal. */
00200                                 n += 20;
00201                                 break;
00202                         case 's':
00203                                 s = va_arg(count, char*);
00204                                 n += strlen(s);
00205                                 break;
00206                         case 'p':
00207                                 (void) va_arg(count, int);
00208                                 /* maximum 64-bit pointer representation:
00209                                  * 0xffffffffffffffff
00210                                  * so 19 characters is enough.
00211                                  * XXX I count 18 -- what's the extra for?
00212                                  */
00213                                 n += 19;
00214                                 break;
00215                         default:
00216                                 /* if we stumble upon an unknown
00217                                    formatting code, copy the rest of
00218                                    the format string to the output
00219                                    string. (we cannot just skip the
00220                                    code, since there's no way to know
00221                                    what's in the argument list) */
00222                                 n += strlen(p);
00223                                 goto expand;
00224                         }
00225                 } else
00226                         n++;
00227         }
00228  expand:
00229         /* step 2: fill the buffer */
00230         /* Since we've analyzed how much space we need for the worst case,
00231            use sprintf directly instead of the slower PyOS_snprintf. */
00232         string = PyString_FromStringAndSize(NULL, n);
00233         if (!string)
00234                 return NULL;
00235 
00236         s = PyString_AsString(string);
00237 
00238         for (f = format; *f; f++) {
00239                 if (*f == '%') {
00240                         const char* p = f++;
00241                         Py_ssize_t i;
00242                         int longflag = 0;
00243                         int size_tflag = 0;
00244                         /* parse the width.precision part (we're only
00245                            interested in the precision value, if any) */
00246                         n = 0;
00247                         while (isdigit(Py_CHARMASK(*f)))
00248                                 n = (n*10) + *f++ - '0';
00249                         if (*f == '.') {
00250                                 f++;
00251                                 n = 0;
00252                                 while (isdigit(Py_CHARMASK(*f)))
00253                                         n = (n*10) + *f++ - '0';
00254                         }
00255                         while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
00256                                 f++;
00257                         /* handle the long flag, but only for %ld.  others
00258                            can be added when necessary. */
00259                         if (*f == 'l' && *(f+1) == 'd') {
00260                                 longflag = 1;
00261                                 ++f;
00262                         }
00263                         /* handle the size_t flag. */
00264                         if (*f == 'z' && *(f+1) == 'd') {
00265                                 size_tflag = 1;
00266                                 ++f;
00267                         }
00268 
00269                         switch (*f) {
00270                         case 'c':
00271                                 *s++ = va_arg(vargs, int);
00272                                 break;
00273                         case 'd':
00274                                 if (longflag)
00275                                         sprintf(s, "%ld", va_arg(vargs, long));
00276                                 else if (size_tflag) {
00277                                         /* Instead of checking whether the C
00278                                            library supports %zd, handle the
00279                                            common cases. */
00280                                         #if SIZEOF_SIZE_T == SIZEOF_LONG
00281                                         sprintf(s, "%ld", va_arg(vargs, long));
00282                                         #elif defined(MS_WINDOWS)
00283                                         sprintf(s, "%Id", va_arg(vargs, size_t));
00284                                         #else
00285                                         #error Cannot print size_t values
00286                                         #endif
00287                                 }
00288                                 else
00289                                         sprintf(s, "%d", va_arg(vargs, int));
00290                                 s += strlen(s);
00291                                 break;
00292                         case 'i':
00293                                 sprintf(s, "%i", va_arg(vargs, int));
00294                                 s += strlen(s);
00295                                 break;
00296                         case 'x':
00297                                 sprintf(s, "%x", va_arg(vargs, int));
00298                                 s += strlen(s);
00299                                 break;
00300                         case 's':
00301                                 p = va_arg(vargs, char*);
00302                                 i = strlen(p);
00303                                 if (n > 0 && i > n)
00304                                         i = n;
00305                                 memcpy(s, p, i);
00306                                 s += i;
00307                                 break;
00308                         case 'p':
00309                                 sprintf(s, "%p", va_arg(vargs, void*));
00310                                 /* %p is ill-defined:  ensure leading 0x. */
00311                                 if (s[1] == 'X')
00312                                         s[1] = 'x';
00313                                 else if (s[1] != 'x') {
00314                                         memmove(s+2, s, strlen(s)+1);
00315                                         s[0] = '0';
00316                                         s[1] = 'x';
00317                                 }
00318                                 s += strlen(s);
00319                                 break;
00320                         case '%':
00321                                 *s++ = '%';
00322                                 break;
00323                         default:
00324                                 strcpy(s, p);
00325                                 s += strlen(s);
00326                                 goto end;
00327                         }
00328                 } else
00329                         *s++ = *f;
00330         }
00331 
00332  end:
00333         _PyString_Resize(&string, s - PyString_AS_STRING(string));
00334         return string;
00335 }
00336 
00337 PyObject *
00338 PyString_FromFormat(const char *format, ...)
00339 {
00340         PyObject* ret;
00341         va_list vargs;
00342 
00343 #ifdef HAVE_STDARG_PROTOTYPES
00344         va_start(vargs, format);
00345 #else
00346         va_start(vargs);
00347 #endif
00348         ret = PyString_FromFormatV(format, vargs);
00349         va_end(vargs);
00350         return ret;
00351 }
00352 
00353 
00354 PyObject *PyString_Decode(const char *s,
00355                           Py_ssize_t size,
00356                           const char *encoding,
00357                           const char *errors)
00358 {
00359     PyObject *v, *str;
00360 
00361     str = PyString_FromStringAndSize(s, size);
00362     if (str == NULL)
00363         return NULL;
00364     v = PyString_AsDecodedString(str, encoding, errors);
00365     Py_DECREF(str);
00366     return v;
00367 }
00368 
00369 PyObject *PyString_AsDecodedObject(PyObject *str,
00370                                    const char *encoding,
00371                                    const char *errors)
00372 {
00373     PyObject *v;
00374 
00375     if (!PyString_Check(str)) {
00376         PyErr_BadArgument();
00377         goto onError;
00378     }
00379 
00380     if (encoding == NULL) {
00381 #ifdef Py_USING_UNICODE
00382         encoding = PyUnicode_GetDefaultEncoding();
00383 #else
00384         PyErr_SetString(PyExc_ValueError, "no encoding specified");
00385         goto onError;
00386 #endif
00387     }
00388 
00389     /* Decode via the codec registry */
00390     v = PyCodec_Decode(str, encoding, errors);
00391     if (v == NULL)
00392         goto onError;
00393 
00394     return v;
00395 
00396  onError:
00397     return NULL;
00398 }
00399 
00400 PyObject *PyString_AsDecodedString(PyObject *str,
00401                                    const char *encoding,
00402                                    const char *errors)
00403 {
00404     PyObject *v;
00405 
00406     v = PyString_AsDecodedObject(str, encoding, errors);
00407     if (v == NULL)
00408         goto onError;
00409 
00410 #ifdef Py_USING_UNICODE
00411     /* Convert Unicode to a string using the default encoding */
00412     if (PyUnicode_Check(v)) {
00413         PyObject *temp = v;
00414         v = PyUnicode_AsEncodedString(v, NULL, NULL);
00415         Py_DECREF(temp);
00416         if (v == NULL)
00417             goto onError;
00418     }
00419 #endif
00420     if (!PyString_Check(v)) {
00421         PyErr_Format(PyExc_TypeError,
00422                      "decoder did not return a string object (type=%.400s)",
00423                      v->ob_type->tp_name);
00424         Py_DECREF(v);
00425         goto onError;
00426     }
00427 
00428     return v;
00429 
00430  onError:
00431     return NULL;
00432 }
00433 
00434 PyObject *PyString_Encode(const char *s,
00435                           Py_ssize_t size,
00436                           const char *encoding,
00437                           const char *errors)
00438 {
00439     PyObject *v, *str;
00440 
00441     str = PyString_FromStringAndSize(s, size);
00442     if (str == NULL)
00443         return NULL;
00444     v = PyString_AsEncodedString(str, encoding, errors);
00445     Py_DECREF(str);
00446     return v;
00447 }
00448 
00449 PyObject *PyString_AsEncodedObject(PyObject *str,
00450                                    const char *encoding,
00451                                    const char *errors)
00452 {
00453     PyObject *v;
00454 
00455     if (!PyString_Check(str)) {
00456         PyErr_BadArgument();
00457         goto onError;
00458     }
00459 
00460     if (encoding == NULL) {
00461 #ifdef Py_USING_UNICODE
00462         encoding = PyUnicode_GetDefaultEncoding();
00463 #else
00464         PyErr_SetString(PyExc_ValueError, "no encoding specified");
00465         goto onError;
00466 #endif
00467     }
00468 
00469     /* Encode via the codec registry */
00470     v = PyCodec_Encode(str, encoding, errors);
00471     if (v == NULL)
00472         goto onError;
00473 
00474     return v;
00475 
00476  onError:
00477     return NULL;
00478 }
00479 
00480 PyObject *PyString_AsEncodedString(PyObject *str,
00481                                    const char *encoding,
00482                                    const char *errors)
00483 {
00484     PyObject *v;
00485 
00486     v = PyString_AsEncodedObject(str, encoding, errors);
00487     if (v == NULL)
00488         goto onError;
00489 
00490 #ifdef Py_USING_UNICODE
00491     /* Convert Unicode to a string using the default encoding */
00492     if (PyUnicode_Check(v)) {
00493         PyObject *temp = v;
00494         v = PyUnicode_AsEncodedString(v, NULL, NULL);
00495         Py_DECREF(temp);
00496         if (v == NULL)
00497             goto onError;
00498     }
00499 #endif
00500     if (!PyString_Check(v)) {
00501         PyErr_Format(PyExc_TypeError,
00502                      "encoder did not return a string object (type=%.400s)",
00503                      v->ob_type->tp_name);
00504         Py_DECREF(v);
00505         goto onError;
00506     }
00507 
00508     return v;
00509 
00510  onError:
00511     return NULL;
00512 }
00513 
00514 static void
00515 string_dealloc(PyObject *op)
00516 {
00517         switch (PyString_CHECK_INTERNED(op)) {
00518                 case SSTATE_NOT_INTERNED:
00519                         break;
00520 
00521                 case SSTATE_INTERNED_MORTAL:
00522                         /* revive dead object temporarily for DelItem */
00523                         op->ob_refcnt = 3;
00524                         if (PyDict_DelItem(interned, op) != 0)
00525                                 Py_FatalError(
00526                                         "deletion of interned string failed");
00527                         break;
00528 
00529                 case SSTATE_INTERNED_IMMORTAL:
00530                         Py_FatalError("Immortal interned string died.");
00531 
00532                 default:
00533                         Py_FatalError("Inconsistent interned string state.");
00534         }
00535         op->ob_type->tp_free(op);
00536 }
00537 
00538 /* Unescape a backslash-escaped string. If unicode is non-zero,
00539    the string is a u-literal. If recode_encoding is non-zero,
00540    the string is UTF-8 encoded and should be re-encoded in the
00541    specified encoding.  */
00542 
00543 PyObject *PyString_DecodeEscape(const char *s,
00544                                 Py_ssize_t len,
00545                                 const char *errors,
00546                                 Py_ssize_t unicode,
00547                                 const char *recode_encoding)
00548 {
00549         int c;
00550         char *p, *buf;
00551         const char *end;
00552         PyObject *v;
00553         Py_ssize_t newlen = recode_encoding ? 4*len:len;
00554         v = PyString_FromStringAndSize((char *)NULL, newlen);
00555         if (v == NULL)
00556                 return NULL;
00557         p = buf = PyString_AsString(v);
00558         end = s + len;
00559         while (s < end) {
00560                 if (*s != '\\') {
00561                   non_esc:
00562 #ifdef Py_USING_UNICODE
00563                         if (recode_encoding && (*s & 0x80)) {
00564                                 PyObject *u, *w;
00565                                 char *r;
00566                                 const char* t;
00567                                 Py_ssize_t rn;
00568                                 t = s;
00569                                 /* Decode non-ASCII bytes as UTF-8. */
00570                                 while (t < end && (*t & 0x80)) t++;
00571                                 u = PyUnicode_DecodeUTF8(s, t - s, errors);
00572                                 if(!u) goto failed;
00573 
00574                                 /* Recode them in target encoding. */
00575                                 w = PyUnicode_AsEncodedString(
00576                                         u, recode_encoding, errors);
00577                                 Py_DECREF(u);
00578                                 if (!w) goto failed;
00579 
00580                                 /* Append bytes to output buffer. */
00581                                 r = PyString_AsString(w);
00582                                 rn = PyString_Size(w);
00583                                 memcpy(p, r, rn);
00584                                 p += rn;
00585                                 Py_DECREF(w);
00586                                 s = t;
00587                         } else {
00588                                 *p++ = *s++;
00589                         }
00590 #else
00591                         *p++ = *s++;
00592 #endif
00593                         continue;
00594                 }
00595                 s++;
00596                 if (s==end) {
00597                         PyErr_SetString(PyExc_ValueError,
00598                                         "Trailing \\ in string");
00599                         goto failed;
00600                 }
00601                 switch (*s++) {
00602                 /* XXX This assumes ASCII! */
00603                 case '\n': break;
00604                 case '\\': *p++ = '\\'; break;
00605                 case '\'': *p++ = '\''; break;
00606                 case '\"': *p++ = '\"'; break;
00607                 case 'b': *p++ = '\b'; break;
00608                 case 'f': *p++ = '\014'; break; /* FF */
00609                 case 't': *p++ = '\t'; break;
00610                 case 'n': *p++ = '\n'; break;
00611                 case 'r': *p++ = '\r'; break;
00612                 case 'v': *p++ = '\013'; break; /* VT */
00613                 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
00614                 case '0': case '1': case '2': case '3':
00615                 case '4': case '5': case '6': case '7':
00616                         c = s[-1] - '0';
00617                         if ('0' <= *s && *s <= '7') {
00618                                 c = (c<<3) + *s++ - '0';
00619                                 if ('0' <= *s && *s <= '7')
00620                                         c = (c<<3) + *s++ - '0';
00621                         }
00622                         *p++ = c;
00623                         break;
00624                 case 'x':
00625                         if (isxdigit(Py_CHARMASK(s[0])) 
00626                             && isxdigit(Py_CHARMASK(s[1]))) {
00627                                 unsigned int x = 0;
00628                                 c = Py_CHARMASK(*s);
00629                                 s++;
00630                                 if (isdigit(c))
00631                                         x = c - '0';
00632                                 else if (islower(c))
00633                                         x = 10 + c - 'a';
00634                                 else
00635                                         x = 10 + c - 'A';
00636                                 x = x << 4;
00637                                 c = Py_CHARMASK(*s);
00638                                 s++;
00639                                 if (isdigit(c))
00640                                         x += c - '0';
00641                                 else if (islower(c))
00642                                         x += 10 + c - 'a';
00643                                 else
00644                                         x += 10 + c - 'A';
00645                                 *p++ = x;
00646                                 break;
00647                         }
00648                         if (!errors || strcmp(errors, "strict") == 0) {
00649                                 PyErr_SetString(PyExc_ValueError, 
00650                                                 "invalid \\x escape");
00651                                 goto failed;
00652                         }
00653                         if (strcmp(errors, "replace") == 0) {
00654                                 *p++ = '?';
00655                         } else if (strcmp(errors, "ignore") == 0)
00656                                 /* do nothing */;
00657                         else {
00658                                 PyErr_Format(PyExc_ValueError,
00659                                              "decoding error; "
00660                                              "unknown error handling code: %.400s",
00661                                              errors);
00662                                 goto failed;
00663                         }
00664 #ifndef Py_USING_UNICODE
00665                 case 'u':
00666                 case 'U':
00667                 case 'N':
00668                         if (unicode) {
00669                                 PyErr_SetString(PyExc_ValueError,
00670                                           "Unicode escapes not legal "
00671                                           "when Unicode disabled");
00672                                 goto failed;
00673                         }
00674 #endif
00675                 default:
00676                         *p++ = '\\';
00677                         s--;
00678                         goto non_esc; /* an arbitry number of unescaped
00679                                          UTF-8 bytes may follow. */
00680                 }
00681         }
00682         if (p-buf < newlen)
00683                 _PyString_Resize(&v, p - buf);
00684         return v;
00685   failed:
00686         Py_DECREF(v);
00687         return NULL;
00688 }
00689 
00690 static Py_ssize_t
00691 string_getsize(register PyObject *op)
00692 {
00693         char *s;
00694         Py_ssize_t len;
00695         if (PyString_AsStringAndSize(op, &s, &len))
00696                 return -1;
00697         return len;
00698 }
00699 
00700 static /*const*/ char *
00701 string_getbuffer(register PyObject *op)
00702 {
00703         char *s;
00704         Py_ssize_t len;
00705         if (PyString_AsStringAndSize(op, &s, &len))
00706                 return NULL;
00707         return s;
00708 }
00709 
00710 Py_ssize_t
00711 PyString_Size(register PyObject *op)
00712 {
00713         if (!PyString_Check(op))
00714                 return string_getsize(op);
00715         return ((PyStringObject *)op) -> ob_size;
00716 }
00717 
00718 /*const*/ char *
00719 PyString_AsString(register PyObject *op)
00720 {
00721         if (!PyString_Check(op))
00722                 return string_getbuffer(op);
00723         return ((PyStringObject *)op) -> ob_sval;
00724 }
00725 
00726 int
00727 PyString_AsStringAndSize(register PyObject *obj,
00728                          register char **s,
00729                          register Py_ssize_t *len)
00730 {
00731         if (s == NULL) {
00732                 PyErr_BadInternalCall();
00733                 return -1;
00734         }
00735 
00736         if (!PyString_Check(obj)) {
00737 #ifdef Py_USING_UNICODE
00738                 if (PyUnicode_Check(obj)) {
00739                         obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
00740                         if (obj == NULL)
00741                                 return -1;
00742                 }
00743                 else
00744 #endif
00745                 {
00746                         PyErr_Format(PyExc_TypeError,
00747                                      "expected string or Unicode object, "
00748                                      "%.200s found", obj->ob_type->tp_name);
00749                         return -1;
00750                 }
00751         }
00752 
00753         *s = PyString_AS_STRING(obj);
00754         if (len != NULL)
00755                 *len = PyString_GET_SIZE(obj);
00756         else if (strlen(*s) != PyString_GET_SIZE(obj)) {
00757                 PyErr_SetString(PyExc_TypeError,
00758                                 "expected string without null bytes");
00759                 return -1;
00760         }
00761         return 0;
00762 }
00763 
00764 /* Methods */
00765 
00766 static int
00767 string_print(PyStringObject *op, FILE *fp, int flags)
00768 {
00769         Py_ssize_t i;
00770         char c;
00771         int quote;
00772 
00773         /* XXX Ought to check for interrupts when writing long strings */
00774         if (! PyString_CheckExact(op)) {
00775                 int ret;
00776                 /* A str subclass may have its own __str__ method. */
00777                 op = (PyStringObject *) PyObject_Str((PyObject *)op);
00778                 if (op == NULL)
00779                         return -1;
00780                 ret = string_print(op, fp, flags);
00781                 Py_DECREF(op);
00782                 return ret;
00783         }
00784         if (flags & Py_PRINT_RAW) {
00785 #ifdef __VMS
00786                 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
00787 #else
00788                 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
00789 #endif
00790                 return 0;
00791         }
00792 
00793         /* figure out which quote to use; single is preferred */
00794         quote = '\'';
00795         if (memchr(op->ob_sval, '\'', op->ob_size) &&
00796             !memchr(op->ob_sval, '"', op->ob_size))
00797                 quote = '"';
00798 
00799         fputc(quote, fp);
00800         for (i = 0; i < op->ob_size; i++) {
00801                 c = op->ob_sval[i];
00802                 if (c == quote || c == '\\')
00803                         fprintf(fp, "\\%c", c);
00804                 else if (c == '\t')
00805                         fprintf(fp, "\\t");
00806                 else if (c == '\n')
00807                         fprintf(fp, "\\n");
00808                 else if (c == '\r')
00809                         fprintf(fp, "\\r");
00810                 else if (c < ' ' || c >= 0x7f)
00811                         fprintf(fp, "\\x%02x", c & 0xff);
00812                 else
00813                         fputc(c, fp);
00814         }
00815         fputc(quote, fp);
00816         return 0;
00817 }
00818 
00819 PyObject *
00820 PyString_Repr(PyObject *obj, int smartquotes)
00821 {
00822         register PyStringObject* op = (PyStringObject*) obj;
00823         size_t newsize = 2 + 4 * op->ob_size;
00824         PyObject *v;
00825         if (newsize > INT_MAX) {
00826                 PyErr_SetString(PyExc_OverflowError,
00827                         "string is too large to make repr");
00828         }
00829         v = PyString_FromStringAndSize((char *)NULL, newsize);
00830         if (v == NULL) {
00831                 return NULL;
00832         }
00833         else {
00834                 register Py_ssize_t i;
00835                 register char c;
00836                 register char *p;
00837                 int quote;
00838 
00839                 /* figure out which quote to use; single is preferred */
00840                 quote = '\'';
00841                 if (smartquotes && 
00842                     memchr(op->ob_sval, '\'', op->ob_size) &&
00843                     !memchr(op->ob_sval, '"', op->ob_size))
00844                         quote = '"';
00845 
00846                 p = PyString_AS_STRING(v);
00847                 *p++ = quote;
00848                 for (i = 0; i < op->ob_size; i++) {
00849                         /* There's at least enough room for a hex escape
00850                            and a closing quote. */
00851                         assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
00852                         c = op->ob_sval[i];
00853                         if (c == quote || c == '\\')
00854                                 *p++ = '\\', *p++ = c;
00855                         else if (c == '\t')
00856                                 *p++ = '\\', *p++ = 't';
00857                         else if (c == '\n')
00858                                 *p++ = '\\', *p++ = 'n';
00859                         else if (c == '\r')
00860                                 *p++ = '\\', *p++ = 'r';
00861                         else if (c < ' ' || c >= 0x7f) {
00862                                 /* For performance, we don't want to call
00863                                    PyOS_snprintf here (extra layers of
00864                                    function call). */
00865                                 sprintf(p, "\\x%02x", c & 0xff);
00866                                 p += 4;
00867                         }
00868                         else
00869                                 *p++ = c;
00870                 }
00871                 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
00872                 *p++ = quote;
00873                 *p = '\0';
00874                 _PyString_Resize(
00875                         &v, (int) (p - PyString_AS_STRING(v)));
00876                 return v;
00877         }
00878 }
00879 
00880 static PyObject *
00881 string_repr(PyObject *op)
00882 {
00883         return PyString_Repr(op, 1);
00884 }
00885 
00886 static PyObject *
00887 string_str(PyObject *s)
00888 {
00889         assert(PyString_Check(s));
00890         if (PyString_CheckExact(s)) {
00891                 Py_INCREF(s);
00892                 return s;
00893         }
00894         else {
00895                 /* Subtype -- return genuine string with the same value. */
00896                 PyStringObject *t = (PyStringObject *) s;
00897                 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
00898         }
00899 }
00900 
00901 static Py_ssize_t
00902 string_length(PyStringObject *a)
00903 {
00904         return a->ob_size;
00905 }
00906 
00907 static PyObject *
00908 string_concat(register PyStringObject *a, register PyObject *bb)
00909 {
00910         register size_t size;
00911         register PyStringObject *op;
00912         if (!PyString_Check(bb)) {
00913 #ifdef Py_USING_UNICODE
00914                 if (PyUnicode_Check(bb))
00915                     return PyUnicode_Concat((PyObject *)a, bb);
00916 #endif
00917                 PyErr_Format(PyExc_TypeError,
00918                              "cannot concatenate 'str' and '%.200s' objects",
00919                              bb->ob_type->tp_name);
00920                 return NULL;
00921         }
00922 #define b ((PyStringObject *)bb)
00923         /* Optimize cases with empty left or right operand */
00924         if ((a->ob_size == 0 || b->ob_size == 0) &&
00925             PyString_CheckExact(a) && PyString_CheckExact(b)) {
00926                 if (a->ob_size == 0) {
00927                         Py_INCREF(bb);
00928                         return bb;
00929                 }
00930                 Py_INCREF(a);
00931                 return (PyObject *)a;
00932         }
00933         size = a->ob_size + b->ob_size;
00934         /* XXX check overflow */
00935         /* Inline PyObject_NewVar */
00936         op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
00937         if (op == NULL)
00938                 return PyErr_NoMemory();
00939         PyObject_INIT_VAR(op, &PyString_Type, size);
00940         op->ob_shash = -1;
00941         op->ob_sstate = SSTATE_NOT_INTERNED;
00942         memcpy(op->ob_sval, a->ob_sval, a->ob_size);
00943         memcpy(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
00944         op->ob_sval[size] = '\0';
00945         return (PyObject *) op;
00946 #undef b
00947 }
00948 
00949 static PyObject *
00950 string_repeat(register PyStringObject *a, register Py_ssize_t n)
00951 {
00952         register Py_ssize_t i;
00953         register Py_ssize_t j;
00954         register Py_ssize_t size;
00955         register PyStringObject *op;
00956         size_t nbytes;
00957         if (n < 0)
00958                 n = 0;
00959         /* watch out for overflows:  the size can overflow int,
00960          * and the # of bytes needed can overflow size_t
00961          */
00962         size = a->ob_size * n;
00963         if (n && size / n != a->ob_size) {
00964                 PyErr_SetString(PyExc_OverflowError,
00965                         "repeated string is too long");
00966                 return NULL;
00967         }
00968         if (size == a->ob_size && PyString_CheckExact(a)) {
00969                 Py_INCREF(a);
00970                 return (PyObject *)a;
00971         }
00972         nbytes = (size_t)size;
00973         if (nbytes + sizeof(PyStringObject) <= nbytes) {
00974                 PyErr_SetString(PyExc_OverflowError,
00975                         "repeated string is too long");
00976                 return NULL;
00977         }
00978         op = (PyStringObject *)
00979                 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
00980         if (op == NULL)
00981                 return PyErr_NoMemory();
00982         PyObject_INIT_VAR(op, &PyString_Type, size);
00983         op->ob_shash = -1;
00984         op->ob_sstate = SSTATE_NOT_INTERNED;
00985         op->ob_sval[size] = '\0';
00986         if (a->ob_size == 1 && n > 0) {
00987                 memset(op->ob_sval, a->ob_sval[0] , n);
00988                 return (PyObject *) op;
00989         }
00990         i = 0;
00991         if (i < size) {
00992                 memcpy(op->ob_sval, a->ob_sval, a->ob_size);
00993                 i = a->ob_size;
00994         }
00995         while (i < size) {
00996                 j = (i <= size-i)  ?  i  :  size-i;
00997                 memcpy(op->ob_sval+i, op->ob_sval, j);
00998                 i += j;
00999         }
01000         return (PyObject *) op;
01001 }
01002 
01003 /* String slice a[i:j] consists of characters a[i] ... a[j-1] */
01004 
01005 static PyObject *
01006 string_slice(register PyStringObject *a, register Py_ssize_t i, 
01007              register Py_ssize_t j)
01008      /* j -- may be negative! */
01009 {
01010         if (i < 0)
01011                 i = 0;
01012         if (j < 0)
01013                 j = 0; /* Avoid signed/unsigned bug in next line */
01014         if (j > a->ob_size)
01015                 j = a->ob_size;
01016         if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
01017                 /* It's the same as a */
01018                 Py_INCREF(a);
01019                 return (PyObject *)a;
01020         }
01021         if (j < i)
01022                 j = i;
01023         return PyString_FromStringAndSize(a->ob_sval + i, j-i);
01024 }
01025 
01026 static int
01027 string_contains(PyObject *a, PyObject *el)
01028 {
01029         char *s = PyString_AS_STRING(a);
01030         const char *sub = PyString_AS_STRING(el);
01031         char *last;
01032         Py_ssize_t len_sub = PyString_GET_SIZE(el);
01033         Py_ssize_t shortsub;
01034         char firstchar, lastchar;
01035 
01036         if (!PyString_CheckExact(el)) {
01037 #ifdef Py_USING_UNICODE
01038                 if (PyUnicode_Check(el))
01039                         return PyUnicode_Contains(a, el);
01040 #endif
01041                 if (!PyString_Check(el)) {
01042                         PyErr_SetString(PyExc_TypeError,
01043                             "'in <string>' requires string as left operand");
01044                         return -1;
01045                 }
01046         }
01047 
01048         if (len_sub == 0)
01049                 return 1;
01050         /* last points to one char beyond the start of the rightmost 
01051            substring.  When s<last, there is still room for a possible match
01052            and s[0] through s[len_sub-1] will be in bounds.
01053            shortsub is len_sub minus the last character which is checked
01054            separately just before the memcmp().  That check helps prevent
01055            false starts and saves the setup time for memcmp().
01056         */
01057         firstchar = sub[0];
01058         shortsub = len_sub - 1;
01059         lastchar = sub[shortsub];
01060         last = s + PyString_GET_SIZE(a) - len_sub + 1;
01061         while (s < last) {
01062                 s = memchr(s, firstchar, last-s);
01063                 if (s == NULL)
01064                         return 0;
01065                 assert(s < last);
01066                 if (s[shortsub] == lastchar && memcmp(s, sub, shortsub) == 0)
01067                         return 1;
01068                 s++;
01069         }
01070         return 0;
01071 }
01072 
01073 static PyObject *
01074 string_item(PyStringObject *a, register Py_ssize_t i)
01075 {
01076         PyObject *v;
01077         char *pchar;
01078         if (i < 0 || i >= a->ob_size) {
01079                 PyErr_SetString(PyExc_IndexError, "string index out of range");
01080                 return NULL;
01081         }
01082         pchar = a->ob_sval + i;
01083         v = (PyObject *)characters[*pchar & UCHAR_MAX];
01084         if (v == NULL)
01085                 v = PyString_FromStringAndSize(pchar, 1);
01086         else {
01087 #ifdef COUNT_ALLOCS
01088                 one_strings++;
01089 #endif
01090                 Py_INCREF(v);
01091         }
01092         return v;
01093 }
01094 
01095 static PyObject*
01096 string_richcompare(PyStringObject *a, PyStringObject *b, int op)
01097 {
01098         int c;
01099         Py_ssize_t len_a, len_b;
01100         Py_ssize_t min_len;
01101         PyObject *result;
01102 
01103         /* Make sure both arguments are strings. */
01104         if (!(PyString_Check(a) && PyString_Check(b))) {
01105                 result = Py_NotImplemented;
01106                 goto out;
01107         }
01108         if (a == b) {
01109                 switch (op) {
01110                 case Py_EQ:case Py_LE:case Py_GE:
01111                         result = Py_True;
01112                         goto out;
01113                 case Py_NE:case Py_LT:case Py_GT:
01114                         result = Py_False;
01115                         goto out;
01116                 }
01117         }
01118         if (op == Py_EQ) {
01119                 /* Supporting Py_NE here as well does not save
01120                    much time, since Py_NE is rarely used.  */
01121                 if (a->ob_size == b->ob_size
01122                     && (a->ob_sval[0] == b->ob_sval[0]
01123                         && memcmp(a->ob_sval, b->ob_sval,
01124                                   a->ob_size) == 0)) {
01125                         result = Py_True;
01126                 } else {
01127                         result = Py_False;
01128                 }
01129                 goto out;
01130         }
01131         len_a = a->ob_size; len_b = b->ob_size;
01132         min_len = (len_a < len_b) ? len_a : len_b;
01133         if (min_len > 0) {
01134                 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
01135                 if (c==0)
01136                         c = memcmp(a->ob_sval, b->ob_sval, min_len);
01137         }else
01138                 c = 0;
01139         if (c == 0)
01140                 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
01141         switch (op) {
01142         case Py_LT: c = c <  0; break;
01143         case Py_LE: c = c <= 0; break;
01144         case Py_EQ: assert(0);  break; /* unreachable */
01145         case Py_NE: c = c != 0; break;
01146         case Py_GT: c = c >  0; break;
01147         case Py_GE: c = c >= 0; break;
01148         default:
01149                 result = Py_NotImplemented;
01150                 goto out;
01151         }
01152         result = c ? Py_True : Py_False;
01153   out:
01154         Py_INCREF(result);
01155         return result;
01156 }
01157 
01158 int
01159 _PyString_Eq(PyObject *o1, PyObject *o2)
01160 {
01161         PyStringObject *a, *b;
01162         a = (PyStringObject*)o1;
01163         b = (PyStringObject*)o2;
01164         return a->ob_size == b->ob_size
01165           && *a->ob_sval == *b->ob_sval
01166           && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
01167 }
01168 
01169 static long
01170 string_hash(PyStringObject *a)
01171 {
01172         register Py_ssize_t len;
01173         register unsigned char *p;
01174         register long x;
01175 
01176         if (a->ob_shash != -1)
01177                 return a->ob_shash;
01178         len = a->ob_size;
01179         p = (unsigned char *) a->ob_sval;
01180         x = *p << 7;
01181         while (--len >= 0)
01182                 x = (1000003*x) ^ *p++;
01183         x ^= a->ob_size;
01184         if (x == -1)
01185                 x = -2;
01186         a->ob_shash = x;
01187         return x;
01188 }
01189 
01190 static PyObject*
01191 string_subscript(PyStringObject* self, PyObject* item)
01192 {
01193         if (PyInt_Check(item) || PyLong_Check(item)) {
01194                 Py_ssize_t i = PyInt_AsSsize_t(item);
01195                 if (i == -1 && PyErr_Occurred())
01196                         return NULL;
01197                 if (i < 0)
01198                         i += PyString_GET_SIZE(self);
01199                 return string_item(self,i);
01200         }
01201         else if (PySlice_Check(item)) {
01202                 Py_ssize_t start, stop, step, slicelength, cur, i;
01203                 char* source_buf;
01204                 char* result_buf;
01205                 PyObject* result;
01206 
01207                 if (PySlice_GetIndicesEx((PySliceObject*)item, 
01208                                  PyString_GET_SIZE(self),
01209                                  &start, &stop, &step, &slicelength) < 0) {
01210                         return NULL;
01211                 }
01212 
01213                 if (slicelength <= 0) {
01214                         return PyString_FromStringAndSize("", 0);
01215                 }
01216                 else {
01217                         source_buf = PyString_AsString((PyObject*)self);
01218                         result_buf = PyMem_Malloc(slicelength);
01219                         if (result_buf == NULL)
01220                                 return PyErr_NoMemory();
01221 
01222                         for (cur = start, i = 0; i < slicelength; 
01223                              cur += step, i++) {
01224                                 result_buf[i] = source_buf[cur];
01225                         }
01226                         
01227                         result = PyString_FromStringAndSize(result_buf, 
01228                                                             slicelength);
01229                         PyMem_Free(result_buf);
01230                         return result;
01231                 }
01232         } 
01233         else {
01234                 PyErr_SetString(PyExc_TypeError, 
01235                                 "string indices must be integers");
01236                 return NULL;
01237         }
01238 }
01239 
01240 static Py_ssize_t
01241 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
01242 {
01243         if ( index != 0 ) {
01244                 PyErr_SetString(PyExc_SystemError,
01245                                 "accessing non-existent string segment");
01246                 return -1;
01247         }
01248         *ptr = (void *)self->ob_sval;
01249         return self->ob_size;
01250 }
01251 
01252 static Py_ssize_t
01253 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
01254 {
01255         PyErr_SetString(PyExc_TypeError,
01256                         "Cannot use string as modifiable buffer");
01257         return -1;
01258 }
01259 
01260 static Py_ssize_t
01261 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
01262 {
01263         if ( lenp )
01264                 *lenp = self->ob_size;
01265         return 1;
01266 }
01267 
01268 static Py_ssize_t
01269 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
01270 {
01271         if ( index != 0 ) {
01272                 PyErr_SetString(PyExc_SystemError,
01273                                 "accessing non-existent string segment");
01274                 return -1;
01275         }
01276         *ptr = self->ob_sval;
01277         return self->ob_size;
01278 }
01279 
/* Sequence method table for the string type.  Each slot is named in the
   trailing comment on its line; the two assignment slots (sq_ass_item,
   sq_ass_slice) are left as 0. */
01280 static PySequenceMethods string_as_sequence = {
01281         (lenfunc)string_length, /*sq_length*/
01282         (binaryfunc)string_concat, /*sq_concat*/
01283         (ssizeargfunc)string_repeat, /*sq_repeat*/
01284         (ssizeargfunc)string_item, /*sq_item*/
01285         (ssizessizeargfunc)string_slice, /*sq_slice*/
01286         0,              /*sq_ass_item*/
01287         0,              /*sq_ass_slice*/
01288         (objobjproc)string_contains /*sq_contains*/
01289 };
01290 
/* Mapping method table for the string type: length, subscript lookup
   (string_subscript handles both integer and slice keys), and a third slot
   left as 0. */
01291 static PyMappingMethods string_as_mapping = {
01292         (lenfunc)string_length,
01293         (binaryfunc)string_subscript,
01294         0,
01295 };
01296 
/* Buffer procedure table for the string type: read buffer, write buffer
   (string_buffer_getwritebuf always fails with TypeError), segment count,
   and character buffer. */
01297 static PyBufferProcs string_as_buffer = {
01298         (readbufferproc)string_buffer_getreadbuf,
01299         (writebufferproc)string_buffer_getwritebuf,
01300         (segcountproc)string_buffer_getsegcount,
01301         (charbufferproc)string_buffer_getcharbuf,
01302 };
01303 
01304 
01305 
/* Selector values for the three strip variants.  They double as indices
   into stripformat[] below. */
01306 #define LEFTSTRIP 0
01307 #define RIGHTSTRIP 1
01308 #define BOTHSTRIP 2
01309 
01310 /* Arrays indexed by above */
01311 static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};
01312 
/* STRIPNAME(i) skips the first three characters ("|O:") of stripformat[i],
   leaving just the name part ("lstrip", "rstrip" or "strip"). */
01313 #define STRIPNAME(i) (stripformat[i]+3)
01314 
/* SPLIT_APPEND(data, left, right): create a string from the bytes
   data[left] .. data[right-1] and append it to `list`.
   The macro uses names from the expansion site: the variables `str` and
   `list` and the label `onError`, which it jumps to if creating the string
   or appending it fails.  The reference held in `str` is released on both
   the success and the failure path.
   It expands to several statements and is not wrapped in do { } while (0),
   so it must only be used inside a braced block. */
01315 #define SPLIT_APPEND(data, left, right)                         \
01316         str = PyString_FromStringAndSize((data) + (left),       \
01317                                          (right) - (left));     \
01318         if (str == NULL)                                        \
01319                 goto onError;                                   \
01320         if (PyList_Append(list, str)) {                         \
01321                 Py_DECREF(str);                                 \
01322                 goto onError;                                   \
01323         }                                                       \
01324         else                                                    \
01325                 Py_DECREF(str);
01326 
/* SPLIT_INSERT(data, left, right): create a string from the bytes
   data[left] .. data[right-1] and insert it at index 0 of `list`.
   The macro uses names from the expansion site: the variables `str` and
   `list` and the label `onError`, which it jumps to if creating the string
   or inserting it fails.  The reference held in `str` is released on both
   the success and the failure path.
   It expands to several statements and is not wrapped in do { } while (0),
   so it must only be used inside a braced block. */
01327 #define SPLIT_INSERT(data, left, right)                         \
01328         str = PyString_FromStringAndSize((data) + (left),       \
01329                                          (right) - (left));     \
01330         if (str == NULL)                                        \
01331                 goto onError;                                   \
01332         if (PyList_Insert(list, 0, str)) {                      \
01333                 Py_DECREF(str);                                 \
01334                 goto onError;                                   \
01335         }                                                       \
01336         else                                                    \
01337                 Py_DECREF(str);
01338 
01339 static PyObject *
01340 split_whitespace(const char *s, Py_ssize_t len, int maxsplit)
01341 {
01342         Py_ssize_t i, j;
01343         PyObject *str;
01344         PyObject *list = PyList_New(0);
01345 
01346         if (list == NULL)
01347                 return NULL;
01348 
01349         for (i = j = 0; i < len; ) {
01350                 while (i < len && isspace(Py_CHARMASK(s[i])))
01351                         i++;
01352                 j = i;
01353                 while (i < len && !isspace(Py_CHARMASK(s[i])))
01354                         i++;
01355                 if (j < i) {
01356                         if (maxsplit-- <= 0)
01357                                 break;
01358                         SPLIT_APPEND(s, j, i);
01359                         while (i < len && isspace(Py_CHARMASK(s[i])))
01360                                 i++;
01361                         j = i;
01362                 }
01363         }
01364         if (j < len) {
01365                 SPLIT_APPEND(s, j, len);
01366         }
01367         return list;
01368   onError:
01369         Py_DECREF(list);
01370         return NULL;
01371 }
01372 
01373 static PyObject *
01374 split_char(const char *s, Py_ssize_t len, char ch, int maxcount)
01375 {
01376         register Py_ssize_t i, j;
01377         PyObject *str;
01378         PyObject *list = PyList_New(0);
01379 
01380         if (list == NULL)
01381                 return NULL;
01382 
01383         for (i = j = 0; i < len; ) {
01384                 if (s[i] == ch) {
01385                         if (maxcount-- <= 0)
01386                                 break;
01387                         SPLIT_APPEND(s, j, i);
01388                         i = j = i + 1;
01389                 } else
01390                         i++;
01391         }
01392         if (j <= len) {
01393                 SPLIT_APPEND(s, j, len);
01394         }
01395         return list;
01396 
01397   onError:
01398         Py_DECREF(list);
01399         return NULL;
01400 }
01401 
/* Docstring text describing S.split(); string_split below implements the
   behaviour it describes. */
01402 PyDoc_STRVAR(split__doc__,
01403 "S.split([sep [,maxsplit]]) -> list of strings\n\
01404 \n\
01405 Return a list of the words in the string S, using sep as the\n\
01406 delimiter string.  If maxsplit is given, at most maxsplit\n\
01407 splits are done. If sep is not specified or is None, any\n\
01408 whitespace string is a separator.");
01409 
01410 static PyObject *
01411 string_split(PyStringObject *self, PyObject *args)
01412 {
01413         Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
01414         int err;
01415         int maxsplit = -1;
01416         const char *s = PyString_AS_STRING(self), *sub;
01417         PyObject *list, *item, *subobj = Py_None;
01418 
01419         if (!PyArg_ParseTuple(args, "|Oi:split", &subobj, &maxsplit))
01420                 return NULL;
01421         if (maxsplit < 0)
01422                 maxsplit = INT_MAX;
01423         if (subobj == Py_None)
01424                 return split_whitespace(s, len, maxsplit);
01425         if (PyString_Check(subobj)) {
01426                 sub = PyString_AS_STRING(subobj);
01427                 n = PyString_GET_SIZE(subobj);
01428         }
01429 #ifdef Py_USING_UNICODE
01430         else if (PyUnicode_Check(subobj))
01431                 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
01432 #endif
01433         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
01434                 return NULL;
01435 
01436         if (n == 0) {
01437                 PyErr_SetString(PyExc_ValueError, "empty separator");
01438                 return NULL;
01439         }
01440         else if (n == 1)
01441                 return split_char(s, len, sub[0], maxsplit);
01442 
01443         list = PyList_New(0);
01444         if (list == NULL)
01445                 return NULL;
01446 
01447         i = j = 0;
01448         while (i+n <= len) {
01449                 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
01450                         if (maxsplit-- <= 0)
01451                                 break;
01452                         item = PyString_FromStringAndSize(s+j, i-j);
01453                         if (item == NULL)
01454                                 goto fail;
01455                         err = PyList_Append(list, item);
01456                         Py_DECREF(item);
01457                         if (err < 0)
01458                                 goto fail;
01459                         i = j = i + n;
01460                 }
01461                 else
01462                         i++;
01463         }
01464         item = PyString_FromStringAndSize(s+j, len-j);
01465         if (item == NULL)
01466                 goto fail;
01467         err = PyList_Append(list, item);
01468         Py_DECREF(item);
01469         if (err < 0)
01470                 goto fail;
01471 
01472         return list;
01473 
01474  fail:
01475         Py_DECREF(list);
01476         return NULL;
01477 }
01478 
01479 static PyObject *
01480 rsplit_whitespace(const char *s, Py_ssize_t len, int maxsplit)
01481 {
01482         Py_ssize_t i, j;
01483         PyObject *str;
01484         PyObject *list = PyList_New(0);
01485 
01486         if (list == NULL)
01487                 return NULL;
01488 
01489         for (i = j = len - 1; i >= 0; ) {
01490                 while (i >= 0 && isspace(Py_CHARMASK(s[i])))
01491                         i--;
01492                 j = i;
01493                 while (i >= 0 && !isspace(Py_CHARMASK(s[i])))
01494                         i--;
01495                 if (j > i) {
01496                         if (maxsplit-- <= 0)
01497                                 break;
01498                         SPLIT_INSERT(s, i + 1, j + 1);
01499                         while (i >= 0 && isspace(Py_CHARMASK(s[i])))
01500                                 i--;
01501                         j = i;
01502                 }
01503         }
01504         if (j >= 0) {
01505                 SPLIT_INSERT(s, 0, j + 1);
01506         }
01507         return list;
01508   onError:
01509         Py_DECREF(list);
01510         return NULL;
01511 }
01512 
01513 static PyObject *
01514 rsplit_char(const char *s, Py_ssize_t len, char ch, int maxcount)
01515 {
01516         register Py_ssize_t i, j;
01517         PyObject *str;
01518         PyObject *list = PyList_New(0);
01519 
01520         if (list == NULL)
01521                 return NULL;
01522 
01523         for (i = j = len - 1; i >= 0; ) {
01524                 if (s[i] == ch) {
01525                         if (maxcount-- <= 0)
01526                                 break;
01527                         SPLIT_INSERT(s, i + 1, j + 1);
01528                         j = i = i - 1;
01529                 } else
01530                         i--;
01531         }
01532         if (j >= -1) {
01533                 SPLIT_INSERT(s, 0, j + 1);
01534         }
01535         return list;
01536 
01537  onError:
01538         Py_DECREF(list);
01539         return NULL;
01540 }
01541 
/* Docstring text describing S.rsplit(); string_rsplit below implements the
   behaviour it describes. */
01542 PyDoc_STRVAR(rsplit__doc__,
01543 "S.rsplit([sep [,maxsplit]]) -> list of strings\n\
01544 \n\
01545 Return a list of the words in the string S, using sep as the\n\
01546 delimiter string, starting at the end of the string and working\n\
01547 to the front.  If maxsplit is given, at most maxsplit splits are\n\
01548 done. If sep is not specified or is None, any whitespace string\n\
01549 is a separator.");
01550 
01551 static PyObject *
01552 string_rsplit(PyStringObject *self, PyObject *args)
01553 {
01554         Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
01555         int err;
01556         int maxsplit = -1;
01557         const char *s = PyString_AS_STRING(self), *sub;
01558         PyObject *list, *item, *subobj = Py_None;
01559 
01560         if (!PyArg_ParseTuple(args, "|Oi:rsplit", &subobj, &maxsplit))
01561                 return NULL;
01562         if (maxsplit < 0)
01563                 maxsplit = INT_MAX;
01564         if (subobj == Py_None)
01565                 return rsplit_whitespace(s, len, maxsplit);
01566         if (PyString_Check(subobj)) {
01567                 sub = PyString_AS_STRING(subobj);
01568                 n = PyString_GET_SIZE(subobj);
01569         }
01570 #ifdef Py_USING_UNICODE
01571         else if (PyUnicode_Check(subobj))
01572                 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
01573 #endif
01574         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
01575                 return NULL;
01576 
01577         if (n == 0) {
01578                 PyErr_SetString(PyExc_ValueError, "empty separator");
01579                 return NULL;
01580         }
01581         else if (n == 1)
01582                 return rsplit_char(s, len, sub[0], maxsplit);
01583 
01584         list = PyList_New(0);
01585         if (list == NULL)
01586                 return NULL;
01587 
01588         j = len;
01589         i = j - n;
01590         while (i >= 0) {
01591                 if (s[i] == sub[0] && memcmp(s+i, sub, n) == 0) {
01592                         if (maxsplit-- <= 0)
01593                                 break;
01594                         item = PyString_FromStringAndSize(s+i+n, j-i-n);
01595                         if (item == NULL)
01596                                 goto fail;
01597                         err = PyList_Insert(list, 0, item);
01598                         Py_DECREF(item);
01599                         if (err < 0)
01600                                 goto fail;
01601                         j = i;
01602                         i -= n;
01603                 }
01604                 else
01605                         i--;
01606         }
01607         item = PyString_FromStringAndSize(s, j);
01608         if (item == NULL)
01609                 goto fail;
01610         err = PyList_Insert(list, 0, item);
01611         Py_DECREF(item);
01612         if (err < 0)
01613                 goto fail;
01614 
01615         return list;
01616 
01617  fail:
01618         Py_DECREF(list);
01619         return NULL;
01620 }
01621 
01622 
01623 PyDoc_STRVAR(join__doc__,
01624 "S.join(sequence) -> string\n\
01625 \n\
01626 Return a string which is the concatenation of the strings in the\n\
01627 sequence.  The separator between elements is S.");
01628 
01629 static PyObject *
01630 string_join(PyStringObject *self, PyObject *orig)
01631 {
01632         char *sep = PyString_AS_STRING(self);
01633         const Py_ssize_t seplen = PyString_GET_SIZE(self);
01634         PyObject *res = NULL;
01635         char *p;
01636         Py_ssize_t seqlen = 0;
01637         size_t sz = 0;
01638         Py_ssize_t i;
01639         PyObject *seq, *item;
01640 
01641         seq = PySequence_Fast(orig, "");
01642         if (seq == NULL) {
01643                 return NULL;
01644         }
01645 
01646         seqlen = PySequence_Size(seq);
01647         if (seqlen == 0) {
01648                 Py_DECREF(seq);
01649                 return PyString_FromString("");
01650         }
01651         if (seqlen == 1) {
01652                 item = PySequence_Fast_GET_ITEM(seq, 0);
01653                 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
01654                         Py_INCREF(item);
01655                         Py_DECREF(seq);
01656                         return item;
01657                 }
01658         }
01659 
01660         /* There are at least two things to join, or else we have a subclass
01661          * of the builtin types in the sequence.  
01662          * Do a pre-pass to figure out the total amount of space we'll
01663          * need (sz), see whether any argument is absurd, and defer to
01664          * the Unicode join if appropriate.
01665          */
01666         for (i = 0; i < seqlen; i++) {
01667                 const size_t old_sz = sz;
01668                 item = PySequence_Fast_GET_ITEM(seq, i);
01669                 if (!PyString_Check(item)){
01670 #ifdef Py_USING_UNICODE
01671                         if (PyUnicode_Check(item)) {
01672                                 /* Defer to Unicode join.
01673                                  * CAUTION:  There's no gurantee that the
01674                                  * original sequence can be iterated over
01675                                  * again, so we must pass seq here.
01676                                  */
01677                                 PyObject *result;
01678                                 result = PyUnicode_Join((PyObject *)self, seq);
01679                                 Py_DECREF(seq);
01680                                 return result;
01681                         }
01682 #endif
01683                         PyErr_Format(PyExc_TypeError,
01684                                      "sequence item %i: expected string,"
01685                                      " %.80s found",
01686                                      /*XXX*/(int)i, item->ob_type->tp_name);
01687                         Py_DECREF(seq);
01688                         return NULL;
01689                 }
01690                 sz += PyString_GET_SIZE(item);
01691                 if (i != 0)
01692                         sz += seplen;
01693                 if (sz < old_sz || sz > INT_MAX) {
01694                         PyErr_SetString(PyExc_OverflowError,
01695                                 "join() is too long for a Python string");
01696                         Py_DECREF(seq);
01697                         return NULL;
01698                 }
01699         }
01700 
01701         /* Allocate result space. */
01702         res = PyString_FromStringAndSize((char*)NULL, sz);
01703         if (res == NULL) {
01704                 Py_DECREF(seq);
01705                 return NULL;
01706         }
01707 
01708         /* Catenate everything. */
01709         p = PyString_AS_STRING(res);
01710         for (i = 0; i < seqlen; ++i) {
01711                 size_t n;
01712                 item = PySequence_Fast_GET_ITEM(seq, i);
01713                 n = PyString_GET_SIZE(item);
01714                 memcpy(p, PyString_AS_STRING(item), n);
01715                 p += n;
01716                 if (i < seqlen - 1) {
01717                         memcpy(p, sep, seplen);
01718                         p += seplen;
01719                 }
01720         }
01721 
01722         Py_DECREF(seq);
01723         return res;
01724 }
01725 
01726 PyObject *
01727 _PyString_Join(PyObject *sep, PyObject *x)
01728 {
01729         assert(sep != NULL && PyString_Check(sep));
01730         assert(x != NULL);
01731         return string_join((PyStringObject *)sep, x);
01732 }
01733 
01734 static void
01735 string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
01736 {
01737         if (*end > len)
01738                 *end = len;
01739         else if (*end < 0)
01740                 *end += len;
01741         if (*end < 0)
01742                 *end = 0;
01743         if (*start < 0)
01744                 *start += len;
01745         if (*start < 0)
01746                 *start = 0;
01747 }
01748 
01749 static Py_ssize_t
01750 string_find_internal(PyStringObject *self, PyObject *args, int dir)
01751 {
01752         const char *s = PyString_AS_STRING(self), *sub;
01753         Py_ssize_t len = PyString_GET_SIZE(self);
01754         Py_ssize_t n, i = 0, last = INT_MAX;
01755         PyObject *subobj;
01756 
01757         /* XXX ssize_t i */
01758         if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex",
01759                 &subobj, _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
01760                 return -2;
01761         if (PyString_Check(subobj)) {
01762                 sub = PyString_AS_STRING(subobj);
01763                 n = PyString_GET_SIZE(subobj);
01764         }
01765 #ifdef Py_USING_UNICODE
01766         else if (PyUnicode_Check(subobj))
01767                 return PyUnicode_Find((PyObject *)self, subobj, i, last, dir);
01768 #endif
01769         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
01770                 return -2;
01771 
01772         string_adjust_indices(&i, &last, len);
01773 
01774         if (dir > 0) {
01775                 if (n == 0 && i <= last)
01776                         return (long)i;
01777                 last -= n;
01778                 for (; i <= last; ++i)
01779                         if (s[i] == sub[0] && memcmp(&s[i], sub, n) == 0)
01780                                 return (long)i;
01781         }
01782         else {
01783                 Py_ssize_t j;
01784 
01785                 if (n == 0 && i <= last)
01786                         return last;
01787                 for (j = last-n; j >= i; --j)
01788                         if (s[j] == sub[0] && memcmp(&s[j], sub, n) == 0)
01789                                 return j;
01790         }
01791 
01792         return -1;
01793 }
01794 
01795 
01796 PyDoc_STRVAR(find__doc__,
01797 "S.find(sub [,start [,end]]) -> int\n\
01798 \n\
01799 Return the lowest index in S where substring sub is found,\n\
01800 such that sub is contained within s[start,end].  Optional\n\
01801 arguments start and end are interpreted as in slice notation.\n\
01802 \n\
01803 Return -1 on failure.");
01804 
01805 static PyObject *
01806 string_find(PyStringObject *self, PyObject *args)
01807 {
01808         Py_ssize_t result = string_find_internal(self, args, +1);
01809         if (result == -2)
01810                 return NULL;
01811         return PyInt_FromSsize_t(result);
01812 }
01813 
01814 
01815 PyDoc_STRVAR(index__doc__,
01816 "S.index(sub [,start [,end]]) -> int\n\
01817 \n\
01818 Like S.find() but raise ValueError when the substring is not found.");
01819 
01820 static PyObject *
01821 string_index(PyStringObject *self, PyObject *args)
01822 {
01823         Py_ssize_t result = string_find_internal(self, args, +1);
01824         if (result == -2)
01825                 return NULL;
01826         if (result == -1) {
01827                 PyErr_SetString(PyExc_ValueError,
01828                                 "substring not found");
01829                 return NULL;
01830         }
01831         return PyInt_FromSsize_t(result);
01832 }
01833 
01834 
01835 PyDoc_STRVAR(rfind__doc__,
01836 "S.rfind(sub [,start [,end]]) -> int\n\
01837 \n\
01838 Return the highest index in S where substring sub is found,\n\
01839 such that sub is contained within s[start,end].  Optional\n\
01840 arguments start and end are interpreted as in slice notation.\n\
01841 \n\
01842 Return -1 on failure.");
01843 
01844 static PyObject *
01845 string_rfind(PyStringObject *self, PyObject *args)
01846 {
01847         Py_ssize_t result = string_find_internal(self, args, -1);
01848         if (result == -2)
01849                 return NULL;
01850         return PyInt_FromSsize_t(result);
01851 }
01852 
01853 
01854 PyDoc_STRVAR(rindex__doc__,
01855 "S.rindex(sub [,start [,end]]) -> int\n\
01856 \n\
01857 Like S.rfind() but raise ValueError when the substring is not found.");
01858 
01859 static PyObject *
01860 string_rindex(PyStringObject *self, PyObject *args)
01861 {
01862         Py_ssize_t result = string_find_internal(self, args, -1);
01863         if (result == -2)
01864                 return NULL;
01865         if (result == -1) {
01866                 PyErr_SetString(PyExc_ValueError,
01867                                 "substring not found");
01868                 return NULL;
01869         }
01870         return PyInt_FromSsize_t(result);
01871 }
01872 
01873 
01874 static PyObject *
01875 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
01876 {
01877         char *s = PyString_AS_STRING(self);
01878         Py_ssize_t len = PyString_GET_SIZE(self);
01879         char *sep = PyString_AS_STRING(sepobj);
01880         Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
01881         Py_ssize_t i, j;
01882 
01883         i = 0;
01884         if (striptype != RIGHTSTRIP) {
01885                 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
01886                         i++;
01887                 }
01888         }
01889 
01890         j = len;
01891         if (striptype != LEFTSTRIP) {
01892                 do {
01893                         j--;
01894                 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
01895                 j++;
01896         }
01897 
01898         if (i == 0 && j == len && PyString_CheckExact(self)) {
01899                 Py_INCREF(self);
01900                 return (PyObject*)self;
01901         }
01902         else
01903                 return PyString_FromStringAndSize(s+i, j-i);
01904 }
01905 
01906 
01907 static PyObject *
01908 do_strip(PyStringObject *self, int striptype)
01909 {
01910         char *s = PyString_AS_STRING(self);
01911         Py_ssize_t len = PyString_GET_SIZE(self), i, j;
01912 
01913         i = 0;
01914         if (striptype != RIGHTSTRIP) {
01915                 while (i < len && isspace(Py_CHARMASK(s[i]))) {
01916                         i++;
01917                 }
01918         }
01919 
01920         j = len;
01921         if (striptype != LEFTSTRIP) {
01922                 do {
01923                         j--;
01924                 } while (j >= i && isspace(Py_CHARMASK(s[j])));
01925                 j++;
01926         }
01927 
01928         if (i == 0 && j == len && PyString_CheckExact(self)) {
01929                 Py_INCREF(self);
01930                 return (PyObject*)self;
01931         }
01932         else
01933                 return PyString_FromStringAndSize(s+i, j-i);
01934 }
01935 
01936 
01937 static PyObject *
01938 do_argstrip(PyStringObject *self, int striptype, PyObject *args)
01939 {
01940         PyObject *sep = NULL;
01941 
01942         if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
01943                 return NULL;
01944 
01945         if (sep != NULL && sep != Py_None) {
01946                 if (PyString_Check(sep))
01947                         return do_xstrip(self, striptype, sep);
01948 #ifdef Py_USING_UNICODE
01949                 else if (PyUnicode_Check(sep)) {
01950                         PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
01951                         PyObject *res;
01952                         if (uniself==NULL)
01953                                 return NULL;
01954                         res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
01955                                 striptype, sep);
01956                         Py_DECREF(uniself);
01957                         return res;
01958                 }
01959 #endif
01960                 else {
01961                         PyErr_Format(PyExc_TypeError,
01962 #ifdef Py_USING_UNICODE
01963                                      "%s arg must be None, str or unicode",
01964 #else
01965                                      "%s arg must be None or str",
01966 #endif
01967                                      STRIPNAME(striptype));
01968                         return NULL;
01969                 }
01970                 return do_xstrip(self, striptype, sep);
01971         }
01972 
01973         return do_strip(self, striptype);
01974 }
01975 
01976 
01977 PyDoc_STRVAR(strip__doc__,
01978 "S.strip([chars]) -> string or unicode\n\
01979 \n\
01980 Return a copy of the string S with leading and trailing\n\
01981 whitespace removed.\n\
01982 If chars is given and not None, remove characters in chars instead.\n\
01983 If chars is unicode, S will be converted to unicode before stripping");
01984 
01985 static PyObject *
01986 string_strip(PyStringObject *self, PyObject *args)
01987 {
01988         if (PyTuple_GET_SIZE(args) == 0)
01989                 return do_strip(self, BOTHSTRIP); /* Common case */
01990         else
01991                 return do_argstrip(self, BOTHSTRIP, args);
01992 }
01993 
01994 
01995 PyDoc_STRVAR(lstrip__doc__,
01996 "S.lstrip([chars]) -> string or unicode\n\
01997 \n\
01998 Return a copy of the string S with leading whitespace removed.\n\
01999 If chars is given and not None, remove characters in chars instead.\n\
02000 If chars is unicode, S will be converted to unicode before stripping");
02001 
02002 static PyObject *
02003 string_lstrip(PyStringObject *self, PyObject *args)
02004 {
02005         if (PyTuple_GET_SIZE(args) == 0)
02006                 return do_strip(self, LEFTSTRIP); /* Common case */
02007         else
02008                 return do_argstrip(self, LEFTSTRIP, args);
02009 }
02010 
02011 
02012 PyDoc_STRVAR(rstrip__doc__,
02013 "S.rstrip([chars]) -> string or unicode\n\
02014 \n\
02015 Return a copy of the string S with trailing whitespace removed.\n\
02016 If chars is given and not None, remove characters in chars instead.\n\
02017 If chars is unicode, S will be converted to unicode before stripping");
02018 
02019 static PyObject *
02020 string_rstrip(PyStringObject *self, PyObject *args)
02021 {
02022         if (PyTuple_GET_SIZE(args) == 0)
02023                 return do_strip(self, RIGHTSTRIP); /* Common case */
02024         else
02025                 return do_argstrip(self, RIGHTSTRIP, args);
02026 }
02027 
02028 
02029 PyDoc_STRVAR(lower__doc__,
02030 "S.lower() -> string\n\
02031 \n\
02032 Return a copy of the string S converted to lowercase.");
02033 
02034 static PyObject *
02035 string_lower(PyStringObject *self)
02036 {
02037         char *s = PyString_AS_STRING(self), *s_new;
02038         Py_ssize_t i, n = PyString_GET_SIZE(self);
02039         PyObject *new;
02040 
02041         new = PyString_FromStringAndSize(NULL, n);
02042         if (new == NULL)
02043                 return NULL;
02044         s_new = PyString_AsString(new);
02045         for (i = 0; i < n; i++) {
02046                 int c = Py_CHARMASK(*s++);
02047                 if (isupper(c)) {
02048                         *s_new = tolower(c);
02049                 } else
02050                         *s_new = c;
02051                 s_new++;
02052         }
02053         return new;
02054 }
02055 
02056 
02057 PyDoc_STRVAR(upper__doc__,
02058 "S.upper() -> string\n\
02059 \n\
02060 Return a copy of the string S converted to uppercase.");
02061 
02062 static PyObject *
02063 string_upper(PyStringObject *self)
02064 {
02065         char *s = PyString_AS_STRING(self), *s_new;
02066         Py_ssize_t i, n = PyString_GET_SIZE(self);
02067         PyObject *new;
02068 
02069         new = PyString_FromStringAndSize(NULL, n);
02070         if (new == NULL)
02071                 return NULL;
02072         s_new = PyString_AsString(new);
02073         for (i = 0; i < n; i++) {
02074                 int c = Py_CHARMASK(*s++);
02075                 if (islower(c)) {
02076                         *s_new = toupper(c);
02077                 } else
02078                         *s_new = c;
02079                 s_new++;
02080         }
02081         return new;
02082 }
02083 
02084 
02085 PyDoc_STRVAR(title__doc__,
02086 "S.title() -> string\n\
02087 \n\
02088 Return a titlecased version of S, i.e. words start with uppercase\n\
02089 characters, all remaining cased characters have lowercase.");
02090 
02091 static PyObject*
02092 string_title(PyStringObject *self)
02093 {
02094         char *s = PyString_AS_STRING(self), *s_new;
02095         Py_ssize_t i, n = PyString_GET_SIZE(self);
02096         int previous_is_cased = 0;
02097         PyObject *new;
02098 
02099         new = PyString_FromStringAndSize(NULL, n);
02100         if (new == NULL)
02101                 return NULL;
02102         s_new = PyString_AsString(new);
02103         for (i = 0; i < n; i++) {
02104                 int c = Py_CHARMASK(*s++);
02105                 if (islower(c)) {
02106                         if (!previous_is_cased)
02107                             c = toupper(c);
02108                         previous_is_cased = 1;
02109                 } else if (isupper(c)) {
02110                         if (previous_is_cased)
02111                             c = tolower(c);
02112                         previous_is_cased = 1;
02113                 } else
02114                         previous_is_cased = 0;
02115                 *s_new++ = c;
02116         }
02117         return new;
02118 }
02119 
02120 PyDoc_STRVAR(capitalize__doc__,
02121 "S.capitalize() -> string\n\
02122 \n\
02123 Return a copy of the string S with only its first character\n\
02124 capitalized.");
02125 
02126 static PyObject *
02127 string_capitalize(PyStringObject *self)
02128 {
02129         char *s = PyString_AS_STRING(self), *s_new;
02130         Py_ssize_t i, n = PyString_GET_SIZE(self);
02131         PyObject *new;
02132 
02133         new = PyString_FromStringAndSize(NULL, n);
02134         if (new == NULL)
02135                 return NULL;
02136         s_new = PyString_AsString(new);
02137         if (0 < n) {
02138                 int c = Py_CHARMASK(*s++);
02139                 if (islower(c))
02140                         *s_new = toupper(c);
02141                 else
02142                         *s_new = c;
02143                 s_new++;
02144         }
02145         for (i = 1; i < n; i++) {
02146                 int c = Py_CHARMASK(*s++);
02147                 if (isupper(c))
02148                         *s_new = tolower(c);
02149                 else
02150                         *s_new = c;
02151                 s_new++;
02152         }
02153         return new;
02154 }
02155 
02156 
02157 PyDoc_STRVAR(count__doc__,
02158 "S.count(sub[, start[, end]]) -> int\n\
02159 \n\
02160 Return the number of occurrences of substring sub in string\n\
02161 S[start:end].  Optional arguments start and end are\n\
02162 interpreted as in slice notation.");
02163 
02164 static PyObject *
02165 string_count(PyStringObject *self, PyObject *args)
02166 {
02167         const char *s = PyString_AS_STRING(self), *sub, *t;
02168         Py_ssize_t len = PyString_GET_SIZE(self), n;
02169         Py_ssize_t i = 0, last = INT_MAX;
02170         Py_ssize_t m, r;
02171         PyObject *subobj;
02172 
02173         if (!PyArg_ParseTuple(args, "O|O&O&:count", &subobj,
02174                 _PyEval_SliceIndex, &i, _PyEval_SliceIndex, &last))
02175                 return NULL;
02176 
02177         if (PyString_Check(subobj)) {
02178                 sub = PyString_AS_STRING(subobj);
02179                 n = PyString_GET_SIZE(subobj);
02180         }
02181 #ifdef Py_USING_UNICODE
02182         else if (PyUnicode_Check(subobj)) {
02183                 Py_ssize_t count;
02184                 count = PyUnicode_Count((PyObject *)self, subobj, i, last);
02185                 if (count == -1)
02186                         return NULL;
02187                 else
02188                         return PyInt_FromLong((long) count);
02189         }
02190 #endif
02191         else if (PyObject_AsCharBuffer(subobj, &sub, &n))
02192                 return NULL;
02193 
02194         string_adjust_indices(&i, &last, len);
02195 
02196         m = last + 1 - n;
02197         if (n == 0)
02198                 return PyInt_FromSsize_t(m-i);
02199 
02200         r = 0;
02201         while (i < m) {
02202                 if (!memcmp(s+i, sub, n)) {
02203                         r++;
02204                         i += n;
02205                 } else {
02206                         i++;
02207                 }
02208                 if (i >= m)
02209                         break;
02210                 t = memchr(s+i, sub[0], m-i);
02211                 if (t == NULL)
02212                         break;
02213                 i = t - s;
02214         }
02215         return PyInt_FromSsize_t(r);
02216 }
02217 
02218 PyDoc_STRVAR(swapcase__doc__,
02219 "S.swapcase() -> string\n\
02220 \n\
02221 Return a copy of the string S with uppercase characters\n\
02222 converted to lowercase and vice versa.");
02223 
02224 static PyObject *
02225 string_swapcase(PyStringObject *self)
02226 {
02227         char *s = PyString_AS_STRING(self), *s_new;
02228         Py_ssize_t i, n = PyString_GET_SIZE(self);
02229         PyObject *new;
02230 
02231         new = PyString_FromStringAndSize(NULL, n);
02232         if (new == NULL)
02233                 return NULL;
02234         s_new = PyString_AsString(new);
02235         for (i = 0; i < n; i++) {
02236                 int c = Py_CHARMASK(*s++);
02237                 if (islower(c)) {
02238                         *s_new = toupper(c);
02239                 }
02240                 else if (isupper(c)) {
02241                         *s_new = tolower(c);
02242                 }
02243                 else
02244                         *s_new = c;
02245                 s_new++;
02246         }
02247         return new;
02248 }
02249 
02250 
PyDoc_STRVAR(translate__doc__,
"S.translate(table [,deletechars]) -> string\n\
\n\
Return a copy of the string S, where all characters occurring\n\
in the optional argument deletechars are removed, and the\n\
remaining characters have been mapped through the given\n\
translation table, which must be a string of length 256.");

/* S.translate(table [,deletechars]).

   `table' may be a string, a unicode object (in which case the call is
   handed to PyUnicode_Translate() and deletechars is rejected), or any
   object readable through the character buffer interface; it must be
   exactly 256 bytes long.  `deletechars', if given, may be a string or a
   character buffer object, but not unicode.

   Returns self itself (new reference) when no byte was changed or
   removed and self is an exact string; otherwise returns a new string.
   Returns NULL with an exception set on error. */
static PyObject *
string_translate(PyStringObject *self, PyObject *args)
{
        register char *input, *output;
        register const char *table;
        register Py_ssize_t i, c, changed = 0;
        PyObject *input_obj = (PyObject*)self;
        const char *table1, *output_start, *del_table=NULL;
        Py_ssize_t inlen, tablen, dellen = 0;
        PyObject *result;
        /* Per-byte mapping used on the deletion path; -1 marks a byte
           that is to be dropped. */
        int trans_table[256];
        PyObject *tableobj, *delobj = NULL;

        if (!PyArg_UnpackTuple(args, "translate", 1, 2,
                              &tableobj, &delobj))
                return NULL;

        /* Obtain the raw bytes and length of the translation table. */
        if (PyString_Check(tableobj)) {
                table1 = PyString_AS_STRING(tableobj);
                tablen = PyString_GET_SIZE(tableobj);
        }
#ifdef Py_USING_UNICODE
        else if (PyUnicode_Check(tableobj)) {
                /* Unicode .translate() does not support the deletechars
                   parameter; instead a mapping to None will cause characters
                   to be deleted. */
                if (delobj != NULL) {
                        PyErr_SetString(PyExc_TypeError,
                        "deletions are implemented differently for unicode");
                        return NULL;
                }
                return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
        }
#endif
        else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
                return NULL;

        if (tablen != 256) {
                PyErr_SetString(PyExc_ValueError,
                  "translation table must be 256 characters long");
                return NULL;
        }

        /* Obtain the raw bytes and length of the deletion set, if any. */
        if (delobj != NULL) {
                if (PyString_Check(delobj)) {
                        del_table = PyString_AS_STRING(delobj);
                        dellen = PyString_GET_SIZE(delobj);
                }
#ifdef Py_USING_UNICODE
                else if (PyUnicode_Check(delobj)) {
                        PyErr_SetString(PyExc_TypeError,
                        "deletions are implemented differently for unicode");
                        return NULL;
                }
#endif
                else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
                        return NULL;
        }
        else {
                del_table = NULL;
                dellen = 0;
        }

        /* The output can never be longer than the input, so allocate a
           result of the input's length up front. */
        table = table1;
        inlen = PyString_Size(input_obj);
        result = PyString_FromStringAndSize((char *)NULL, inlen);
        if (result == NULL)
                return NULL;
        output_start = output = PyString_AsString(result);
        input = PyString_AsString(input_obj);

        if (dellen == 0) {
                /* If no deletions are required, use faster code */
                for (i = inlen; --i >= 0; ) {
                        c = Py_CHARMASK(*input++);
                        /* Store the mapped byte and note whether it
                           differs from the input byte. */
                        if (Py_CHARMASK((*output++ = table[c])) != c)
                                changed = 1;
                }
                if (changed || !PyString_CheckExact(input_obj))
                        return result;
                /* Nothing changed: discard the copy and return self. */
                Py_DECREF(result);
                Py_INCREF(input_obj);
                return input_obj;
        }

        /* Deletion path: start from the plain mapping ... */
        for (i = 0; i < 256; i++)
                trans_table[i] = Py_CHARMASK(table[i]);

        /* ... then mark every byte listed in deletechars as dropped. */
        for (i = 0; i < dellen; i++)
                trans_table[(int) Py_CHARMASK(del_table[i])] = -1;

        for (i = inlen; --i >= 0; ) {
                c = Py_CHARMASK(*input++);
                /* A kept byte is written to the output; only when it is
                   also identical to the input byte does the `continue'
                   skip the `changed = 1' below.  Dropped bytes and bytes
                   mapped to a different value both fall through. */
                if (trans_table[c] != -1)
                        if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
                                continue;
                changed = 1;
        }
        if (!changed && PyString_CheckExact(input_obj)) {
                Py_DECREF(result);
                Py_INCREF(input_obj);
                return input_obj;
        }
        /* Fix the size of the resulting string */
        /* NOTE(review): the return value of _PyString_Resize() is not
           checked; presumably it sets result to NULL on failure so that
           NULL is returned below -- confirm. */
        if (inlen > 0)
                _PyString_Resize(&result, output - output_start);
        return result;
}
02367 
02368 
02369 /* What follows is used for implementing replace().  Perry Stoll. */
02370 
02371 /*
02372   mymemfind
02373 
02374   strstr replacement for arbitrary blocks of memory.
02375 
02376   Locates the first occurrence in the memory pointed to by MEM of the
02377   contents of memory pointed to by PAT.  Returns the index into MEM if
02378   found, or -1 if not found.  If len of PAT is greater than length of
02379   MEM, the function returns -1.
02380 */
02381 static Py_ssize_t
02382 mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
02383 {
02384         register Py_ssize_t ii;
02385 
02386         /* pattern can not occur in the last pat_len-1 chars */
02387         len -= pat_len;
02388 
02389         for (ii = 0; ii <= len; ii++) {
02390                 if (mem[ii] == pat[0] && memcmp(&mem[ii], pat, pat_len) == 0) {
02391                         return ii;
02392                 }
02393         }
02394         return -1;
02395 }
02396 
02397 /*
02398   mymemcnt
02399 
02400    Return the number of distinct times PAT is found in MEM.
02401    meaning mem=1111 and pat==11 returns 2.
02402            mem=11111 and pat==11 also return 2.
02403  */
02404 static Py_ssize_t
02405 mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
02406 {
02407         register Py_ssize_t offset = 0;
02408         Py_ssize_t nfound = 0;
02409 
02410         while (len >= 0) {
02411                 offset = mymemfind(mem, len, pat, pat_len);
02412                 if (offset == -1)
02413                         break;
02414                 mem += offset + pat_len;
02415                 len -= offset + pat_len;
02416                 nfound++;
02417         }
02418         return nfound;
02419 }
02420 
02421 /*
02422    mymemreplace
02423 
02424    Return a string in which all occurrences of PAT in memory STR are
02425    replaced with SUB.
02426 
02427    If length of PAT is greater than length of STR or there are no occurrences
02428    of PAT in STR, then the original string is returned. Otherwise, a new
02429    string is allocated here and returned.
02430 
02431    on return, out_len is:
02432        the length of output string, or
02433        -1 if the input string is returned, or
02434        unchanged if an error occurs (no memory).
02435 
02436    return value is:
02437        the new string allocated locally, or
02438        NULL if an error occurred.
02439 */
02440 static char *
02441 mymemreplace(const char *str, Py_ssize_t len,           /* input string */
02442              const char *pat, Py_ssize_t pat_len,       /* pattern string to find */
02443              const char *sub, Py_ssize_t sub_len,       /* substitution string */
02444              Py_ssize_t count,                          /* number of replacements */
02445              Py_ssize_t *out_len)
02446 {
02447         char *out_s;
02448         char *new_s;
02449         Py_ssize_t nfound, offset, new_len;
02450 
02451         if (len == 0 || (pat_len == 0 && sub_len == 0) || pat_len > len)
02452                 goto return_same;
02453 
02454         /* find length of output string */
02455         nfound = (pat_len > 0) ? mymemcnt(str, len, pat, pat_len) : len + 1;
02456         if (count < 0)
02457                 count = INT_MAX;
02458         else if (nfound > count)
02459                 nfound = count;
02460         if (nfound == 0)
02461                 goto return_same;
02462 
02463         new_len = len + nfound*(sub_len - pat_len);
02464         if (new_len == 0) {
02465                 /* Have to allocate something for the caller to free(). */
02466                 out_s = (char *)PyMem_MALLOC(1);
02467                 if (out_s == NULL)
02468                         return NULL;
02469                 out_s[0] = '\0';
02470         }
02471         else {
02472                 assert(new_len > 0);
02473                 new_s = (char *)PyMem_MALLOC(new_len);
02474                 if (new_s == NULL)
02475                         return NULL;
02476                 out_s = new_s;
02477 
02478                 if (pat_len > 0) {
02479                         for (; nfound > 0; --nfound) {
02480                                 /* find index of next instance of pattern */
02481                                 offset = mymemfind(str, len, pat, pat_len);
02482                                 if (offset == -1)
02483                                         break;
02484 
02485                                 /* copy non matching part of input string */
02486                                 memcpy(new_s, str, offset);
02487                                 str += offset + pat_len;
02488                                 len -= offset + pat_len;
02489 
02490                                 /* copy substitute into the output string */
02491                                 new_s += offset;
02492                                 memcpy(new_s, sub, sub_len);
02493                                 new_s += sub_len;
02494                         }
02495                         /* copy any remaining values into output string */
02496                         if (len > 0)
02497                                 memcpy(new_s, str, len);
02498                 }
02499                 else {
02500                         for (;;++str, --len) {
02501                                 memcpy(new_s, sub, sub_len);
02502                                 new_s += sub_len;
02503                                 if (--nfound <= 0) {
02504                                         memcpy(new_s, str, len);
02505                                         break;
02506                                 }
02507                                 *new_s++ = *str;
02508                         }
02509                 }
02510         }
02511         *out_len = new_len;
02512         return out_s;
02513 
02514   return_same:
02515         *out_len = -1;
02516         return (char *)str; /* cast away const */
02517 }
02518 
02519 
02520 PyDoc_STRVAR(replace__doc__,
02521 "S.replace (old, new[, count]) -> string\n\
02522 \n\
02523 Return a copy of string S with all occurrences of substring\n\
02524 old replaced by new.  If the optional argument count is\n\
02525 given, only the first count occurrences are replaced.");
02526 
02527 static PyObject *
02528 string_replace(PyStringObject *self, PyObject *args)
02529 {
02530         const char *str = PyString_AS_STRING(self), *sub, *repl;
02531         char *new_s;
02532         const Py_ssize_t len = PyString_GET_SIZE(self);
02533         Py_ssize_t sub_len, repl_len, out_len;
02534         int count = -1;
02535         PyObject *new;
02536         PyObject *subobj, *replobj;
02537 
02538         if (!PyArg_ParseTuple(args, "OO|i:replace",
02539                               &subobj, &replobj, &count))
02540                 return NULL;
02541 
02542         if (PyString_Check(subobj)) {
02543                 sub = PyString_AS_STRING(subobj);
02544                 sub_len = PyString_GET_SIZE(subobj);
02545         }
02546 #ifdef Py_USING_UNICODE
02547         else if (PyUnicode_Check(subobj))
02548                 return PyUnicode_Replace((PyObject *)self,
02549                                          subobj, replobj, count);
02550 #endif
02551         else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
02552                 return NULL;
02553 
02554         if (PyString_Check(replobj)) {
02555                 repl = PyString_AS_STRING(replobj);
02556                 repl_len = PyString_GET_SIZE(replobj);
02557         }
02558 #ifdef Py_USING_UNICODE
02559         else if (PyUnicode_Check(replobj))
02560                 return PyUnicode_Replace((PyObject *)self,
02561                                          subobj, replobj, count);
02562 #endif
02563         else if (PyObject_AsCharBuffer(replobj, &repl, &repl_len))
02564                 return NULL;
02565 
02566         new_s = mymemreplace(str,len,sub,sub_len,repl,repl_len,count,&out_len);
02567         if (new_s == NULL) {
02568                 PyErr_NoMemory();
02569                 return NULL;
02570         }
02571         if (out_len == -1) {
02572                 if (PyString_CheckExact(self)) {
02573                         /* we're returning another reference to self */
02574                         new = (PyObject*)self;
02575                         Py_INCREF(new);
02576                 }
02577                 else {
02578                         new = PyString_FromStringAndSize(str, len);
02579                         if (new == NULL)
02580                                 return NULL;
02581                 }
02582         }
02583         else {
02584                 new = PyString_FromStringAndSize(new_s, out_len);
02585                 PyMem_FREE(new_s);
02586         }
02587         return new;
02588 }
02589 
02590 
02591 PyDoc_STRVAR(startswith__doc__,
02592 "S.startswith(prefix[, start[, end]]) -> bool\n\
02593 \n\
02594 Return True if S starts with the specified prefix, False otherwise.\n\
02595 With optional start, test S beginning at that position.\n\
02596 With optional end, stop comparing S at that position.");
02597 
02598 static PyObject *
02599 string_startswith(PyStringObject *self, PyObject *args)
02600 {
02601         const char* str = PyString_AS_STRING(self);
02602         Py_ssize_t len = PyString_GET_SIZE(self);
02603         const char* prefix;
02604         Py_ssize_t plen;
02605         Py_ssize_t start = 0;
02606         Py_ssize_t end = INT_MAX;
02607         PyObject *subobj;
02608 
02609         if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
02610                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
02611                 return NULL;
02612         if (PyString_Check(subobj)) {
02613                 prefix = PyString_AS_STRING(subobj);
02614                 plen = PyString_GET_SIZE(subobj);
02615         }
02616 #ifdef Py_USING_UNICODE
02617         else if (PyUnicode_Check(subobj)) {
02618                 Py_ssize_t rc;
02619                 rc = PyUnicode_Tailmatch((PyObject *)self,
02620                                           subobj, start, end, -1);
02621                 if (rc == -1)
02622                         return NULL;
02623                 else
02624                         return PyBool_FromLong((long) rc);
02625         }
02626 #endif
02627         else if (PyObject_AsCharBuffer(subobj, &prefix, &plen))
02628                 return NULL;
02629 
02630         string_adjust_indices(&start, &end, len);
02631 
02632         if (start+plen > len)
02633                 return PyBool_FromLong(0);
02634 
02635         if (end-start >= plen)
02636                 return PyBool_FromLong(!memcmp(str+start, prefix, plen));
02637         else
02638                 return PyBool_FromLong(0);
02639 }
02640 
02641 
02642 PyDoc_STRVAR(endswith__doc__,
02643 "S.endswith(suffix[, start[, end]]) -> bool\n\
02644 \n\
02645 Return True if S ends with the specified suffix, False otherwise.\n\
02646 With optional start, test S beginning at that position.\n\
02647 With optional end, stop comparing S at that position.");
02648 
02649 static PyObject *
02650 string_endswith(PyStringObject *self, PyObject *args)
02651 {
02652         const char* str = PyString_AS_STRING(self);
02653         Py_ssize_t len = PyString_GET_SIZE(self);
02654         const char* suffix;
02655         Py_ssize_t slen;
02656         Py_ssize_t start = 0;
02657         Py_ssize_t end = INT_MAX;
02658         PyObject *subobj;
02659 
02660         if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
02661                 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
02662                 return NULL;
02663         if (PyString_Check(subobj)) {
02664                 suffix = PyString_AS_STRING(subobj);
02665                 slen = PyString_GET_SIZE(subobj);
02666         }
02667 #ifdef Py_USING_UNICODE
02668         else if (PyUnicode_Check(subobj)) {
02669                 Py_ssize_t rc;
02670                 rc = PyUnicode_Tailmatch((PyObject *)self,
02671                                           subobj, start, end, +1);
02672                 if (rc == -1)
02673                         return NULL;
02674                 else
02675                         return PyBool_FromLong((long) rc);
02676         }
02677 #endif
02678         else if (PyObject_AsCharBuffer(subobj, &suffix, &slen))
02679                 return NULL;
02680 
02681         string_adjust_indices(&start, &end, len);
02682 
02683         if (end-start < slen || start > len)
02684                 return PyBool_FromLong(0);
02685 
02686         if (end-slen > start)
02687                 start = end - slen;
02688         if (end-start >= slen)
02689                 return PyBool_FromLong(!memcmp(str+start, suffix, slen));
02690         else
02691                 return PyBool_FromLong(0);
02692 }
02693 
02694 
02695 PyDoc_STRVAR(encode__doc__,
02696 "S.encode([encoding[,errors]]) -> object\n\
02697 \n\
02698 Encodes S using the codec registered for encoding. encoding defaults\n\
02699 to the default encoding. errors may be given to set a different error\n\
02700 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
02701 a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
02702 'xmlcharrefreplace' as well as any other name registered with\n\
02703 codecs.register_error that is able to handle UnicodeEncodeErrors.");
02704 
02705 static PyObject *
02706 string_encode(PyStringObject *self, PyObject *args)
02707 {
02708     char *encoding = NULL;
02709     char *errors = NULL;
02710     PyObject *v;
02711     
02712     if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
02713         return NULL;
02714     v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
02715     if (v == NULL)
02716         goto onError;
02717     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
02718         PyErr_Format(PyExc_TypeError,
02719                      "encoder did not return a string/unicode object "
02720                      "(type=%.400s)",
02721                      v->ob_type->tp_name);
02722         Py_DECREF(v);
02723         return NULL;
02724     }
02725     return v;
02726 
02727  onError:
02728     return NULL;
02729 }
02730 
02731 
02732 PyDoc_STRVAR(decode__doc__,
02733 "S.decode([encoding[,errors]]) -> object\n\
02734 \n\
02735 Decodes S using the codec registered for encoding. encoding defaults\n\
02736 to the default encoding. errors may be given to set a different error\n\
02737 handling scheme. Default is 'strict' meaning that encoding errors raise\n\
02738 a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
02739 as well as any other name registerd with codecs.register_error that is\n\
02740 able to handle UnicodeDecodeErrors.");
02741 
02742 static PyObject *
02743 string_decode(PyStringObject *self, PyObject *args)
02744 {
02745     char *encoding = NULL;
02746     char *errors = NULL;
02747     PyObject *v;
02748     
02749     if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
02750         return NULL;
02751     v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
02752     if (v == NULL)
02753         goto onError;
02754     if (!PyString_Check(v) && !PyUnicode_Check(v)) {
02755         PyErr_Format(PyExc_TypeError,
02756                      "decoder did not return a string/unicode object "
02757                      "(type=%.400s)",
02758                      v->ob_type->tp_name);
02759         Py_DECREF(v);
02760         return NULL;
02761     }
02762     return v;
02763 
02764  onError:
02765     return NULL;
02766 }
02767 
02768 
02769 PyDoc_STRVAR(expandtabs__doc__,
02770 "S.expandtabs([tabsize]) -> string\n\
02771 \n\
02772 Return a copy of S where all tab characters are expanded using spaces.\n\
02773 If tabsize is not given, a tab size of 8 characters is assumed.");
02774 
02775 static PyObject*
02776 string_expandtabs(PyStringObject *self, PyObject *args)
02777 {
02778     const char *e, *p;
02779     char *q;
02780     Py_ssize_t i, j;
02781     PyObject *u;
02782     int tabsize = 8;
02783 
02784     if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
02785         return NULL;
02786 
02787     /* First pass: determine size of output string */
02788     i = j = 0;
02789     e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
02790     for (p = PyString_AS_STRING(self); p < e; p++)
02791         if (*p == '\t') {
02792             if (tabsize > 0)
02793                 j += tabsize - (j % tabsize);
02794         }
02795         else {
02796             j++;
02797             if (*p == '\n' || *p == '\r') {
02798                 i += j;
02799                 j = 0;
02800             }
02801         }
02802 
02803     /* Second pass: create output string and fill it */
02804     u = PyString_FromStringAndSize(NULL, i + j);
02805     if (!u)
02806         return NULL;
02807 
02808     j = 0;
02809     q = PyString_AS_STRING(u);
02810 
02811     for (p = PyString_AS_STRING(self); p < e; p++)
02812         if (*p == '\t') {
02813             if (tabsize > 0) {
02814                 i = tabsize - (j % tabsize);
02815                 j += i;
02816                 while (i--)
02817                     *q++ = ' ';
02818             }
02819         }
02820         else {
02821             j++;
02822             *q++ = *p;
02823             if (*p == '\n' || *p == '\r')
02824                 j = 0;
02825         }
02826 
02827     return u;
02828 }
02829 
02830 static PyObject *
02831 pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
02832 {
02833     PyObject *u;
02834 
02835     if (left < 0)
02836         left = 0;
02837     if (right < 0)
02838         right = 0;
02839 
02840     if (left == 0 && right == 0 && PyString_CheckExact(self)) {
02841         Py_INCREF(self);
02842         return (PyObject *)self;
02843     }
02844 
02845     u = PyString_FromStringAndSize(NULL,
02846                                    left + PyString_GET_SIZE(self) + right);
02847     if (u) {
02848         if (left)
02849             memset(PyString_AS_STRING(u), fill, left);
02850         memcpy(PyString_AS_STRING(u) + left,
02851                PyString_AS_STRING(self),
02852                PyString_GET_SIZE(self));
02853         if (right)
02854             memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
02855                    fill, right);
02856     }
02857 
02858     return u;
02859 }
02860 
02861 PyDoc_STRVAR(ljust__doc__,
02862 "S.ljust(width[, fillchar]) -> string\n"
02863 "\n"
02864 "Return S left justified in a string of length width. Padding is\n"
02865 "done using the specified fill character (default is a space).");
02866 
02867 static PyObject *
02868 string_ljust(PyStringObject *self, PyObject *args)
02869 {
02870     int width;
02871     char fillchar = ' ';
02872 
02873     if (!PyArg_ParseTuple(args, "i|c:ljust", &width, &fillchar))
02874         return NULL;
02875 
02876     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
02877         Py_INCREF(self);
02878         return (PyObject*) self;
02879     }
02880 
02881     return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
02882 }
02883 
02884 
02885 PyDoc_STRVAR(rjust__doc__,
02886 "S.rjust(width[, fillchar]) -> string\n"
02887 "\n"
02888 "Return S right justified in a string of length width. Padding is\n"
02889 "done using the specified fill character (default is a space)");
02890 
02891 static PyObject *
02892 string_rjust(PyStringObject *self, PyObject *args)
02893 {
02894     int width;
02895     char fillchar = ' ';
02896 
02897     if (!PyArg_ParseTuple(args, "i|c:rjust", &width, &fillchar))
02898         return NULL;
02899 
02900     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
02901         Py_INCREF(self);
02902         return (PyObject*) self;
02903     }
02904 
02905     return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
02906 }
02907 
02908 
02909 PyDoc_STRVAR(center__doc__,
02910 "S.center(width[, fillchar]) -> string\n"
02911 "\n"
02912 "Return S centered in a string of length width. Padding is\n"
02913 "done using the specified fill character (default is a space)");
02914 
02915 static PyObject *
02916 string_center(PyStringObject *self, PyObject *args)
02917 {
02918     Py_ssize_t marg, left;
02919     long width;
02920     char fillchar = ' ';
02921 
02922     if (!PyArg_ParseTuple(args, "l|c:center", &width, &fillchar))
02923         return NULL;
02924 
02925     if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
02926         Py_INCREF(self);
02927         return (PyObject*) self;
02928     }
02929 
02930     marg = width - PyString_GET_SIZE(self);
02931     left = marg / 2 + (marg & width & 1);
02932 
02933     return pad(self, left, marg - left, fillchar);
02934 }
02935 
02936 PyDoc_STRVAR(zfill__doc__,
02937 "S.zfill(width) -> string\n"
02938 "\n"
02939 "Pad a numeric string S with zeros on the left, to fill a field\n"
02940 "of the specified width.  The string S is never truncated.");
02941 
02942 static PyObject *
02943 string_zfill(PyStringObject *self, PyObject *args)
02944 {
02945     Py_ssize_t fill;
02946     PyObject *s;
02947     char *p;
02948 
02949     long width;
02950     if (!PyArg_ParseTuple(args, "l:zfill", &width))
02951         return NULL;
02952 
02953     if (PyString_GET_SIZE(self) >= width) {
02954         if (PyString_CheckExact(self)) {
02955             Py_INCREF(self);
02956             return (PyObject*) self;
02957         }
02958         else
02959             return PyString_FromStringAndSize(
02960                 PyString_AS_STRING(self),
02961                 PyString_GET_SIZE(self)
02962             );
02963     }
02964 
02965     fill = width - PyString_GET_SIZE(self);
02966 
02967     s = pad(self, fill, 0, '0');
02968 
02969     if (s == NULL)
02970         return NULL;
02971 
02972     p = PyString_AS_STRING(s);
02973     if (p[fill] == '+' || p[fill] == '-') {
02974         /* move sign to beginning of string */
02975         p[0] = p[fill];
02976         p[fill] = '0';
02977     }
02978 
02979     return (PyObject*) s;
02980 }
02981 
02982 PyDoc_STRVAR(isspace__doc__,
02983 "S.isspace() -> bool\n\
02984 \n\
02985 Return True if all characters in S are whitespace\n\
02986 and there is at least one character in S, False otherwise.");
02987 
02988 static PyObject*
02989 string_isspace(PyStringObject *self)
02990 {
02991     register const unsigned char *p
02992         = (unsigned char *) PyString_AS_STRING(self);
02993     register const unsigned char *e;
02994 
02995     /* Shortcut for single character strings */
02996     if (PyString_GET_SIZE(self) == 1 &&
02997         isspace(*p))
02998         return PyBool_FromLong(1);
02999 
03000     /* Special case for empty strings */
03001     if (PyString_GET_SIZE(self) == 0)
03002         return PyBool_FromLong(0);
03003 
03004     e = p + PyString_GET_SIZE(self);
03005     for (; p < e; p++) {
03006         if (!isspace(*p))
03007             return PyBool_FromLong(0);
03008     }
03009     return PyBool_FromLong(1);
03010 }
03011 
03012 
03013 PyDoc_STRVAR(isalpha__doc__,
03014 "S.isalpha() -> bool\n\
03015 \n\
03016 Return True if all characters in S are alphabetic\n\
03017 and there is at least one character in S, False otherwise.");
03018 
03019 static PyObject*
03020 string_isalpha(PyStringObject *self)
03021 {
03022     register const unsigned char *p
03023         = (unsigned char *) PyString_AS_STRING(self);
03024     register const unsigned char *e;
03025 
03026     /* Shortcut for single character strings */
03027     if (PyString_GET_SIZE(self) == 1 &&
03028         isalpha(*p))
03029         return PyBool_FromLong(1);
03030 
03031     /* Special case for empty strings */
03032     if (PyString_GET_SIZE(self) == 0)
03033         return PyBool_FromLong(0);
03034 
03035     e = p + PyString_GET_SIZE(self);
03036     for (; p < e; p++) {
03037         if (!isalpha(*p))
03038             return PyBool_FromLong(0);
03039     }
03040     return PyBool_FromLong(1);
03041 }
03042 
03043 
03044 PyDoc_STRVAR(isalnum__doc__,
03045 "S.isalnum() -> bool\n\
03046 \n\
03047 Return True if all characters in S are alphanumeric\n\
03048 and there is at least one character in S, False otherwise.");
03049 
03050 static PyObject*
03051 string_isalnum(PyStringObject *self)
03052 {
03053     register const unsigned char *p
03054         = (unsigned char *) PyString_AS_STRING(self);
03055     register const unsigned char *e;
03056 
03057     /* Shortcut for single character strings */
03058     if (PyString_GET_SIZE(self) == 1 &&
03059         isalnum(*p))
03060         return PyBool_FromLong(1);
03061 
03062     /* Special case for empty strings */
03063     if (PyString_GET_SIZE(self) == 0)
03064         return PyBool_FromLong(0);
03065 
03066     e = p + PyString_GET_SIZE(self);
03067     for (; p < e; p++) {
03068         if (!isalnum(*p))
03069             return PyBool_FromLong(0);
03070     }
03071     return PyBool_FromLong(1);
03072 }
03073 
03074 
03075 PyDoc_STRVAR(isdigit__doc__,
03076 "S.isdigit() -> bool\n\
03077 \n\
03078 Return True if all characters in S are digits\n\
03079 and there is at least one character in S, False otherwise.");
03080 
03081 static PyObject*
03082 string_isdigit(PyStringObject *self)
03083 {
03084     register const unsigned char *p
03085         = (unsigned char *) PyString_AS_STRING(self);
03086     register const unsigned char *e;
03087 
03088     /* Shortcut for single character strings */
03089     if (PyString_GET_SIZE(self) == 1 &&
03090         isdigit(*p))
03091         return PyBool_FromLong(1);
03092 
03093     /* Special case for empty strings */
03094     if (PyString_GET_SIZE(self) == 0)
03095         return PyBool_FromLong(0);
03096 
03097     e = p + PyString_GET_SIZE(self);
03098     for (; p < e; p++) {
03099         if (!isdigit(*p))
03100             return PyBool_FromLong(0);
03101     }
03102     return PyBool_FromLong(1);
03103 }
03104 
03105 
03106 PyDoc_STRVAR(islower__doc__,
03107 "S.islower() -> bool\n\
03108 \n\
03109 Return True if all cased characters in S are lowercase and there is\n\
03110 at least one cased character in S, False otherwise.");
03111 
03112 static PyObject*
03113 string_islower(PyStringObject *self)
03114 {
03115     register const unsigned char *p
03116         = (unsigned char *) PyString_AS_STRING(self);
03117     register const unsigned char *e;
03118     int cased;
03119 
03120     /* Shortcut for single character strings */
03121     if (PyString_GET_SIZE(self) == 1)
03122         return PyBool_FromLong(islower(*p) != 0);
03123 
03124     /* Special case for empty strings */
03125     if (PyString_GET_SIZE(self) == 0)
03126         return PyBool_FromLong(0);
03127 
03128     e = p + PyString_GET_SIZE(self);
03129     cased = 0;
03130     for (; p < e; p++) {
03131         if (isupper(*p))
03132             return PyBool_FromLong(0);
03133         else if (!cased && islower(*p))
03134             cased = 1;
03135     }
03136     return PyBool_FromLong(cased);
03137 }
03138 
03139 
03140 PyDoc_STRVAR(isupper__doc__,
03141 "S.isupper() -> bool\n\
03142 \n\
03143 Return True if all cased characters in S are uppercase and there is\n\
03144 at least one cased character in S, False otherwise.");
03145 
03146 static PyObject*
03147 string_isupper(PyStringObject *self)
03148 {
03149     register const unsigned char *p
03150         = (unsigned char *) PyString_AS_STRING(self);
03151     register const unsigned char *e;
03152     int cased;
03153 
03154     /* Shortcut for single character strings */
03155     if (PyString_GET_SIZE(self) == 1)
03156         return PyBool_FromLong(isupper(*p) != 0);
03157 
03158     /* Special case for empty strings */
03159     if (PyString_GET_SIZE(self) == 0)
03160         return PyBool_FromLong(0);
03161 
03162     e = p + PyString_GET_SIZE(self);
03163     cased = 0;
03164     for (; p < e; p++) {
03165         if (islower(*p))
03166             return PyBool_FromLong(0);
03167         else if (!cased && isupper(*p))
03168             cased = 1;
03169     }
03170     return PyBool_FromLong(cased);
03171 }
03172 
03173 
03174 PyDoc_STRVAR(istitle__doc__,
03175 "S.istitle() -> bool\n\
03176 \n\
03177 Return True if S is a titlecased string and there is at least one\n\
03178 character in S, i.e. uppercase characters may only follow uncased\n\
03179 characters and lowercase characters only cased ones. Return False\n\
03180 otherwise.");
03181 
03182 static PyObject*
03183 string_istitle(PyStringObject *self, PyObject *uncased)
03184 {
03185     register const unsigned char *p
03186         = (unsigned char *) PyString_AS_STRING(self);
03187     register const unsigned char *e;
03188     int cased, previous_is_cased;
03189 
03190     /* Shortcut for single character strings */
03191     if (PyString_GET_SIZE(self) == 1)
03192         return PyBool_FromLong(isupper(*p) != 0);
03193 
03194     /* Special case for empty strings */
03195     if (PyString_GET_SIZE(self) == 0)
03196         return PyBool_FromLong(0);
03197 
03198     e = p + PyString_GET_SIZE(self);
03199     cased = 0;
03200     previous_is_cased = 0;
03201     for (; p < e; p++) {
03202         register const unsigned char ch = *p;
03203 
03204         if (isupper(ch)) {
03205             if (previous_is_cased)
03206                 return PyBool_FromLong(0);
03207             previous_is_cased = 1;
03208             cased = 1;
03209         }
03210         else if (islower(ch)) {
03211             if (!previous_is_cased)
03212                 return PyBool_FromLong(0);
03213             previous_is_cased = 1;
03214             cased = 1;
03215         }
03216         else
03217             previous_is_cased = 0;
03218     }
03219     return PyBool_FromLong(cased);
03220 }
03221 
03222 
03223 PyDoc_STRVAR(splitlines__doc__,
03224 "S.splitlines([keepends]) -> list of strings\n\
03225 \n\
03226 Return a list of the lines in S, breaking at line boundaries.\n\
03227 Line breaks are not included in the resulting list unless keepends\n\
03228 is given and true.");
03229 
03230 static PyObject*
03231 string_splitlines(PyStringObject *self, PyObject *args)
03232 {
03233     register Py_ssize_t i;
03234     register Py_ssize_t j;
03235     Py_ssize_t len;
03236     int keepends = 0;
03237     PyObject *list;
03238     PyObject *str;
03239     char *data;
03240 
03241     if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
03242         return NULL;
03243 
03244     data = PyString_AS_STRING(self);
03245     len = PyString_GET_SIZE(self);
03246 
03247     list = PyList_New(0);
03248     if (!list)
03249         goto onError;
03250 
03251     for (i = j = 0; i < len; ) {
03252         Py_ssize_t eol;
03253 
03254         /* Find a line and append it */
03255         while (i < len && data[i] != '\n' && data[i] != '\r')
03256             i++;
03257 
03258         /* Skip the line break reading CRLF as one line break */
03259         eol = i;
03260         if (i < len) {
03261             if (data[i] == '\r' && i + 1 < len &&
03262                 data[i+1] == '\n')
03263                 i += 2;
03264             else
03265                 i++;
03266             if (keepends)
03267                 eol = i;
03268         }
03269         SPLIT_APPEND(data, j, eol);
03270         j = i;
03271     }
03272     if (j < len) {
03273         SPLIT_APPEND(data, j, len);
03274     }
03275 
03276     return list;
03277 
03278  onError:
03279     Py_DECREF(list);
03280     return NULL;
03281 }
03282 
03283 #undef SPLIT_APPEND
03284 
03285 static PyObject *
03286 string_getnewargs(PyStringObject *v)
03287 {
03288         return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
03289 }
03290 
03291 
03292 static PyMethodDef
03293 string_methods[] = {
03294         /* Counterparts of the obsolete stropmodule functions; except
03295            string.maketrans(). */
03296         {"join", (PyCFunction)string_join, METH_O, join__doc__},
03297         {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
03298         {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
03299         {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
03300         {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
03301         {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
03302         {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
03303         {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
03304         {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
03305         {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
03306         {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
03307         {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
03308         {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
03309          capitalize__doc__},
03310         {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
03311         {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
03312          endswith__doc__},
03313         {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
03314         {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
03315         {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
03316         {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
03317         {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
03318         {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
03319         {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
03320         {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
03321          startswith__doc__},
03322         {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
03323         {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
03324          swapcase__doc__},
03325         {"translate", (PyCFunction)string_translate, METH_VARARGS,
03326          translate__doc__},
03327         {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
03328         {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
03329         {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
03330         {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
03331         {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
03332         {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
03333         {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
03334         {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
03335          expandtabs__doc__},
03336         {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
03337          splitlines__doc__},
03338         {"__getnewargs__",      (PyCFunction)string_getnewargs, METH_NOARGS},
03339         {NULL,     NULL}                     /* sentinel */
03340 };
03341 
03342 static PyObject *
03343 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
03344 
03345 static PyObject *
03346 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
03347 {
03348         PyObject *x = NULL;
03349         static char *kwlist[] = {"object", 0};
03350 
03351         if (type != &PyString_Type)
03352                 return str_subtype_new(type, args, kwds);
03353         if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
03354                 return NULL;
03355         if (x == NULL)
03356                 return PyString_FromString("");
03357         return PyObject_Str(x);
03358 }
03359 
03360 static PyObject *
03361 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
03362 {
03363         PyObject *tmp, *pnew;
03364         Py_ssize_t n;
03365 
03366         assert(PyType_IsSubtype(type, &PyString_Type));
03367         tmp = string_new(&PyString_Type, args, kwds);
03368         if (tmp == NULL)
03369                 return NULL;
03370         assert(PyString_CheckExact(tmp));
03371         n = PyString_GET_SIZE(tmp);
03372         pnew = type->tp_alloc(type, n);
03373         if (pnew != NULL) {
03374                 memcpy(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
03375                 ((PyStringObject *)pnew)->ob_shash =
03376                         ((PyStringObject *)tmp)->ob_shash;
03377                 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
03378         }
03379         Py_DECREF(tmp);
03380         return pnew;
03381 }
03382 
03383 static PyObject *
03384 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
03385 {
03386         PyErr_SetString(PyExc_TypeError,
03387                         "The basestring type cannot be instantiated");
03388         return NULL;
03389 }
03390 
03391 static PyObject *
03392 string_mod(PyObject *v, PyObject *w)
03393 {
03394         if (!PyString_Check(v)) {
03395                 Py_INCREF(Py_NotImplemented);
03396                 return Py_NotImplemented;
03397         }
03398         return PyString_Format(v, w);
03399 }
03400 
03401 PyDoc_STRVAR(basestring_doc,    /* used as tp_doc of PyBaseString_Type */
03402 "Type basestring cannot be instantiated; it is the base for str and unicode.");
03403 
03404 static PyNumberMethods string_as_number = {     /* installed as tp_as_number of PyString_Type */
03405         0,                      /*nb_add*/
03406         0,                      /*nb_subtract*/
03407         0,                      /*nb_multiply*/
03408         0,                      /*nb_divide*/
03409         string_mod,             /*nb_remainder*/      /* the only slot filled in: str % args */
03410 };      /* slots not listed above are implicitly zero-initialized */
03411 
03412 
03413 PyTypeObject PyBaseString_Type = {      /* type object for "basestring"; subclassable (BASETYPE) but not instantiable */
03414         PyObject_HEAD_INIT(&PyType_Type)
03415         0,                                      /* ob_size */
03416         "basestring",                           /* tp_name */
03417         0,                                      /* tp_basicsize */
03418         0,                                      /* tp_itemsize */
03419         0,                                      /* tp_dealloc */
03420         0,                                      /* tp_print */
03421         0,                                      /* tp_getattr */
03422         0,                                      /* tp_setattr */
03423         0,                                      /* tp_compare */
03424         0,                                      /* tp_repr */
03425         0,                                      /* tp_as_number */
03426         0,                                      /* tp_as_sequence */
03427         0,                                      /* tp_as_mapping */
03428         0,                                      /* tp_hash */
03429         0,                                      /* tp_call */
03430         0,                                      /* tp_str */
03431         0,                                      /* tp_getattro */
03432         0,                                      /* tp_setattro */
03433         0,                                      /* tp_as_buffer */
03434         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
03435         basestring_doc,                         /* tp_doc */
03436         0,                                      /* tp_traverse */
03437         0,                                      /* tp_clear */
03438         0,                                      /* tp_richcompare */
03439         0,                                      /* tp_weaklistoffset */
03440         0,                                      /* tp_iter */
03441         0,                                      /* tp_iternext */
03442         0,                                      /* tp_methods */
03443         0,                                      /* tp_members */
03444         0,                                      /* tp_getset */
03445         &PyBaseObject_Type,                     /* tp_base */
03446         0,                                      /* tp_dict */
03447         0,                                      /* tp_descr_get */
03448         0,                                      /* tp_descr_set */
03449         0,                                      /* tp_dictoffset */
03450         0,                                      /* tp_init */
03451         0,                                      /* tp_alloc */
03452         basestring_new,                         /* tp_new */  /* always raises TypeError */
03453         0,                                      /* tp_free */
03454 };
03455 
03456 PyDoc_STRVAR(string_doc,        /* used as tp_doc of PyString_Type */
03457 "str(object) -> string\n\
03458 \n\
03459 Return a nice string representation of the object.\n\
03460 If the argument is a string, the return value is the same object.");
03461 
03462 PyTypeObject PyString_Type = {  /* type object for "str"; its base is PyBaseString_Type */
03463         PyObject_HEAD_INIT(&PyType_Type)
03464         0,                                      /* ob_size */
03465         "str",                                  /* tp_name */
03466         sizeof(PyStringObject),                 /* tp_basicsize */
03467         sizeof(char),                           /* tp_itemsize */
03468         (destructor)string_dealloc,             /* tp_dealloc */
03469         (printfunc)string_print,                /* tp_print */
03470         0,                                      /* tp_getattr */
03471         0,                                      /* tp_setattr */
03472         0,                                      /* tp_compare */
03473         (reprfunc)string_repr,                  /* tp_repr */
03474         &string_as_number,                      /* tp_as_number */
03475         &string_as_sequence,                    /* tp_as_sequence */
03476         &string_as_mapping,                     /* tp_as_mapping */
03477         (hashfunc)string_hash,                  /* tp_hash */
03478         0,                                      /* tp_call */
03479         (reprfunc)string_str,                   /* tp_str */
03480         PyObject_GenericGetAttr,                /* tp_getattro */
03481         0,                                      /* tp_setattro */
03482         &string_as_buffer,                      /* tp_as_buffer */
03483         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES | 
03484                 Py_TPFLAGS_BASETYPE,            /* tp_flags */
03485         string_doc,                             /* tp_doc */
03486         0,                                      /* tp_traverse */
03487         0,                                      /* tp_clear */
03488         (richcmpfunc)string_richcompare,        /* tp_richcompare */
03489         0,                                      /* tp_weaklistoffset */
03490         0,                                      /* tp_iter */
03491         0,                                      /* tp_iternext */
03492         string_methods,                         /* tp_methods */
03493         0,                                      /* tp_members */
03494         0,                                      /* tp_getset */
03495         &PyBaseString_Type,                     /* tp_base */
03496         0,                                      /* tp_dict */
03497         0,                                      /* tp_descr_get */
03498         0,                                      /* tp_descr_set */
03499         0,                                      /* tp_dictoffset */
03500         0,                                      /* tp_init */
03501         0,                                      /* tp_alloc */
03502         string_new,                             /* tp_new */  /* dispatches subclasses to str_subtype_new */
03503         PyObject_Del,                           /* tp_free */
03504 };
03505 
03506 void
03507 PyString_Concat(register PyObject **pv, register PyObject *w)
03508 {
03509         register PyObject *v;
03510         if (*pv == NULL)
03511                 return;
03512         if (w == NULL || !PyString_Check(*pv)) {
03513                 Py_DECREF(*pv);
03514                 *pv = NULL;
03515                 return;
03516         }
03517         v = string_concat((PyStringObject *) *pv, w);
03518         Py_DECREF(*pv);
03519         *pv = v;
03520 }
03521 
03522 void
03523 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
03524 {
03525         PyString_Concat(pv, w);
03526         Py_XDECREF(w);
03527 }
03528 
03529 
03530 /* The following function breaks the notion that strings are immutable:
03531    it changes the size of a string.  We get away with this only if there
03532    is only one module referencing the object.  You can also think of it
03533    as creating a new string object and destroying the old one, only
03534    more efficiently.  In any case, don't use this if the string may
03535    already be known to some other part of the code...
03536    Note that if there's not enough memory to resize the string, the original
03537    string object at *pv is deallocated, *pv is set to NULL, an "out of
03538    memory" exception is set, and -1 is returned.  Else (on success) 0 is
03539    returned, and the value in *pv may or may not be the same as on input.
03540    As always, an extra byte is allocated for a trailing \0 byte (newsize
03541    does *not* include that), and a trailing \0 byte is stored.
03542 */
03543 
03544 int
03545 _PyString_Resize(PyObject **pv, Py_ssize_t newsize)
03546 {
03547         register PyObject *v;
03548         register PyStringObject *sv;
03549         v = *pv;
03550         if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
03551             PyString_CHECK_INTERNED(v)) {
03552                 *pv = 0;
03553                 Py_DECREF(v);
03554                 PyErr_BadInternalCall();
03555                 return -1;
03556         }
03557         /* XXX UNREF/NEWREF interface should be more symmetrical */
03558         _Py_DEC_REFTOTAL;
03559         _Py_ForgetReference(v);
03560         *pv = (PyObject *)
03561                 PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
03562         if (*pv == NULL) {
03563                 PyObject_Del(v);
03564                 PyErr_NoMemory();
03565                 return -1;
03566         }
03567         _Py_NewReference(*pv);
03568         sv = (PyStringObject *) *pv;
03569         sv->ob_size = newsize;
03570         sv->ob_sval[newsize] = '\0';
03571         sv->ob_shash = -1;      /* invalidate cached hash value */
03572         return 0;
03573 }
03574 
03575 /* Helpers for formatstring */
03576 
03577 static PyObject *
03578 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
03579 {
03580         Py_ssize_t argidx = *p_argidx;
03581         if (argidx < arglen) {
03582                 (*p_argidx)++;
03583                 if (arglen < 0)
03584                         return args;
03585                 else
03586                         return PyTuple_GetItem(args, argidx);
03587         }
03588         PyErr_SetString(PyExc_TypeError,
03589                         "not enough arguments for format string");
03590         return NULL;
03591 }
03592 
/* Format flags: one bit per flag character of a conversion specifier,
 * OR-ed together into `flags'.
 *
 *   flag       character
 *   F_LJUST    '-'
 *   F_SIGN     '+'
 *   F_BLANK    ' '
 *   F_ALT      '#'
 *   F_ZERO     '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
03605 
03606 static int
03607 formatfloat(char *buf, size_t buflen, int flags,
03608             int prec, int type, PyObject *v)
03609 {
03610         /* fmt = '%#.' + `prec` + `type`
03611            worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
03612         char fmt[20];
03613         double x;
03614         x = PyFloat_AsDouble(v);
03615         if (x == -1.0 && PyErr_Occurred()) {
03616                 PyErr_SetString(PyExc_TypeError, "float argument required");
03617                 return -1;
03618         }
03619         if (prec < 0)
03620                 prec = 6;
03621         if (type == 'f' && fabs(x)/1e25 >= 1e25)
03622                 type = 'g';
03623         /* Worst case length calc to ensure no buffer overrun:
03624 
03625            'g' formats:
03626              fmt = %#.<prec>g
03627              buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
03628                 for any double rep.)
03629              len = 1 + prec + 1 + 2 + 5 = 9 + prec
03630 
03631            'f' formats:
03632              buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
03633              len = 1 + 50 + 1 + prec = 52 + prec
03634 
03635            If prec=0 the effective precision is 1 (the leading digit is
03636            always given), therefore increase the length by one. 
03637 
03638         */
03639         if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
03640             (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
03641                 PyErr_SetString(PyExc_OverflowError,
03642                         "formatted float is too long (precision too large?)");
03643                 return -1;
03644         }
03645         PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
03646                       (flags&F_ALT) ? "#" : "",
03647                       prec, type);
03648         PyOS_ascii_formatd(buf, buflen, fmt, x);
03649         return (int)strlen(buf);
03650 }
03651 
03652 /* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
03653  * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
03654  * Python's regular ints.
03655  * Return value:  a new PyString*, or NULL if error.
03656  *  .  *pbuf is set to point into it,
03657  *     *plen set to the # of chars following that.
03658  *     Caller must decref it when done using pbuf.
03659  *     The string starting at *pbuf is of the form
03660  *         "-"? ("0x" | "0X")? digit+
03661  *     "0x"/"0X" are present only for x and X conversions, with F_ALT
03662  *         set in flags.  The case of hex digits will be correct,
03663  *     There will be at least prec digits, zero-filled on the left if
03664  *         necessary to get that many.
03665  * val          object to be converted
03666  * flags        bitmask of format flags; only F_ALT is looked at
03667  * prec         minimum number of digits; 0-fill on left if needed
03668  * type         a character in [duoxX]; u acts the same as d
03669  *
03670  * CAUTION:  o, x and X conversions on regular ints can never
03671  * produce a '-' sign, but can for Python's unbounded ints.
03672  */
03673 PyObject*
03674 _PyString_FormatLong(PyObject *val, int flags, int prec, int type,
03675                      char **pbuf, int *plen)
03676 {
03677         PyObject *result = NULL;
03678         char *buf;
03679         Py_ssize_t i;
03680         int sign;       /* 1 if '-', else 0 */
03681         int len;        /* number of characters */
03682         int numdigits;  /* len == numnondigits + numdigits */
03683         int numnondigits = 0;
03684 
03685         switch (type) {
03686         case 'd':
03687         case 'u':
03688                 result = val->ob_type->tp_str(val);
03689                 break;
03690         case 'o':
03691                 result = val->ob_type->tp_as_number->nb_oct(val);
03692                 break;
03693         case 'x':
03694         case 'X':
03695                 numnondigits = 2;
03696                 result = val->ob_type->tp_as_number->nb_hex(val);
03697                 break;
03698         default:
03699                 assert(!"'type' not in [duoxX]");
03700         }
03701         if (!result)
03702                 return NULL;
03703 
03704         /* To modify the string in-place, there can only be one reference. */
03705         if (result->ob_refcnt != 1) {
03706                 PyErr_BadInternalCall();
03707                 return NULL;
03708         }
03709         buf = PyString_AsString(result);
03710         len = PyString_Size(result);
03711         if (buf[len-1] == 'L') {
03712                 --len;
03713                 buf[len] = '\0';
03714         }
03715         sign = buf[0] == '-';
03716         numnondigits += sign;
03717         numdigits = len - numnondigits;
03718         assert(numdigits > 0);
03719 
03720         /* Get rid of base marker unless F_ALT */
03721         if ((flags & F_ALT) == 0) {
03722                 /* Need to skip 0x, 0X or 0. */
03723                 int skipped = 0;
03724                 switch (type) {
03725                 case 'o':
03726                         assert(buf[sign] == '0');
03727                         /* If 0 is only digit, leave it alone. */
03728                         if (numdigits > 1) {
03729                                 skipped = 1;
03730                                 --numdigits;
03731                         }
03732                         break;
03733                 case 'x':
03734                 case 'X':
03735                         assert(buf[sign] == '0');
03736                         assert(buf[sign + 1] == 'x');
03737                         skipped = 2;
03738                         numnondigits -= 2;
03739                         break;
03740                 }
03741                 if (skipped) {
03742                         buf += skipped;
03743                         len -= skipped;
03744                         if (sign)
03745                                 buf[0] = '-';
03746                 }
03747                 assert(len == numnondigits + numdigits);
03748                 assert(numdigits > 0);
03749         }
03750 
03751         /* Fill with leading zeroes to meet minimum width. */
03752         if (prec > numdigits) {
03753                 PyObject *r1 = PyString_FromStringAndSize(NULL,
03754                                         numnondigits + prec);
03755                 char *b1;
03756                 if (!r1) {
03757                         Py_DECREF(result);
03758                         return NULL;
03759                 }
03760                 b1 = PyString_AS_STRING(r1);
03761                 for (i = 0; i < numnondigits; ++i)
03762                         *b1++ = *buf++;
03763                 for (i = 0; i < prec - numdigits; i++)
03764                         *b1++ = '0';
03765                 for (i = 0; i < numdigits; i++)
03766                         *b1++ = *buf++;
03767                 *b1 = '\0';
03768                 Py_DECREF(result);
03769                 result = r1;
03770                 buf = PyString_AS_STRING(result);
03771                 len = numnondigits + prec;
03772         }
03773 
03774         /* Fix up case for hex conversions. */
03775         if (type == 'X') {
03776                 /* Need to convert all lower case letters to upper case.
03777                    and need to convert 0x to 0X (and -0x to -0X). */
03778                 for (i = 0; i < len; i++)
03779                         if (buf[i] >= 'a' && buf[i] <= 'x')
03780                                 buf[i] -= 'a'-'A';
03781         }
03782         *pbuf = buf;
03783         *plen = len;
03784         return result;
03785 }
03786 
03787 static int
03788 formatint(char *buf, size_t buflen, int flags,
03789           int prec, int type, PyObject *v)
03790 {
03791         /* fmt = '%#.' + `prec` + 'l' + `type`
03792            worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
03793            + 1 + 1 = 24 */
03794         char fmt[64];   /* plenty big enough! */
03795         char *sign;
03796         long x;
03797 
03798         x = PyInt_AsLong(v);
03799         if (x == -1 && PyErr_Occurred()) {
03800                 PyErr_SetString(PyExc_TypeError, "int argument required");
03801                 return -1;
03802         }
03803         if (x < 0 && type == 'u') {
03804                 type = 'd';
03805         }
03806         if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
03807                 sign = "-";
03808         else
03809                 sign = "";
03810         if (prec < 0)
03811                 prec = 1;
03812 
03813         if ((flags & F_ALT) &&
03814             (type == 'x' || type == 'X')) {
03815                 /* When converting under %#x or %#X, there are a number
03816                  * of issues that cause pain:
03817                  * - when 0 is being converted, the C standard leaves off
03818                  *   the '0x' or '0X', which is inconsistent with other
03819                  *   %#x/%#X conversions and inconsistent with Python's
03820                  *   hex() function
03821                  * - there are platforms that violate the standard and
03822                  *   convert 0 with the '0x' or '0X'
03823                  *   (Metrowerks, Compaq Tru64)
03824                  * - there are platforms that give '0x' when converting
03825                  *   under %#X, but convert 0 in accordance with the
03826                  *   standard (OS/2 EMX)
03827                  *
03828                  * We can achieve the desired consistency by inserting our
03829                  * own '0x' or '0X' prefix, and substituting %x/%X in place
03830                  * of %#x/%#X.
03831                  *
03832                  * Note that this is the same approach as used in
03833                  * formatint() in unicodeobject.c
03834                  */
03835                 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
03836                               sign, type, prec, type);
03837         }
03838         else {
03839                 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
03840                               sign, (flags&F_ALT) ? "#" : "",
03841                               prec, type);
03842         }
03843 
03844         /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
03845          * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
03846          */
03847         if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
03848                 PyErr_SetString(PyExc_OverflowError,
03849                     "formatted integer is too long (precision too large?)");
03850                 return -1;
03851         }
03852         if (sign[0])
03853                 PyOS_snprintf(buf, buflen, fmt, -x);
03854         else
03855                 PyOS_snprintf(buf, buflen, fmt, x);
03856         return (int)strlen(buf);
03857 }
03858 
03859 static int
03860 formatchar(char *buf, size_t buflen, PyObject *v)
03861 {
03862         /* presume that the buffer is at least 2 characters long */
03863         if (PyString_Check(v)) {
03864                 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
03865                         return -1;
03866         }
03867         else {
03868                 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
03869                         return -1;
03870         }
03871         buf[1] = '\0';
03872         return 1;
03873 }
03874 
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted. XXX This is a magic number. Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format. For now, the current solution is sufficient.

   The expansion is fully parenthesized so the macro behaves as a single
   operand wherever it is used (e.g. `sizeof FORMATBUFLEN').
*/
#define FORMATBUFLEN ((size_t)120)
03884 
03885 PyObject *
03886 PyString_Format(PyObject *format, PyObject *args)
03887 {
03888         char *fmt, *res;
03889         Py_ssize_t arglen, argidx;
03890         Py_ssize_t reslen, rescnt, fmtcnt;
03891         int args_owned = 0;
03892         PyObject *result, *orig_args;
03893 #ifdef Py_USING_UNICODE
03894         PyObject *v, *w;
03895 #endif
03896         PyObject *dict = NULL;
03897         if (format == NULL || !PyString_Check(format) || args == NULL) {
03898                 PyErr_BadInternalCall();
03899                 return NULL;
03900         }
03901         orig_args = args;
03902         fmt = PyString_AS_STRING(format);
03903         fmtcnt = PyString_GET_SIZE(format);
03904         reslen = rescnt = fmtcnt + 100;
03905         result = PyString_FromStringAndSize((char *)NULL, reslen);
03906         if (result == NULL)
03907                 return NULL;
03908         res = PyString_AsString(result);
03909         if (PyTuple_Check(args)) {
03910                 arglen = PyTuple_GET_SIZE(args);
03911                 argidx = 0;
03912         }
03913         else {
03914                 arglen = -1;
03915                 argidx = -2;
03916         }
03917         if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
03918             !PyObject_TypeCheck(args, &PyBaseString_Type))
03919                 dict = args;
03920         while (--fmtcnt >= 0) {
03921                 if (*fmt != '%') {
03922                         if (--rescnt < 0) {
03923                                 rescnt = fmtcnt + 100;
03924                                 reslen += rescnt;
03925                                 if (_PyString_Resize(&result, reslen) < 0)
03926                                         return NULL;
03927                                 res = PyString_AS_STRING(result)
03928                                         + reslen - rescnt;
03929                                 --rescnt;
03930                         }
03931                         *res++ = *fmt++;
03932                 }
03933                 else {
03934                         /* Got a format specifier */
03935                         int flags = 0;
03936                         Py_ssize_t width = -1;
03937                         int prec = -1;
03938                         int c = '\0';
03939                         int fill;
03940                         PyObject *v = NULL;
03941                         PyObject *temp = NULL;
03942                         char *pbuf;
03943                         int sign;
03944                         int len;
03945                         char formatbuf[FORMATBUFLEN];
03946                              /* For format{float,int,char}() */
03947 #ifdef Py_USING_UNICODE
03948                         char *fmt_start = fmt;
03949                         int argidx_start = argidx;
03950 #endif
03951 
03952                         fmt++;
03953                         if (*fmt == '(') {
03954                                 char *keystart;
03955                                 Py_ssize_t keylen;
03956                                 PyObject *key;
03957                                 int pcount = 1;
03958 
03959                                 if (dict == NULL) {
03960                                         PyErr_SetString(PyExc_TypeError,
03961                                                  "format requires a mapping");
03962                                         goto error;
03963                                 }
03964                                 ++fmt;
03965                                 --fmtcnt;
03966                                 keystart = fmt;
03967                                 /* Skip over balanced parentheses */
03968                                 while (pcount > 0 && --fmtcnt >= 0) {
03969                                         if (*fmt == ')')
03970                                                 --pcount;
03971                                         else if (*fmt == '(')
03972                                                 ++pcount;
03973                                         fmt++;
03974                                 }
03975                                 keylen = fmt - keystart - 1;
03976                                 if (fmtcnt < 0 || pcount > 0) {
03977                                         PyErr_SetString(PyExc_ValueError,
03978                                                    "incomplete format key");
03979                                         goto error;
03980                                 }
03981                                 key = PyString_FromStringAndSize(keystart,
03982                                                                  keylen);
03983                                 if (key == NULL)
03984                                         goto error;
03985                                 if (args_owned) {
03986                                         Py_DECREF(args);
03987                                         args_owned = 0;
03988                                 }
03989                                 args = PyObject_GetItem(dict, key);
03990                                 Py_DECREF(key);
03991                                 if (args == NULL) {
03992                                         goto error;
03993                                 }
03994                                 args_owned = 1;
03995                                 arglen = -1;
03996                                 argidx = -2;
03997                         }
03998                         while (--fmtcnt >= 0) {
03999                                 switch (c = *fmt++) {
04000                                 case '-': flags |= F_LJUST; continue;
04001                                 case '+': flags |= F_SIGN; continue;
04002                                 case ' ': flags |= F_BLANK; continue;
04003                                 case '#': flags |= F_ALT; continue;
04004                                 case '0': flags |= F_ZERO; continue;
04005                                 }
04006                                 break;
04007                         }
04008                         if (c == '*') {
04009                                 v = getnextarg(args, arglen, &argidx);
04010                                 if (v == NULL)
04011                                         goto error;
04012                                 if (!PyInt_Check(v)) {
04013                                         PyErr_SetString(PyExc_TypeError,
04014                                                         "* wants int");
04015                                         goto error;
04016                                 }
04017                                 width = PyInt_AsLong(v);
04018                                 if (width < 0) {
04019                                         flags |= F_LJUST;
04020                                         width = -width;
04021                                 }
04022                                 if (--fmtcnt >= 0)
04023                                         c = *fmt++;
04024                         }
04025                         else if (c >= 0 && isdigit(c)) {
04026                                 width = c - '0';
04027                                 while (--fmtcnt >= 0) {
04028                                         c = Py_CHARMASK(*fmt++);
04029                                         if (!isdigit(c))
04030                                                 break;
04031                                         if ((width*10) / 10 != width) {
04032                                                 PyErr_SetString(
04033                                                         PyExc_ValueError,
04034                                                         "width too big");
04035                                                 goto error;
04036                                         }
04037                                         width = width*10 + (c - '0');
04038                                 }
04039                         }
04040                         if (c == '.') {
04041                                 prec = 0;
04042                                 if (--fmtcnt >= 0)
04043                                         c = *fmt++;
04044                                 if (c == '*') {
04045                                         v = getnextarg(args, arglen, &argidx);
04046                                         if (v == NULL)
04047                                                 goto error;
04048                                         if (!PyInt_Check(v)) {
04049                                                 PyErr_SetString(
04050                                                         PyExc_TypeError,
04051                                                         "* wants int");
04052                                                 goto error;
04053                                         }
04054                                         prec = PyInt_AsLong(v);
04055                                         if (prec < 0)
04056                                                 prec = 0;
04057                                         if (--fmtcnt >= 0)
04058                                                 c = *fmt++;
04059                                 }
04060                                 else if (c >= 0 && isdigit(c)) {
04061                                         prec = c - '0';
04062                                         while (--fmtcnt >= 0) {
04063                                                 c = Py_CHARMASK(*fmt++);
04064                                                 if (!isdigit(c))
04065                                                         break;
04066                                                 if ((prec*10) / 10 != prec) {
04067                                                         PyErr_SetString(
04068                                                             PyExc_ValueError,
04069                                                             "prec too big");
04070                                                         goto error;
04071                                                 }
04072                                                 prec = prec*10 + (c - '0');
04073                                         }
04074                                 }
04075                         } /* prec */
04076                         if (fmtcnt >= 0) {
04077                                 if (c == 'h' || c == 'l' || c == 'L') {
04078                                         if (--fmtcnt >= 0)
04079                                                 c = *fmt++;
04080                                 }
04081                         }
04082                         if (fmtcnt < 0) {
04083                                 PyErr_SetString(PyExc_ValueError,
04084                                                 "incomplete format");
04085                                 goto error;
04086                         }
04087                         if (c != '%') {
04088                                 v = getnextarg(args, arglen, &argidx);
04089                                 if (v == NULL)
04090                                         goto error;
04091                         }
04092                         sign = 0;
04093                         fill = ' ';
04094                         switch (c) {
04095                         case '%':
04096                                 pbuf = "%";
04097                                 len = 1;
04098                                 break;
04099                         case 's':
04100 #ifdef Py_USING_UNICODE
04101                                 if (PyUnicode_Check(v)) {
04102                                         fmt = fmt_start;
04103                                         argidx = argidx_start;
04104                                         goto unicode;
04105                                 }
04106 #endif
04107                                 temp = _PyObject_Str(v);
04108 #ifdef Py_USING_UNICODE
04109                                 if (temp != NULL && PyUnicode_Check(temp)) {
04110                                         Py_DECREF(temp);
04111                                         fmt = fmt_start;
04112                                         argidx = argidx_start;
04113                                         goto unicode;
04114                                 }
04115 #endif
04116                                 /* Fall through */
04117                         case 'r':
04118                                 if (c == 'r')
04119                                         temp = PyObject_Repr(v);
04120                                 if (temp == NULL)
04121                                         goto error;
04122                                 if (!PyString_Check(temp)) {
04123                                         PyErr_SetString(PyExc_TypeError,
04124                                           "%s argument has non-string str()");
04125                                         Py_DECREF(temp);
04126                                         goto error;
04127                                 }
04128                                 pbuf = PyString_AS_STRING(temp);
04129                                 len = PyString_GET_SIZE(temp);
04130                                 if (prec >= 0 && len > prec)
04131                                         len = prec;
04132                                 break;
04133                         case 'i':
04134                         case 'd':
04135                         case 'u':
04136                         case 'o':
04137                         case 'x':
04138                         case 'X':
04139                                 if (c == 'i')
04140                                         c = 'd';
04141                                 if (PyLong_Check(v)) {
04142                                         temp = _PyString_FormatLong(v, flags,
04143                                                 prec, c, &pbuf, &len);
04144                                         if (!temp)
04145                                                 goto error;
04146                                         sign = 1;
04147                                 }
04148                                 else {
04149                                         pbuf = formatbuf;
04150                                         len = formatint(pbuf,
04151                                                         sizeof(formatbuf),
04152                                                         flags, prec, c, v);
04153                                         if (len < 0)
04154                                                 goto error;
04155                                         sign = 1;
04156                                 }
04157                                 if (flags & F_ZERO)
04158                                         fill = '0';
04159                                 break;
04160                         case 'e':
04161                         case 'E':
04162                         case 'f':
04163                         case 'F':
04164                         case 'g':
04165                         case 'G':
04166                                 if (c == 'F')
04167                                         c = 'f';
04168                                 pbuf = formatbuf;
04169                                 len = formatfloat(pbuf, sizeof(formatbuf),
04170                                                   flags, prec, c, v);
04171                                 if (len < 0)
04172                                         goto error;
04173                                 sign = 1;
04174                                 if (flags & F_ZERO)
04175                                         fill = '0';
04176                                 break;
04177                         case 'c':
04178 #ifdef Py_USING_UNICODE
04179                                 if (PyUnicode_Check(v)) {
04180                                         fmt = fmt_start;
04181                                         argidx = argidx_start;
04182                                         goto unicode;
04183                                 }
04184 #endif
04185                                 pbuf = formatbuf;
04186                                 len = formatchar(pbuf, sizeof(formatbuf), v);
04187                                 if (len < 0)
04188                                         goto error;
04189                                 break;
04190                         default:
04191                                 PyErr_Format(PyExc_ValueError,
04192                                   "unsupported format character '%c' (0x%x) "
04193                                   "at index %i",
04194                                   c, c,
04195                                   (int)(fmt - 1 - PyString_AsString(format)));
04196                                 goto error;
04197                         }
04198                         if (sign) {
04199                                 if (*pbuf == '-' || *pbuf == '+') {
04200                                         sign = *pbuf++;
04201                                         len--;
04202                                 }
04203                                 else if (flags & F_SIGN)
04204                                         sign = '+';
04205                                 else if (flags & F_BLANK)
04206                                         sign = ' ';
04207                                 else
04208                                         sign = 0;
04209                         }
04210                         if (width < len)
04211                                 width = len;
04212                         if (rescnt - (sign != 0) < width) {
04213                                 reslen -= rescnt;
04214                                 rescnt = width + fmtcnt + 100;
04215                                 reslen += rescnt;
04216                                 if (reslen < 0) {
04217                                         Py_DECREF(result);
04218                                         return PyErr_NoMemory();
04219                                 }
04220                                 if (_PyString_Resize(&result, reslen) < 0)
04221                                         return NULL;
04222                                 res = PyString_AS_STRING(result)
04223                                         + reslen - rescnt;
04224                         }
04225                         if (sign) {
04226                                 if (fill != ' ')
04227                                         *res++ = sign;
04228                                 rescnt--;
04229                                 if (width > len)
04230                                         width--;
04231                         }
04232                         if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
04233                                 assert(pbuf[0] == '0');
04234                                 assert(pbuf[1] == c);
04235                                 if (fill != ' ') {
04236                                         *res++ = *pbuf++;
04237                                         *res++ = *pbuf++;
04238                                 }
04239                                 rescnt -= 2;
04240                                 width -= 2;
04241                                 if (width < 0)
04242                                         width = 0;
04243                                 len -= 2;
04244                         }
04245                         if (width > len && !(flags & F_LJUST)) {
04246                                 do {
04247                                         --rescnt;
04248                                         *res++ = fill;
04249                                 } while (--width > len);
04250                         }
04251                         if (fill == ' ') {
04252                                 if (sign)
04253                                         *res++ = sign;
04254                                 if ((flags & F_ALT) &&
04255                                     (c == 'x' || c == 'X')) {
04256                                         assert(pbuf[0] == '0');
04257                                         assert(pbuf[1] == c);
04258                                         *res++ = *pbuf++;
04259                                         *res++ = *pbuf++;
04260                                 }
04261                         }
04262                         memcpy(res, pbuf, len);
04263                         res += len;
04264                         rescnt -= len;
04265                         while (--width >= len) {
04266                                 --rescnt;
04267                                 *res++ = ' ';
04268                         }
04269                         if (dict && (argidx < arglen) && c != '%') {
04270                                 PyErr_SetString(PyExc_TypeError,
04271                                            "not all arguments converted during string formatting");
04272                                 goto error;
04273                         }
04274                         Py_XDECREF(temp);
04275                 } /* '%' */
04276         } /* until end */
04277         if (argidx < arglen && !dict) {
04278                 PyErr_SetString(PyExc_TypeError,
04279                                 "not all arguments converted during string formatting");
04280                 goto error;
04281         }
04282         if (args_owned) {
04283                 Py_DECREF(args);
04284         }
04285         _PyString_Resize(&result, reslen - rescnt);
04286         return result;
04287 
04288 #ifdef Py_USING_UNICODE
04289  unicode:
04290         if (args_owned) {
04291                 Py_DECREF(args);
04292                 args_owned = 0;
04293         }
04294         /* Fiddle args right (remove the first argidx arguments) */
04295         if (PyTuple_Check(orig_args) && argidx > 0) {
04296                 PyObject *v;
04297                 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
04298                 v = PyTuple_New(n);
04299                 if (v == NULL)
04300                         goto error;
04301                 while (--n >= 0) {
04302                         PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
04303                         Py_INCREF(w);
04304                         PyTuple_SET_ITEM(v, n, w);
04305                 }
04306                 args = v;
04307         } else {
04308                 Py_INCREF(orig_args);
04309                 args = orig_args;
04310         }
04311         args_owned = 1;
04312         /* Take what we have of the result and let the Unicode formatting
04313            function format the rest of the input. */
04314         rescnt = res - PyString_AS_STRING(result);
04315         if (_PyString_Resize(&result, rescnt))
04316                 goto error;
04317         fmtcnt = PyString_GET_SIZE(format) - \
04318                  (fmt - PyString_AS_STRING(format));
04319         format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
04320         if (format == NULL)
04321                 goto error;
04322         v = PyUnicode_Format(format, args);
04323         Py_DECREF(format);
04324         if (v == NULL)
04325                 goto error;
04326         /* Paste what we have (result) to what the Unicode formatting
04327            function returned (v) and return the result (or error) */
04328         w = PyUnicode_Concat(result, v);
04329         Py_DECREF(result);
04330         Py_DECREF(v);
04331         Py_DECREF(args);
04332         return w;
04333 #endif /* Py_USING_UNICODE */
04334 
04335  error:
04336         Py_DECREF(result);
04337         if (args_owned) {
04338                 Py_DECREF(args);
04339         }
04340         return NULL;
04341 }
04342 
04343 void
04344 PyString_InternInPlace(PyObject **p)
04345 {
04346         register PyStringObject *s = (PyStringObject *)(*p);
04347         PyObject *t;
04348         if (s == NULL || !PyString_Check(s))
04349                 Py_FatalError("PyString_InternInPlace: strings only please!");
04350         /* If it's a string subclass, we don't really know what putting
04351            it in the interned dict might do. */
04352         if (!PyString_CheckExact(s))
04353                 return;
04354         if (PyString_CHECK_INTERNED(s))
04355                 return;
04356         if (interned == NULL) {
04357                 interned = PyDict_New();
04358                 if (interned == NULL) {
04359                         PyErr_Clear(); /* Don't leave an exception */
04360                         return;
04361                 }
04362         }
04363         t = PyDict_GetItem(interned, (PyObject *)s);
04364         if (t) {
04365                 Py_INCREF(t);
04366                 Py_DECREF(*p);
04367                 *p = t;
04368                 return;
04369         }
04370 
04371         if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
04372                 PyErr_Clear();
04373                 return;
04374         }
04375         /* The two references in interned are not counted by refcnt.
04376            The string deallocator will take care of this */
04377         s->ob_refcnt -= 2;
04378         PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
04379 }
04380 
04381 void
04382 PyString_InternImmortal(PyObject **p)
04383 {
04384         PyString_InternInPlace(p);
04385         if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
04386                 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
04387                 Py_INCREF(*p);
04388         }
04389 }
04390 
04391 
04392 PyObject *
04393 PyString_InternFromString(const char *cp)
04394 {
04395         PyObject *s = PyString_FromString(cp);
04396         if (s == NULL)
04397                 return NULL;
04398         PyString_InternInPlace(&s);
04399         return s;
04400 }
04401 
04402 void
04403 PyString_Fini(void)
04404 {
04405         int i;
04406         for (i = 0; i < UCHAR_MAX + 1; i++) {
04407                 Py_XDECREF(characters[i]);
04408                 characters[i] = NULL;
04409         }
04410         Py_XDECREF(nullstring);
04411         nullstring = NULL;
04412 }
04413 
04414 void _Py_ReleaseInternedStrings(void)
04415 {
04416         PyObject *keys;
04417         PyStringObject *s;
04418         Py_ssize_t i, n;
04419 
04420         if (interned == NULL || !PyDict_Check(interned))
04421                 return;
04422         keys = PyDict_Keys(interned);
04423         if (keys == NULL || !PyList_Check(keys)) {
04424                 PyErr_Clear();
04425                 return;
04426         }
04427 
04428         /* Since _Py_ReleaseInternedStrings() is intended to help a leak
04429            detector, interned strings are not forcibly deallocated; rather, we
04430            give them their stolen references back, and then clear and DECREF
04431            the interned dict. */
04432            
04433         fprintf(stderr, "releasing interned strings\n");
04434         n = PyList_GET_SIZE(keys);
04435         for (i = 0; i < n; i++) {
04436                 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
04437                 switch (s->ob_sstate) {
04438                 case SSTATE_NOT_INTERNED:
04439                         /* XXX Shouldn't happen */
04440                         break;
04441                 case SSTATE_INTERNED_IMMORTAL:
04442                         s->ob_refcnt += 1;
04443                         break;
04444                 case SSTATE_INTERNED_MORTAL:
04445                         s->ob_refcnt += 2;
04446                         break;
04447                 default:
04448                         Py_FatalError("Inconsistent interned string state.");
04449                 }
04450                 s->ob_sstate = SSTATE_NOT_INTERNED;
04451         }
04452         Py_DECREF(keys);
04453         PyDict_Clear(interned);
04454         Py_DECREF(interned);
04455         interned = NULL;
04456 }

Generated on Wed Mar 1 20:34:45 2006 for python by  doxygen 1.4.2