Vuo: base/VuoStringUtilities.cc Source File

Go to the documentation of this file.
  
 #include <sstream>
 #include <CoreFoundation/CoreFoundation.h>
 #include "VuoStringUtilities.hh"
  
 extern "C" {
 #include "mkdio.h"
 }
  
 bool VuoStringUtilities::beginsWith(string wholeString, string beginning)
 {
     return wholeString.length() >= beginning.length() && wholeString.substr(0, beginning.length()) == beginning;
 }
  
 bool VuoStringUtilities::endsWith(string wholeString, string ending)
 {
     if (wholeString.length() < ending.length())
         return false;
  
     return wholeString.compare(wholeString.length()-ending.length(), ending.length(), ending) == 0;
 }
  
 string VuoStringUtilities::substrAfter(string wholeString, string beginning)
 {
     if (! beginsWith(wholeString, beginning))
         return "";
  
     return wholeString.substr(beginning.length());
 }
  
 string VuoStringUtilities::substrBefore(string wholeString, string ending)
 {
     if (! endsWith(wholeString, ending))
         return "";
  
     return wholeString.substr(0, wholeString.length()-ending.length());
 }
  
 string VuoStringUtilities::replaceAll(string wholeString, char originalChar, char replacementChar)
 {
     string outString = wholeString;
     size_t pos = 0;
     while ((pos = wholeString.find_first_of(originalChar, pos)) != string::npos)
     {
         outString[pos] = replacementChar;
         pos = pos + 1;
     }
     return outString;
 }
  
 size_t VuoStringUtilities::replaceAll(string &wholeString, string originalSubstring, string replacementSubstring)
 {
     size_t replacementCount = 0;
     size_t startPos = 0;
     while ((startPos = wholeString.find(originalSubstring, startPos)) != string::npos)
     {
         wholeString.replace(startPos, originalSubstring.length(), replacementSubstring);
         startPos += replacementSubstring.length();
         ++replacementCount;
     }
     return replacementCount;
 }
  
 vector<string> VuoStringUtilities::split(const string &wholeString, char delimiter)
 {
     vector<string> tokens;
     istringstream iss(wholeString);
     string token;
     while( getline(iss, token, delimiter) )
         tokens.push_back(token);
     return tokens;
 }
  
 string VuoStringUtilities::join(vector<string> partialStrings, char delimiter)
 {
     string delimiterStr(1, delimiter);
     return join(partialStrings, delimiterStr);
 }
  
 string VuoStringUtilities::join(vector<string> partialStrings, string delimiter)
 {
     string wholeString;
     for (vector<string>::iterator i = partialStrings.begin(); i != partialStrings.end(); )
     {
         wholeString += *i;
         if (++i != partialStrings.end())
             wholeString += delimiter;
     }
     return wholeString;
 }
  
 string VuoStringUtilities::join(set<string> partialStrings, string delimiter)
 {
     string wholeString;
     for (set<string>::iterator i = partialStrings.begin(); i != partialStrings.end(); )
     {
         wholeString += *i;
         if (++i != partialStrings.end())
             wholeString += delimiter;
     }
     return wholeString;
 }
  
 string VuoStringUtilities::trim(string originalString)
 {
     string whitespace = " \t\v\n\r\f";
  
     string::size_type begin = originalString.find_first_not_of(whitespace);
     if (begin == std::string::npos)
         return "";
  
     string::size_type end = originalString.find_last_not_of(whitespace);
  
     return originalString.substr(begin, end - begin + 1);
 }
  
 string VuoStringUtilities::buildCompositionIdentifier(const string &parentCompositionIdentifier, const string &nodeIdentifier)
 {
     return parentCompositionIdentifier + "/" + nodeIdentifier;
 }
  
 string VuoStringUtilities::buildPortIdentifier(const string &nodeIdentifier, const string &portName)
 {
     return nodeIdentifier + ":" + portName;
 }
  
 string VuoStringUtilities::prefixSymbolName(string symbolName, string moduleKey)
 {
     return transcodeToIdentifier(moduleKey) + "__" + symbolName;
 }
  
 string VuoStringUtilities::transcodeToIdentifier(string str)
 {
     CFMutableStringRef strCF = CFStringCreateMutable(NULL, 0);
     CFStringAppendCString(strCF, str.c_str(), kCFStringEncodingUTF8);
  
     CFStringNormalize(strCF, kCFStringNormalizationFormD);  // decomposes combining characters, so accents/diacritics are separated from their letters
  
     CFIndex strLength = CFStringGetLength(strCF);
     UniChar *strBuf = (UniChar *)malloc(strLength * sizeof(UniChar));
     if (!strBuf)
     {
         CFRelease(strCF);
         return string();
     }
     CFStringGetCharacters(strCF, CFRangeMake(0, strLength), strBuf);
  
     CFStringRef empty = CFStringCreateWithCString(NULL, "", kCFStringEncodingUTF8);
     CFStringRef underscore = CFStringCreateWithCString(NULL, "_", kCFStringEncodingUTF8);
     CFStringRef doubleUnderscore = CFStringCreateWithCString(NULL, "__", kCFStringEncodingUTF8);
     if (!empty || !underscore || !doubleUnderscore)
         return string();
  
     for (CFIndex i = strLength-1; i >= 0; --i)
     {
         UniChar c = strBuf[i];
  
         CFStringRef replacement = NULL;
         if (c > 127)  // non-ASCII
             replacement = empty;
         else if (c == '.' || isspace(c))
             replacement = underscore;
         else if (c == '/' || c == ':')
             replacement = doubleUnderscore;
         else if (! isValidCharInIdentifier(c))
             replacement = empty;
  
         if (replacement)
             CFStringReplace(strCF, CFRangeMake(i, 1), replacement);
     }
  
     // https://stackoverflow.com/questions/1609565/whats-the-cfstring-equiv-of-nsstrings-utf8string
  
     const char *useUTF8StringPtr = NULL;
     char *freeUTF8StringPtr = NULL;
  
     if ((useUTF8StringPtr = CFStringGetCStringPtr(strCF, kCFStringEncodingUTF8)) == NULL)
     {
         CFIndex maxBytes = 4 * strLength + 1;
         freeUTF8StringPtr = (char *)malloc(maxBytes);
         CFStringGetCString(strCF, freeUTF8StringPtr, maxBytes, kCFStringEncodingUTF8);
         useUTF8StringPtr = freeUTF8StringPtr;
     }
  
     string ret = useUTF8StringPtr;
  
     if (freeUTF8StringPtr != NULL)
         free(freeUTF8StringPtr);
  
     CFRelease(strCF);
     CFRelease(empty);
     CFRelease(underscore);
     CFRelease(doubleUnderscore);
     free(strBuf);
  
     return ret;
 }
  
 bool VuoStringUtilities::isValidCharInIdentifier(char ch)
 {
     return isalnum(ch) || ch == '_';
 }
  
 string VuoStringUtilities::transcodeToGraphvizIdentifier(const string &originalString)
 {
     string escapedString = originalString;
     for (string::size_type i = 0; (i = escapedString.find("\\", i)) != std::string::npos; i += 2)
         escapedString.replace(i, 1, "\\\\");
     for (string::size_type i = 0; (i = escapedString.find("\"", i)) != std::string::npos; i += 2)
         escapedString.replace(i, 1, "\\\"");
     for (string::size_type i = 0; (i = escapedString.find("{", i)) != std::string::npos; i += 2)
         escapedString.replace(i, 1, "\\{");
     for (string::size_type i = 0; (i = escapedString.find("}", i)) != std::string::npos; i += 2)
         escapedString.replace(i, 1, "\\}");
     for (string::size_type i = 0; (i = escapedString.find("<", i)) != std::string::npos; i += 2)
         escapedString.replace(i, 1, "\\<");
     for (string::size_type i = 0; (i = escapedString.find(">", i)) != std::string::npos; i += 2)
         escapedString.replace(i, 1, "\\>");
     for (string::size_type i = 0; (i = escapedString.find("|", i)) != std::string::npos; i += 2)
         escapedString.replace(i, 1, "\\|");
     for (string::size_type i = 0; (i = escapedString.find("  ", i)) != std::string::npos; i += 3)
         escapedString.replace(i, 2, " \\ ");
     return escapedString;
 }
  
 string VuoStringUtilities::transcodeFromGraphvizIdentifier(const string &graphvizIdentifier)
 {
     string unescapedString;
     bool inEscape = false;
     for (string::const_iterator i = graphvizIdentifier.begin(); i != graphvizIdentifier.end(); ++i)
     {
         if (inEscape)
         {
             inEscape = false;
             unescapedString += *i;
             continue;
         }
  
         if (*i == '\\')
         {
             inEscape = true;
             continue;
         }
  
         unescapedString  += *i;
     }
     return unescapedString;
 }
  
 string VuoStringUtilities::formUniqueIdentifier(set<string> &takenIdentifiers,
                                                 const string &preferredIdentifier, const string &identifierPrefix)
 {
     auto isIdentifierAvailable = [&takenIdentifiers] (const string &identifier)
     {
         return takenIdentifiers.find(identifier) == takenIdentifiers.end();
     };
  
     string uniqueIdentifier = formUniqueIdentifier(isIdentifierAvailable, preferredIdentifier, identifierPrefix);
     takenIdentifiers.insert(uniqueIdentifier);
     return uniqueIdentifier;
 }
  
 string VuoStringUtilities::formUniqueIdentifier(std::function<bool(const string &)> isIdentifierAvailable,
                                                 const string &preferredIdentifier, const string &identifierPrefix)
 {
     string unique = preferredIdentifier;
     string prefix = (! identifierPrefix.empty() ? identifierPrefix : preferredIdentifier);
     int suffix = 2;
  
     while (! isIdentifierAvailable(unique))
     {
         ostringstream oss;
         oss << prefix << suffix++;
         unique = oss.str();
     }
  
     return unique;
 }
  
 string VuoStringUtilities::generateHtmlFromMarkdown(const string &markdownString)
 {
     MMIOT *doc = mkd_string(markdownString.c_str(), markdownString.length(), MKD_NOPANTS);
     mkd_compile(doc, 0);
     char *html;
     mkd_document(doc, &html);
     string htmlString(html);
     mkd_cleanup(doc);
  
     // Remove the final linebreak from code blocks,
     // since Qt (unlike typical browser rendering engines) considers that whitespace significant.
     replaceAll(htmlString, "\n</code></pre>", "</code></pre>");
  
     return htmlString;
 }
  
 string VuoStringUtilities::generateHtmlFromMarkdownLine(const string &markdownString)
 {
     size_t length = markdownString.length();
     if (!length)
         return "";
  
     char *html;
     mkd_line((char *)markdownString.c_str(), length, &html, MKD_NOPANTS);
     string htmlString(html);
     free(html);
     return htmlString;
 }
  
 string VuoStringUtilities::convertToCamelCase(const string &originalString,
                                               bool forceFirstLetterToUpper, bool forceFirstLetterToLower, bool forceInterveningLettersToLower,
                                               bool allowSeparatorDots)
 {
     string camelCaseString;
     bool first = true;
     bool uppercaseNext = forceFirstLetterToUpper;
     bool lowercaseNext = forceFirstLetterToLower;
     bool previousWasDot = false;
     for (string::const_iterator i = originalString.begin(); i != originalString.end(); ++i)
     {
         if (first && !isalpha(*i))
             continue;
         first = false;
  
         bool isDot = *i == '.';
         if (allowSeparatorDots && isDot)
         {
             if (previousWasDot)
                 continue;
             uppercaseNext = false;
         }
         else if (!isalnum(*i))
         {
             uppercaseNext = true;
             continue;
         }
  
         if (uppercaseNext)
             camelCaseString += toupper(*i);
         else if (lowercaseNext)
             camelCaseString += tolower(*i);
         else
             camelCaseString += *i;
  
         uppercaseNext = false;
         lowercaseNext = forceInterveningLettersToLower;
         previousWasDot = isDot;
     }
  
     // Trim trailing dots.
     if (allowSeparatorDots)
         while (endsWith(camelCaseString, "."))
             camelCaseString = substrBefore(camelCaseString, ".");
  
     return camelCaseString;
 }
  
 string VuoStringUtilities::expandCamelCase(string camelCaseString)
 {
     // Only apply these transformations if the whole string matches,
     // since they may appear as substrings in contexts where they shouldn't be all-caps.
     if (camelCaseString == "x")
         return "X";
     else if (camelCaseString == "y")
         return "Y";
     else if (camelCaseString == "z")
         return "Z";
     else if (camelCaseString == "w")
         return "W";
     else if (camelCaseString == "xy")
         return "XY";
     else if (camelCaseString == "osc")
         return "OSC";
  
     string out;
     out += toupper(camelCaseString[0]);
  
     size_t length = camelCaseString.length();
     for (int i = 1; i < length; ++i)
     {
         char c = camelCaseString[i];
         if (isupper(c) || (isdigit(c) && !isdigit(camelCaseString[i-1])))
             out += " ";
         out += c;
     }
  
     string allCaps[]{
         "2d",
         "3d",
         "4d",
         "Xyzw",
         "Xyz",
         "Rgbaw",
         "Rgba",
         "Rgbw",
         "Rgb",
         "Wwcw",
         "Cmy",
         "Hsl",
         "Hdmi",
         "Sdi",
         "Ntsc",
 //      "Pal",  // Appears in Leap Motion "Palm Velocity".
         "Url",
         "Midi",
         "Rss",
         "Csv",
         "Tsv",
         "Ascii",
         "Json",
         "Xml",
         "Dmx",
     };
     for (auto lower : allCaps)
     {
         string upper = lower;
         std::transform(upper.begin(), upper.end(), upper.begin(), ::toupper);
         VuoStringUtilities::replaceAll(out, lower, upper);
     }
  
     return out;
 }
  
 string VuoStringUtilities::makeRandomHash(int length)
 {
     static const char alphanum[] =
         "0123456789"
         "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
         "abcdefghijklmnopqrstuvwxyz";
  
     string hash(length, 0);
     for (int i = 0; i < length; ++i)
         hash[i] = alphanum[arc4random_uniform(sizeof(alphanum)-1)];
  
     return hash;
 }
  
 // Default-constructed locale, i.e. a copy of the global C++ locale in effect when this static member is initialized.
 const std::locale VuoStringUtilities::locale;
 // Character collation facet obtained from the locale above.
 // It is defined after `locale` within this file, so `locale` is initialized first.
 const std::collate<char> &VuoStringUtilities::collate = std::use_facet<std::collate<char> >(VuoStringUtilities::locale);