ロゴ メインコンテンツへ
RSSフィード
「ソフトウェア開発」に関連する記事一覧

【C#】VOICEVOXで音声再生、音声保存するサンプル

2023/04/07
(この記事の文字数: 119)

C#からVOICEVOXで音声を再生したり、音声をwavファイルとして保存したりするユーティリティーを作ったので、ソースコードを掲載しておきます。

using System.Collections.Generic;
using System.Media;
using System.Net.Http;
using System.Net.Http.Headers;
using System.Text.Json;
using System.Text.Json.Serialization;
using System.Threading.Tasks;

/// <summary>
/// Helpers that talk to a VOICEVOX engine over HTTP at <c>http://127.0.0.1:50021/</c>:
/// build an audio query from text, synthesize it to WAV, and either play the result
/// or write it to a file.
/// </summary>
public static class VoicevoxUtility
{
    // The loopback IP address is used on purpose: responses were slow when using "localhost".
    private const string baseUrl = "http://127.0.0.1:50021/";

    // Single shared client; creating one HttpClient per request is wasteful.
    private static readonly HttpClient httpClient = new HttpClient();

    /// <summary>
    /// Synthesizes <paramref name="text"/> with the given speaker and plays it,
    /// completing only after playback has finished.
    /// </summary>
    /// <param name="text">Text to speak.</param>
    /// <param name="speakerId">Value sent as the <c>speaker</c> query parameter.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="text"/> is null.</exception>
    /// <exception cref="HttpRequestException">The engine returned a non-success status code.</exception>
    public static async Task Speek(string text, int speakerId)
    {
        using var response = await Synthesize(text, speakerId).ConfigureAwait(false);
        using var waveStream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false);

        // PlaySync blocks the current thread until the whole sound has been played.
        using var player = new SoundPlayer(waveStream);
        player.PlaySync();
    }

    /// <summary>
    /// Synthesizes <paramref name="text"/> with the given speaker and writes the
    /// returned WAV data to <paramref name="outputWaveFilePath"/>.
    /// </summary>
    /// <param name="outputWaveFilePath">Destination file; created or overwritten.</param>
    /// <param name="text">Text to speak.</param>
    /// <param name="speaker">Value sent as the <c>speaker</c> query parameter.</param>
    /// <exception cref="System.ArgumentNullException">
    /// <paramref name="outputWaveFilePath"/> or <paramref name="text"/> is null.
    /// </exception>
    /// <exception cref="HttpRequestException">The engine returned a non-success status code.</exception>
    public static async Task RecordSpeech(string outputWaveFilePath, string text, int speaker)
    {
        if (outputWaveFilePath is null)
        {
            throw new System.ArgumentNullException(nameof(outputWaveFilePath));
        }

        using var response = await Synthesize(text, speaker).ConfigureAwait(false);
        using var waveStream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false);

        // The file is created only after a successful response, so a failed
        // request does not leave an empty or truncated file behind.
        using var fs = System.IO.File.Create(outputWaveFilePath);
        await waveStream.CopyToAsync(fs).ConfigureAwait(false);
        await fs.FlushAsync().ConfigureAwait(false);
    }

    /// <summary>
    /// Runs the two-step request (<c>audio_query</c>, then <c>synthesis</c>) and returns
    /// the successful synthesis response. The caller owns and must dispose the response.
    /// </summary>
    private static async Task<HttpResponseMessage> Synthesize(string text, int speakerId)
    {
        string query = await CreateAudioQuery(text, speakerId).ConfigureAwait(false);

        using var request = new HttpRequestMessage(
            HttpMethod.Post,
            $"{baseUrl}synthesis?speaker={speakerId}&enable_interrogative_upspeak=true");
        request.Headers.TryAddWithoutValidation("accept", "audio/wav");

        // The JSON produced by audio_query is posted back unchanged as the request body.
        request.Content = new StringContent(query);
        request.Content.Headers.ContentType = MediaTypeHeaderValue.Parse("application/json");

        var response = await httpClient.SendAsync(request).ConfigureAwait(false);
        try
        {
            response.EnsureSuccessStatusCode();
        }
        catch
        {
            // Ownership is not handed to the caller on failure, so release it here.
            response.Dispose();
            throw;
        }

        return response;
    }

    /// <summary>
    /// Posts to <c>audio_query</c> and returns the response body as a string.
    /// </summary>
    /// <exception cref="System.ArgumentNullException"><paramref name="text"/> is null.</exception>
    /// <exception cref="HttpRequestException">The engine returned a non-success status code.</exception>
    private static async Task<string> CreateAudioQuery(string text, int speakerId)
    {
        if (text is null)
        {
            throw new System.ArgumentNullException(nameof(text));
        }

        // Escape the text so characters such as '&', '#', '+' and '%' are sent as
        // part of the text instead of being interpreted as URL syntax.
        string escapedText = System.Uri.EscapeDataString(text);

        using var requestMessage = new HttpRequestMessage(
            HttpMethod.Post,
            $"{baseUrl}audio_query?text={escapedText}&speaker={speakerId}");
        requestMessage.Headers.TryAddWithoutValidation("accept", "application/json");

        // All parameters travel in the query string; the body is intentionally empty.
        requestMessage.Content = new StringContent("");
        requestMessage.Content.Headers.ContentType = MediaTypeHeaderValue.Parse("application/x-www-form-urlencoded");

        using var response = await httpClient.SendAsync(requestMessage).ConfigureAwait(false);
        response.EnsureSuccessStatusCode();
        return await response.Content.ReadAsStringAsync().ConfigureAwait(false);
    }
}

以下は使用例です。

// Play the synthesized speech directly.
// .Wait() blocks until playback has finished; inside an async method, await the call instead.
VoicevoxUtility.Speek("これは直接再生するテストです", 39).Wait();

// Save the speech to a wave file, then play that file.
var wavePath = "output.wav";
VoicevoxUtility.RecordSpeech(wavePath, "これは録音テストです", 39).Wait();

// SoundPlayer is disposable, so release it once playback is done.
using var player = new SoundPlayer(wavePath);
player.PlaySync();

ちなみに、「これは直接再生するテストです」というテキストに対する audio_query のレスポンスは、以下のようになりました(見やすいように整形しています)。

{
   "accent_phrases":[
      {
         "moras":[
            {
               "text":"コ",
               "consonant":"k",
               "consonant_length":0.06594131141901016,
               "vowel":"o",
               "vowel_length":0.06108786165714264,
               "pitch":5.282137870788574
            },
            {
               "text":"レ",
               "consonant":"r",
               "consonant_length":0.021974779665470123,
               "vowel":"e",
               "vowel_length":0.058412015438079834,
               "pitch":5.355251789093018
            },
            {
               "text":"ワ",
               "consonant":"w",
               "consonant_length":0.03409413993358612,
               "vowel":"a",
               "vowel_length":0.0845031589269638,
               "pitch":5.379842281341553
            }
         ],
         "accent":3,
         "pause_mora":null,
         "is_interrogative":false
      },
      {
         "moras":[
            {
               "text":"チョ",
               "consonant":"ch",
               "consonant_length":0.0630991980433464,
               "vowel":"o",
               "vowel_length":0.060945361852645874,
               "pitch":5.118475437164307
            },
            {
               "text":"ク",
               "consonant":"k",
               "consonant_length":0.04479902237653732,
               "vowel":"U",
               "vowel_length":0.0330185741186142,
               "pitch":0.0
            },
            {
               "text":"セ",
               "consonant":"s",
               "consonant_length":0.049481965601444244,
               "vowel":"e",
               "vowel_length":0.06068601459264755,
               "pitch":5.565320014953613
            },
            {
               "text":"ツ",
               "consonant":"ts",
               "consonant_length":0.0557292103767395,
               "vowel":"U",
               "vowel_length":0.039859622716903687,
               "pitch":0.0
            }
         ],
         "accent":4,
         "pause_mora":null,
         "is_interrogative":false
      },
      {
         "moras":[
            {
               "text":"サ",
               "consonant":"s",
               "consonant_length":0.06819044798612595,
               "vowel":"a",
               "vowel_length":0.06987302750349045,
               "pitch":5.522080898284912
            },
            {
               "text":"イ",
               "consonant":null,
               "consonant_length":null,
               "vowel":"i",
               "vowel_length":0.05302189290523529,
               "pitch":5.533734321594238
            },
            {
               "text":"セ",
               "consonant":"s",
               "consonant_length":0.07046963274478912,
               "vowel":"e",
               "vowel_length":0.052582331001758575,
               "pitch":5.589731216430664
            },
            {
               "text":"エ",
               "consonant":null,
               "consonant_length":null,
               "vowel":"e",
               "vowel_length":0.0676243007183075,
               "pitch":5.560169219970703
            }
         ],
         "accent":4,
         "pause_mora":null,
         "is_interrogative":false
      },
      {
         "moras":[
            {
               "text":"ス",
               "consonant":"s",
               "consonant_length":0.08211246132850647,
               "vowel":"u",
               "vowel_length":0.034779004752635956,
               "pitch":5.563896179199219
            },
            {
               "text":"ル",
               "consonant":"r",
               "consonant_length":0.0238371342420578,
               "vowel":"u",
               "vowel_length":0.060263603925704956,
               "pitch":5.503627300262451
            }
         ],
         "accent":2,
         "pause_mora":null,
         "is_interrogative":false
      },
      {
         "moras":[
            {
               "text":"テ",
               "consonant":"t",
               "consonant_length":0.04385934770107269,
               "vowel":"e",
               "vowel_length":0.057315394282341,
               "pitch":5.5735626220703125
            },
            {
               "text":"ス",
               "consonant":"s",
               "consonant_length":0.030951015651226044,
               "vowel":"U",
               "vowel_length":0.04976283758878708,
               "pitch":0.0
            },
            {
               "text":"ト",
               "consonant":"t",
               "consonant_length":0.04408574849367142,
               "vowel":"o",
               "vowel_length":0.055898360908031464,
               "pitch":5.339382171630859
            },
            {
               "text":"デ",
               "consonant":"d",
               "consonant_length":0.0317467525601387,
               "vowel":"e",
               "vowel_length":0.05603151023387909,
               "pitch":5.168008804321289
            },
            {
               "text":"ス",
               "consonant":"s",
               "consonant_length":0.05199216306209564,
               "vowel":"U",
               "vowel_length":0.10056409239768982,
               "pitch":0.0
            }
         ],
         "accent":1,
         "pause_mora":null,
         "is_interrogative":false
      }
   ],
   "speedScale":1.0,
   "pitchScale":0.0,
   "intonationScale":1.0,
   "volumeScale":1.0,
   "prePhonemeLength":0.1,
   "postPhonemeLength":0.1,
   "outputSamplingRate":24000,
   "outputStereo":false,
   "kana":"コレワ'/チョ_クセ_ツ'/サイセエ'/スル'/テ'_ストデ_ス"
}

  このエントリーをはてなブックマークに追加  

<<「ソフトウェア開発」の記事一覧に戻る

コメント(0 件)



コンテンツロード: 0.009 sec
Copyright(C)2006-2024 puarts All Rights Reserved